From ca8458644371eb47596a22a385b92679924a9b33 Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Mon, 24 Sep 2012 16:15:57 -0400 Subject: [PATCH 01/90] Adding default intellij configuration files --- .idea/.name | 1 + .idea/ant.xml | 15 + .idea/codeStyleSettings.xml | 13 + .idea/compiler.xml | 21 + .idea/copyright/profiles_settings.xml | 5 + .idea/encodings.xml | 5 + .idea/highlighting.xml | 8 + .idea/inspectionProfiles/Project_Default.xml | 11 + .../inspectionProfiles/profiles_settings.xml | 7 + .idea/libraries/GATK_libraries.xml | 13 + .idea/misc.xml | 32 ++ .idea/modules.xml | 9 + .idea/scopes/scope_settings.xml | 5 + .idea/uiDesigner.xml | 125 ++++++ .idea/vcs.xml | 10 + .idea/workspace.xml | 386 ++++++++++++++++++ cmi-gatk.iml | 23 ++ 17 files changed, 689 insertions(+) create mode 100644 .idea/.name create mode 100644 .idea/ant.xml create mode 100644 .idea/codeStyleSettings.xml create mode 100644 .idea/compiler.xml create mode 100644 .idea/copyright/profiles_settings.xml create mode 100644 .idea/encodings.xml create mode 100644 .idea/highlighting.xml create mode 100644 .idea/inspectionProfiles/Project_Default.xml create mode 100644 .idea/inspectionProfiles/profiles_settings.xml create mode 100644 .idea/libraries/GATK_libraries.xml create mode 100644 .idea/misc.xml create mode 100644 .idea/modules.xml create mode 100644 .idea/scopes/scope_settings.xml create mode 100644 .idea/uiDesigner.xml create mode 100644 .idea/vcs.xml create mode 100644 .idea/workspace.xml create mode 100644 cmi-gatk.iml diff --git a/.idea/.name b/.idea/.name new file mode 100644 index 000000000..7014f65a5 --- /dev/null +++ b/.idea/.name @@ -0,0 +1 @@ +cmi-gatk \ No newline at end of file diff --git a/.idea/ant.xml b/.idea/ant.xml new file mode 100644 index 000000000..4674eeac9 --- /dev/null +++ b/.idea/ant.xml @@ -0,0 +1,15 @@ + + + + + + + + + + + + + + + diff --git a/.idea/codeStyleSettings.xml b/.idea/codeStyleSettings.xml new file mode 100644 index 000000000..9178b389f --- 
/dev/null +++ b/.idea/codeStyleSettings.xml @@ -0,0 +1,13 @@ + + + + + + + diff --git a/.idea/compiler.xml b/.idea/compiler.xml new file mode 100644 index 000000000..ded2e9a1d --- /dev/null +++ b/.idea/compiler.xml @@ -0,0 +1,21 @@ + + + + + + diff --git a/.idea/copyright/profiles_settings.xml b/.idea/copyright/profiles_settings.xml new file mode 100644 index 000000000..3572571ad --- /dev/null +++ b/.idea/copyright/profiles_settings.xml @@ -0,0 +1,5 @@ + + + + + \ No newline at end of file diff --git a/.idea/encodings.xml b/.idea/encodings.xml new file mode 100644 index 000000000..e206d70d8 --- /dev/null +++ b/.idea/encodings.xml @@ -0,0 +1,5 @@ + + + + + diff --git a/.idea/highlighting.xml b/.idea/highlighting.xml new file mode 100644 index 000000000..f33b64d94 --- /dev/null +++ b/.idea/highlighting.xml @@ -0,0 +1,8 @@ + + + + + + diff --git a/.idea/inspectionProfiles/Project_Default.xml b/.idea/inspectionProfiles/Project_Default.xml new file mode 100644 index 000000000..b8c243dbe --- /dev/null +++ b/.idea/inspectionProfiles/Project_Default.xml @@ -0,0 +1,11 @@ + + + + \ No newline at end of file diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml new file mode 100644 index 000000000..3b312839b --- /dev/null +++ b/.idea/inspectionProfiles/profiles_settings.xml @@ -0,0 +1,7 @@ + + + + \ No newline at end of file diff --git a/.idea/libraries/GATK_libraries.xml b/.idea/libraries/GATK_libraries.xml new file mode 100644 index 000000000..970d0a3dc --- /dev/null +++ b/.idea/libraries/GATK_libraries.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml new file mode 100644 index 000000000..afd7f3778 --- /dev/null +++ b/.idea/misc.xml @@ -0,0 +1,32 @@ + + + + + + + + + + http://www.w3.org/1999/xhtml + + + + + + + diff --git a/.idea/modules.xml b/.idea/modules.xml new file mode 100644 index 000000000..09caa2933 --- /dev/null +++ b/.idea/modules.xml @@ 
-0,0 +1,9 @@ + + + + + + + + + diff --git a/.idea/scopes/scope_settings.xml b/.idea/scopes/scope_settings.xml new file mode 100644 index 000000000..922003b84 --- /dev/null +++ b/.idea/scopes/scope_settings.xml @@ -0,0 +1,5 @@ + + + + \ No newline at end of file diff --git a/.idea/uiDesigner.xml b/.idea/uiDesigner.xml new file mode 100644 index 000000000..3b0002030 --- /dev/null +++ b/.idea/uiDesigner.xml @@ -0,0 +1,125 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/.idea/vcs.xml b/.idea/vcs.xml new file mode 100644 index 000000000..cbc984988 --- /dev/null +++ b/.idea/vcs.xml @@ -0,0 +1,10 @@ + + + + + + + + + diff --git a/.idea/workspace.xml b/.idea/workspace.xml new file mode 100644 index 000000000..87ab79287 --- /dev/null +++ b/.idea/workspace.xml @@ -0,0 +1,386 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + localhost + 5050 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + cmi-gatk + + + + + + + + GATK libraries + + + + + + + + + diff --git a/cmi-gatk.iml b/cmi-gatk.iml new file mode 100644 index 000000000..e63aff535 --- /dev/null +++ b/cmi-gatk.iml @@ -0,0 +1,23 @@ + + + + + + + + + + + + + + + + + + + + + + + From 4aad135f8c14e0d8d60fa4782024b1a5f29dd5dc Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Mon, 24 Sep 2012 17:01:17 -0400 Subject: [PATCH 02/90] Generic input file name recognition (still need to implement support to FastQ, but it now can at least accept it) --- .../qscripts/DataProcessingPipeline.scala | 20 
+++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala index 56f6460fb..c21db30ce 100755 --- a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala +++ b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala @@ -96,6 +96,7 @@ class DataProcessingPipeline extends QScript { var cleanModelEnum: ConsensusDeterminationModel = ConsensusDeterminationModel.USE_READS + val bwaParameters: String = " -q 5 -l 32 -k 2 -t 4 -o 1 " @@ -165,12 +166,15 @@ class DataProcessingPipeline extends QScript { var realignedBams: Seq[File] = Seq() var index = 1 for (bam <- bams) { - // first revert the BAM file to the original qualities - val saiFile1 = swapExt(bam, ".bam", "." + index + ".1.sai") - val saiFile2 = swapExt(bam, ".bam", "." + index + ".2.sai") - val realignedSamFile = swapExt(bam, ".bam", "." + index + ".realigned.sam") - val realignedBamFile = swapExt(bam, ".bam", "." + index + ".realigned.bam") - val rgRealignedBamFile = swapExt(bam, ".bam", "." + index + ".realigned.rg.bam") + val extension = bam.toString.substring(bam.toString.length - 4) + + + + val saiFile1 = swapExt(bam, extension, "." + index + ".1.sai") + val saiFile2 = swapExt(bam, extension, "." + index + ".2.sai") + val realignedSamFile = swapExt(bam, extension, "." + index + ".realigned.sam") + val realignedBamFile = swapExt(bam, extension, "." + index + ".realigned.bam") + val rgRealignedBamFile = swapExt(bam, extension, "." 
+ index + ".realigned.rg.bam") if (useBWAse) { val revertedBAM = revertBAM(bam, true) @@ -444,7 +448,7 @@ class DataProcessingPipeline extends QScript { case class bwa_aln_se (inBam: File, outSai: File) extends CommandLineFunction with ExternalCommonArgs { @Input(doc="bam file to be aligned") var bam = inBam @Output(doc="output sai file") var sai = outSai - def commandLine = bwaPath + " aln -t " + bwaThreads + " -q 5 " + reference + " -b " + bam + " > " + sai + def commandLine = bwaPath + " aln -t " + bwaThreads + bwaParameters + reference + " -b " + bam + " > " + sai this.analysisName = queueLogDir + outSai + ".bwa_aln_se" this.jobName = queueLogDir + outSai + ".bwa_aln_se" } @@ -452,7 +456,7 @@ class DataProcessingPipeline extends QScript { case class bwa_aln_pe (inBam: File, outSai1: File, index: Int) extends CommandLineFunction with ExternalCommonArgs { @Input(doc="bam file to be aligned") var bam = inBam @Output(doc="output sai file for 1st mating pair") var sai = outSai1 - def commandLine = bwaPath + " aln -t " + bwaThreads + " -q 5 " + reference + " -b" + index + " " + bam + " > " + sai + def commandLine = bwaPath + " aln -t " + bwaThreads + bwaParameters + reference + " -b" + index + " " + bam + " > " + sai this.analysisName = queueLogDir + outSai1 + ".bwa_aln_pe1" this.jobName = queueLogDir + outSai1 + ".bwa_aln_pe1" } From 4324bd72fdec5b9215ec10a6bd41b60c83135157 Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Tue, 25 Sep 2012 10:51:53 -0400 Subject: [PATCH 03/90] Updating Intellij enviroment and adding Scala --- .idea/libraries/GATK_libraries.xml | 1 - .idea/misc.xml | 2 +- .idea/workspace.xml | 221 ++++++++++++++++++++++++----- cmi-gatk.iml | 10 +- 4 files changed, 192 insertions(+), 42 deletions(-) diff --git a/.idea/libraries/GATK_libraries.xml b/.idea/libraries/GATK_libraries.xml index 970d0a3dc..b363bbe6c 100644 --- a/.idea/libraries/GATK_libraries.xml +++ b/.idea/libraries/GATK_libraries.xml @@ -6,7 +6,6 @@ - diff --git a/.idea/misc.xml 
b/.idea/misc.xml index afd7f3778..a79280c52 100644 --- a/.idea/misc.xml +++ b/.idea/misc.xml @@ -24,7 +24,7 @@ http://www.w3.org/1999/xhtml - + diff --git a/.idea/workspace.xml b/.idea/workspace.xml index 87ab79287..f6d4567fd 100644 --- a/.idea/workspace.xml +++ b/.idea/workspace.xml @@ -1,7 +1,12 @@ - + + + + + + + + + + + + + + - + @@ -112,33 +140,7 @@ - - - - - - - - - - - - - - - - - - - - - + @@ -147,7 +149,7 @@ - + @@ -228,8 +230,9 @@ - + + @@ -286,7 +289,7 @@ + + - + + + + + + + + + + + + + + + + + - + @@ -333,6 +464,18 @@ + + + Detection + + + + + @@ -346,6 +489,7 @@ + 1.6 diff --git a/cmi-gatk.iml b/cmi-gatk.iml index e63aff535..4dbee1336 100644 --- a/cmi-gatk.iml +++ b/cmi-gatk.iml @@ -1,5 +1,13 @@ + + + + + + @@ -17,7 +25,7 @@ - + From 65b100f9b0de9ba03a35f1bb51b1c8e55af92513 Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Tue, 25 Sep 2012 12:02:34 -0400 Subject: [PATCH 04/90] Reverting the DPP to the original version, going to create a new simplified version for CMI in private. 
--- .../qscripts/DataProcessingPipeline.scala | 20 ++++++++----------- 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala index c21db30ce..56f6460fb 100755 --- a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala +++ b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala @@ -96,7 +96,6 @@ class DataProcessingPipeline extends QScript { var cleanModelEnum: ConsensusDeterminationModel = ConsensusDeterminationModel.USE_READS - val bwaParameters: String = " -q 5 -l 32 -k 2 -t 4 -o 1 " @@ -166,15 +165,12 @@ class DataProcessingPipeline extends QScript { var realignedBams: Seq[File] = Seq() var index = 1 for (bam <- bams) { - val extension = bam.toString.substring(bam.toString.length - 4) - - - - val saiFile1 = swapExt(bam, extension, "." + index + ".1.sai") - val saiFile2 = swapExt(bam, extension, "." + index + ".2.sai") - val realignedSamFile = swapExt(bam, extension, "." + index + ".realigned.sam") - val realignedBamFile = swapExt(bam, extension, "." + index + ".realigned.bam") - val rgRealignedBamFile = swapExt(bam, extension, "." + index + ".realigned.rg.bam") + // first revert the BAM file to the original qualities + val saiFile1 = swapExt(bam, ".bam", "." + index + ".1.sai") + val saiFile2 = swapExt(bam, ".bam", "." + index + ".2.sai") + val realignedSamFile = swapExt(bam, ".bam", "." + index + ".realigned.sam") + val realignedBamFile = swapExt(bam, ".bam", "." + index + ".realigned.bam") + val rgRealignedBamFile = swapExt(bam, ".bam", "." 
+ index + ".realigned.rg.bam") if (useBWAse) { val revertedBAM = revertBAM(bam, true) @@ -448,7 +444,7 @@ class DataProcessingPipeline extends QScript { case class bwa_aln_se (inBam: File, outSai: File) extends CommandLineFunction with ExternalCommonArgs { @Input(doc="bam file to be aligned") var bam = inBam @Output(doc="output sai file") var sai = outSai - def commandLine = bwaPath + " aln -t " + bwaThreads + bwaParameters + reference + " -b " + bam + " > " + sai + def commandLine = bwaPath + " aln -t " + bwaThreads + " -q 5 " + reference + " -b " + bam + " > " + sai this.analysisName = queueLogDir + outSai + ".bwa_aln_se" this.jobName = queueLogDir + outSai + ".bwa_aln_se" } @@ -456,7 +452,7 @@ class DataProcessingPipeline extends QScript { case class bwa_aln_pe (inBam: File, outSai1: File, index: Int) extends CommandLineFunction with ExternalCommonArgs { @Input(doc="bam file to be aligned") var bam = inBam @Output(doc="output sai file for 1st mating pair") var sai = outSai1 - def commandLine = bwaPath + " aln -t " + bwaThreads + bwaParameters + reference + " -b" + index + " " + bam + " > " + sai + def commandLine = bwaPath + " aln -t " + bwaThreads + " -q 5 " + reference + " -b" + index + " " + bam + " > " + sai this.analysisName = queueLogDir + outSai1 + ".bwa_aln_pe1" this.jobName = queueLogDir + outSai1 + ".bwa_aln_pe1" } From cb8d4c97e119bd76b382dcb5cc69277700456897 Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Tue, 25 Sep 2012 17:13:50 -0400 Subject: [PATCH 05/90] First implementation of a generic 'bundled' Data Processing Pipeline for germline and cancer. not ready for prime time yet! 
--- .../src/org/broadinstitute/sting/queue/util/QScriptUtils.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/public/scala/src/org/broadinstitute/sting/queue/util/QScriptUtils.scala b/public/scala/src/org/broadinstitute/sting/queue/util/QScriptUtils.scala index 1529d9951..f684e533f 100644 --- a/public/scala/src/org/broadinstitute/sting/queue/util/QScriptUtils.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/util/QScriptUtils.scala @@ -57,7 +57,8 @@ object QScriptUtils { for (file <- fromFile(in).getLines()) if (!file.startsWith("#") && !file.isEmpty ) list :+= new File(file.trim()) - list.sortWith(_.compareTo(_) < 0) +// list.sortWith(_.compareTo(_) < 0) + list } /** From c9c2682f8688d5978b001d21eee4fd7f111c9350 Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Tue, 25 Sep 2012 17:18:44 -0400 Subject: [PATCH 07/90] removing annoying xml from IDEA configuration --- .idea/workspace.xml | 529 -------------------------------------------- 1 file changed, 529 deletions(-) delete mode 100644 .idea/workspace.xml diff --git a/.idea/workspace.xml b/.idea/workspace.xml deleted file mode 100644 index f6d4567fd..000000000 --- a/.idea/workspace.xml +++ /dev/null @@ -1,529 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - localhost - 5050 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Detection - - - - - - - - - - - - - - - 1.6 - - - - - - - - cmi-gatk - - - - - - - - GATK libraries - - - - - - - - - From 3e68fee76489a6667d070210c88aa0e3509ad2a8 Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Thu, 27 Sep 2012 11:04:56 -0400 Subject: [PATCH 10/90] Removed the intellij files from the root and 
made an example package for new users. This allows users to start at the same page and then change it as they see fit without interfering with the repo (thanks guillermo!) --- .idea/.name | 1 - .idea/ant.xml | 15 --- .idea/codeStyleSettings.xml | 13 -- .idea/compiler.xml | 21 --- .idea/copyright/profiles_settings.xml | 5 - .idea/encodings.xml | 5 - .idea/highlighting.xml | 8 -- .idea/inspectionProfiles/Project_Default.xml | 11 -- .../inspectionProfiles/profiles_settings.xml | 7 - .idea/libraries/GATK_libraries.xml | 12 -- .idea/misc.xml | 32 ----- .idea/modules.xml | 9 -- .idea/scopes/scope_settings.xml | 5 - .idea/uiDesigner.xml | 125 ------------------ .idea/vcs.xml | 10 -- cmi-gatk.iml | 31 ----- intellij_example.tar.bz2 | Bin 0 -> 7520 bytes 17 files changed, 310 deletions(-) delete mode 100644 .idea/.name delete mode 100644 .idea/ant.xml delete mode 100644 .idea/codeStyleSettings.xml delete mode 100644 .idea/compiler.xml delete mode 100644 .idea/copyright/profiles_settings.xml delete mode 100644 .idea/encodings.xml delete mode 100644 .idea/highlighting.xml delete mode 100644 .idea/inspectionProfiles/Project_Default.xml delete mode 100644 .idea/inspectionProfiles/profiles_settings.xml delete mode 100644 .idea/libraries/GATK_libraries.xml delete mode 100644 .idea/misc.xml delete mode 100644 .idea/modules.xml delete mode 100644 .idea/scopes/scope_settings.xml delete mode 100644 .idea/uiDesigner.xml delete mode 100644 .idea/vcs.xml delete mode 100644 cmi-gatk.iml create mode 100644 intellij_example.tar.bz2 diff --git a/.idea/.name b/.idea/.name deleted file mode 100644 index 7014f65a5..000000000 --- a/.idea/.name +++ /dev/null @@ -1 +0,0 @@ -cmi-gatk \ No newline at end of file diff --git a/.idea/ant.xml b/.idea/ant.xml deleted file mode 100644 index 4674eeac9..000000000 --- a/.idea/ant.xml +++ /dev/null @@ -1,15 +0,0 @@ - - - - - - - - - - - - - - - diff --git a/.idea/codeStyleSettings.xml b/.idea/codeStyleSettings.xml deleted file mode 100644 index 
9178b389f..000000000 --- a/.idea/codeStyleSettings.xml +++ /dev/null @@ -1,13 +0,0 @@ - - - - - - - diff --git a/.idea/compiler.xml b/.idea/compiler.xml deleted file mode 100644 index ded2e9a1d..000000000 --- a/.idea/compiler.xml +++ /dev/null @@ -1,21 +0,0 @@ - - - - - - diff --git a/.idea/copyright/profiles_settings.xml b/.idea/copyright/profiles_settings.xml deleted file mode 100644 index 3572571ad..000000000 --- a/.idea/copyright/profiles_settings.xml +++ /dev/null @@ -1,5 +0,0 @@ - - - - - \ No newline at end of file diff --git a/.idea/encodings.xml b/.idea/encodings.xml deleted file mode 100644 index e206d70d8..000000000 --- a/.idea/encodings.xml +++ /dev/null @@ -1,5 +0,0 @@ - - - - - diff --git a/.idea/highlighting.xml b/.idea/highlighting.xml deleted file mode 100644 index f33b64d94..000000000 --- a/.idea/highlighting.xml +++ /dev/null @@ -1,8 +0,0 @@ - - - - - - diff --git a/.idea/inspectionProfiles/Project_Default.xml b/.idea/inspectionProfiles/Project_Default.xml deleted file mode 100644 index b8c243dbe..000000000 --- a/.idea/inspectionProfiles/Project_Default.xml +++ /dev/null @@ -1,11 +0,0 @@ - - - - \ No newline at end of file diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml deleted file mode 100644 index 3b312839b..000000000 --- a/.idea/inspectionProfiles/profiles_settings.xml +++ /dev/null @@ -1,7 +0,0 @@ - - - - \ No newline at end of file diff --git a/.idea/libraries/GATK_libraries.xml b/.idea/libraries/GATK_libraries.xml deleted file mode 100644 index b363bbe6c..000000000 --- a/.idea/libraries/GATK_libraries.xml +++ /dev/null @@ -1,12 +0,0 @@ - - - - - - - - - - - - \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml deleted file mode 100644 index a79280c52..000000000 --- a/.idea/misc.xml +++ /dev/null @@ -1,32 +0,0 @@ - - - - - - - - - - http://www.w3.org/1999/xhtml - - - - - - - diff --git a/.idea/modules.xml b/.idea/modules.xml deleted file mode 100644 index 
09caa2933..000000000 --- a/.idea/modules.xml +++ /dev/null @@ -1,9 +0,0 @@ - - - - - - - - - diff --git a/.idea/scopes/scope_settings.xml b/.idea/scopes/scope_settings.xml deleted file mode 100644 index 922003b84..000000000 --- a/.idea/scopes/scope_settings.xml +++ /dev/null @@ -1,5 +0,0 @@ - - - - \ No newline at end of file diff --git a/.idea/uiDesigner.xml b/.idea/uiDesigner.xml deleted file mode 100644 index 3b0002030..000000000 --- a/.idea/uiDesigner.xml +++ /dev/null @@ -1,125 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/.idea/vcs.xml b/.idea/vcs.xml deleted file mode 100644 index cbc984988..000000000 --- a/.idea/vcs.xml +++ /dev/null @@ -1,10 +0,0 @@ - - - - - - - - - diff --git a/cmi-gatk.iml b/cmi-gatk.iml deleted file mode 100644 index 4dbee1336..000000000 --- a/cmi-gatk.iml +++ /dev/null @@ -1,31 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/intellij_example.tar.bz2 b/intellij_example.tar.bz2 new file mode 100644 index 0000000000000000000000000000000000000000..bce16045cd1cc476305c5e59d07ff9b94b8e5d73 GIT binary patch literal 7520 zcmV-m9iQStT4*^jL0KkKS)7!cJOD4F|M~yi5CDHu|NsC0|M36+|L{No06+`?06+jh z1{h!`o8{-7h4qB5wR>^y>D#MWyS_S4UPrYW)`~XXKBY)=w?n(T?w=>u4%}F@T^prq zM&}*98+P}1CC>JGY3}jFJo}ytO>BKWz24nKq=%-|-tIdTv1Z%Ou|py0B#;mw z0$?VFLqIe|HdE0LQ`8zCr9V{lAE^M-CYlc@B!L1ZL{xsM>S#SrDYXw!Hq`?_27mwn z0006)At_HHdU~eyLFyU;NYu?W zO%G7?YBUW!LqkS@9-*KBcwhGq&L2>c7{2Bv%KPh~XqovPD`P$wqr0wA{OX>F7ZKmy zDsL?$v;dI_RDWe46~zQU)lvw>NPvKQ{r?Z1(}ww2_)M*fQwCl2HJP1e>Ri?dCZpMN zP=M?SA|wk1NPtvh=J3} zDw+~D0aFk#a8yBk1VvNe{^Q2@ATLqre5hlqO+ zJH6U_g@n*il^HZ-l$Xlw^mBWD_ji=uWfv%71y1==(-ozyjYp|LKHs?f9BLgry=7~; zQzc6vctrO5Ohh{&v$XFjif1`0L{s*Olu^?q8x}#rqzyaoVB#B4g(81Jx};qlYMjj? 
z2dYy`yjW&t6?wYx0N9Uh!RRNLa}rLdMi`DPNP-P&$TYbGk_IqjZvxK0qQ|cbe@$>KA+XQo15<95gH&cGF>(8JUi?q#;jjQhX7>?$D_)jV*fSGa99hISqAgCirHOBRlz z5-9I|sR$Ai`Yfa3X$-2y?KsEHOGX z@HXt%cKK^%iU{@ItVyi`Mvy~M#VuYFejK)2%tD1XPotWR1C6g7^>_3=M{GBTR=x6} zL$ie9VdKIYD#p_7X2Ij5c!ACsYkPJWj&|WWp}5G;)cO~hDhd@kB;Qp{=JQuL+IA<+ z9G6K-0B_4%_wIgYeR?@~V~!zKJsytphBh&$h7$B()y02uDWc?iHKJ3Oou2*+MfMx^=gIf{X3|lWppAz z-I7p3R)QX!Idb;|10w?J`_#t4+9oPM5Ep6hKa7!-%b*Z%Ne=e{WH2)WCKzJBSF^U& zK`^`eM3i5TFVExdQsTLk;G@qq9{>}@N65UeiIk4 z@~k{rAI=Uv%!sgaYY$=o+l0!JKSDr9R$?^eCXzi64~|&0Ce%+awpg_DqW1T;JnT{d zru{VON@j0;c#A;BkSsr#?|iam4$XGUx$RTb)H_pMY<00{Yt{q!{n-}(G#x&{RZq!9 z?#T+_N>%`kP|_a@fmo2-OJrXX!d!y~G&Dn3dYM^vWSZJ!#jw;(nuR5VhC?^k`y8?h4xtX)+j?PU}@iN>ljD+(Qmb z8dniBZ(_g=D1mLVH)}(M5H0vcNC?Cv1(;-)C)j(xXY2nS+gLe%zZ*W^p1!|_zv=O} z_Q!+DzkQ#@*8b@JY?61J{_8LQ7Wa7yo_=DdhrF->(?ZCJE#uL2ybpcSMKqaVBwSv) zNU6k^_VtahV8JC~!U6{d!bq&!4p|_$D-~)<6%s3$x=9Tg-hN~ZTTL>hPndt*gp7cg z+wR+roDg$1Hu?=(-&o%#Ayf15RN!Z%05j!NTZ&r35#?&$A)o_Kq_+aG2Tmv-^Ti}kzPUMz}gW$fs9 zJC4FgjTw|ACp)ypVq7XolvK9GOTU706q(I(#Eu(UtE5jUBYT&==_1{Y?P%VlGrzMb zBE|_o9do|DW~w@PU%$xNzAis=G|wxF)AvA^EC+4@aDj~7z-#+Yu_mAdbYOG_n;S_f z2MF%vrdGcJHml~P2xvLQOIQJlnTe)5Z8JtNk{R2)nVMB@y3m5+0?^4LZPW(|mtd*? 
zDv;N1W_Rb+ea4+?lrAx`155T=*|7;B>>wiy;Ml)7#VrgvNDTg;;hzJ_HNI$RTTqf4 zfyMeU1R+7uY;dsXgq*(jmUH&D>*C?4NtOzLb-!$s%bXb_Sue0mN5$wkpEM3kv8IvGA=_?Zzv1%_C1tsyl^1@RKH{a zciZw1@6q0f3Zxbb;ED{RY}JFbgv`jo;vnjTP++40u_0!9X$&Jll(K*-u?XPE#-34@ z(2*iq168t)2LIl%)odnH8EI_Yl*{iaXWjeIC?DX z@F63H#+Z$15ur99k-Q~?3g|8Zc=x8AR4D_lS&{5(Wb(9OWn~6P1uWA@lOmW&8qFIU z7SBN;v4-l*AW>A(2|!UvMJG5MwJuL6M+i`eC_Z*#ky3?khBR#txTdfRi~`O|X#<>q zay;fL4M@CAa10!#6DOq2oZBZ*`QRkS_ry;K+IvB5vHgUgDqb| z3@cbx6TJE0H(;83Aleb%_>debXaSWl^D1it&Z*F@y=N{NvpYj~%SKvwWbjz}(2QH5 zzP2fmjwex!Q0DDKn@@wk;d+8XInO!CB-!he7tkDYlzvB5io3y3~eaI5w=k}~_n&-pi{RD^=|p|INIK%M7^Ju>U^q=8=L%d;bA7r& zav%4cJ$b-?7%3m*P6<7LL;|e%;!{W=n0z(DI%^o;Bei5F?M_70?60j?x99A# z3qM)=cXkVG2kmpPJ8oP#mdXvhiT3f-MRyJ&15Gf(d7`7!iiTN|-|lq+clXLmg&CS~L$9jF&hP0Bm8xWJJK=$OMFB>@>4P6z#Xn3dcffp+LAC z!`V2YWzCFixsCZ_7L;F_QL9T?&p+$c)*hPKS8gSO~VTFj|ro3G{Iuof@swuwn#16JUnNC5#N!PbpVLE zvff%F#RxrAU($v^EeV!2DVBm~xc6xi-$OudTP+k6K2v02fE5E45$L!=gi;#?e7UiT zMhuXoWDk1Yu>gn#4)=DU3)lONdeFJ7$d8PZ2g9kss?ltvQp=qrImnG5r+V3zF4fd( z=B;z~@$<2{hrSQ|A=lv6Q712!p!qTbzdD`k(bs>{2Pp3?yqCWFJqT zzsdD8a3Kh21F_vQhkQ6nV4J6$yI@VnB-^q@1Poy1V4pzHT>!|B!-$_&1xU~WX+}BS| zI+H9JIpulj0|c`K4h+Jv+i8{#sDpzsY3kK&nf6^uxxRKVpRY}nq>h`5p#2eSW1JCB zoQDl9g0bddzNVPmI`lIIC+n3Z-%)6 z)m1D@3`N7B@Sgj)AR36~a4}*AFBJoK>y5WL$p%MGX_9PAb!p#rxvRNmYjZFf_p^MM zS*F2;S$4gmw~WV5MV$7|80zk2IN^|p!(1CUj&QiGuRFLn?P!*xF?*d9(+V$}IXT1} zTha_h(muFR1txBW5H`^S1RVSVru%s#2xv)Akeo8AEDyF+y-8{s&ujwd5cgA z{vZA1IN}ss`1mWy37bexU#GAM?R5hGixGW*YN1F6 zNe^y$@Ot{b0s=j%3iJAec(;rNi2(`#p%tYeftrrHgl%C`y`dwy83`F7-YoY_ox@e$ zL&B&_2$DC2^*`wAL)aSaJF>S~WkZ7Y-Du;z{ zz=U=xW(E3CCn_nOid4kc4!;tEos&Z9t35>kz>s9yLG>gOM({gKT^#n%!cAcT@WPry3G z2V$fF(Bh#$fe>JI^|(v(OqCu~Gq|s3b?7=j5C#Z=5wJo%cpU|9AqFF9LZ;v@MFUXd zs2N){bdrWWIl=TxEeH;2AWyYeEYN~qcFD}6*o@`j5Unt1JJgj_YN^CRhB!bw5Oy*P z1DcHm8GutysIu}&!*1M~s!zPA&Oi}v=#?bEF!B<|)OQu(7P)!*MrromGcLP%0kRC2-f z-Ma&_=fdV8BrOEsXnl*Tw^mqWLHgJM!Kwz2*{PSy3 zu1y0}WCf==J8l%kErz3IvwaFAt0TVM@-$^1MCp&5TKidOUd1p zYf^#|AzIhOeK`tn48Ym)u!~-}_Q+l&S8a}MP 
zqt<;`1h6{%dl7fl4{{^4=5Z9~GklcT&KW~EA3Cu3&;?mZn=%p-SGX&lf>n(5j&iOY zYQ5cz$?+OcwiV^|gbZ zDl{t&8X}C%;tMPtc~k`|q$FrGfm%YMX(X@VI=uFI0&+ldLE%k6`SmVPkC_HWF>Jjk zUDTu0qvTmpsIKA3Q+lPhpdN}HV1^2u*J2-*FWMiW;5$l`j~ao*gwCP$RM5i#_C_Ex zH)zO<;CO6Q=Wwgery}Pl$N)(r*fN-s03O~z4|h@!W6)yYC(`*Uf$0mj(1PL>cZ3#Z z4TbDihB0%-2YpbP;Si{_nnei&VL3_(rX}Qwue;<9@ZzbMbgFgWaWzMX7*HysutU{s z6$yK$fP%s@0>K!oAjF959~anz@iKxw9tVi2%a|O!4?k>B;`D(%h5u(vcUQi+mcKd#qyCaVfaiWCOtfxqGP5gZ9C@~hnd`7h<>b=e_+%r-y-BvC8wl_K{AeW^0CX@V5&D!>>QsSU^i-+=i>^k-=!j-OGaIbfVpkOo zov0DlGYvzkWdki>W|%{31zZ$_CjdJbXwoesiiTpR&LP}<$G(Ags#wwbgBY)fP=_5T zQ4Xh%0B4=sfb<{^vtZvf!~pRs?Q2_tCyTUQVcvR>^r#ARkO%Guv=4>qj_B@o4jqbM zt7aXvQ8X%2q25s!VhE5U6mV4RggF8x`vC+b>2V@PWF#OH>&BrAM934Jfz;}KW^E!s zf_sHg^L}8s>!NOo>yjx%-@oa32x!;2(Ek5!w#C`ezx9s#*X(t`x1;-X6+ z7Tr_H3_VZlT{4sbK3ug=pGr!XjFT71U-2y+_{;q+T4bJi(b= zap}Jvh}lP|*qy&ou8QcP>pMaQngHZp=&Vk$3eem_f*S=$R8;1V4~ZCnf}#?r8iXVQ z0j{X_oxDt(h>0TcAk-a!;E`5M2WU_pz^PFx0iuHO=~{qY8>t!!C^m$obS{f0N*n5P z90a#3kBG3kQg+Y~XP`YbFs-J>`*$M_gdyA-C_|>(hkfw|+l<+w zf}KDpwJ=nwn0Cr`@CL~cY#kjD!*FzDfO#Xz58x^D4{2FCkP+ji120 z`AK?g`I(q&&XIvsdP8c|-6RwyD9PaUmZHX{h)-f%GL}>`AdodPFC>U>q}W4M$7Yrv zXn_GT_PIBf+CkyE8OEjAE9XU38OMGBuvkEl2#MCGQP6b&cqsd};rC<;+96%VZpibB za5i6A1WKt|DOYLN0p46>D$r5;!~>XE;UL^8L$+U03KV+NtWE_Trs&B5hj{M;05y_z( zIB@{aYt}fs#2_Ui8xH+<1|!JJl0XMS2m}_23jO|IA2?LhL%8il_4O)!cmxPzpqfGY zc__a@$5HV6afV(GYNQppH5ZI_4QL$Cz*M(x41|T~>WW%5dc!&O=BaR(D8S87Uc(Wb z4GAcqP$MD;MhMW2k||&T_-I1Sc7quRcUP6)AC5{qas)meG;bs!BEU|AAWLmX=fy+6 z$p=H}qdcCy^{S?BA;kyA^aH=II~+xSl}Oz`ntcF$U;6-Xk#{x0N-aG@bNDK>cMb(tan literal 0 HcmV?d00001 From a640afa995a7be890d1753085e568b58f9e449d2 Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Thu, 27 Sep 2012 11:09:41 -0400 Subject: [PATCH 11/90] adding some directories to gitignore --- .gitignore | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.gitignore b/.gitignore index 8623fa076..927caf98d 100644 --- a/.gitignore +++ b/.gitignore @@ -18,3 +18,8 @@ queueScatterGather /bar* integrationtests/ 
public/testdata/onTheFlyOutputTest.vcf +build/ +dist/ +dump/ +lib/ +out/ From dca7c7fa9cb183a57a08952147847ad97f334cd4 Mon Sep 17 00:00:00 2001 From: Kristian Cibulskis Date: Wed, 3 Oct 2012 16:25:34 -0400 Subject: [PATCH 14/90] initial cancer pipeline with mutations and partial indel support --- .../queue/extensions/cancer/MuTect.scala | 378 ++++++++++++++++++ 1 file changed, 378 insertions(+) create mode 100644 public/scala/src/org/broadinstitute/sting/queue/extensions/cancer/MuTect.scala diff --git a/public/scala/src/org/broadinstitute/sting/queue/extensions/cancer/MuTect.scala b/public/scala/src/org/broadinstitute/sting/queue/extensions/cancer/MuTect.scala new file mode 100644 index 000000000..623d397d4 --- /dev/null +++ b/public/scala/src/org/broadinstitute/sting/queue/extensions/cancer/MuTect.scala @@ -0,0 +1,378 @@ +package org.broadinstitute.sting.queue.extensions.cancer + +import java.io.File +import org.broadinstitute.sting.commandline.Argument +import org.broadinstitute.sting.commandline.Gather +import org.broadinstitute.sting.commandline.Input +import org.broadinstitute.sting.commandline.Output +import org.broadinstitute.sting.queue.function.scattergather.ScatterGatherableFunction +import org.broadinstitute.sting.queue.extensions.gatk.{LocusScatterFunction, TaggedFile} + +class MuTect extends org.broadinstitute.sting.queue.extensions.gatk.CommandLineGATK with ScatterGatherableFunction { + analysisName = "MuTect" + analysis_type = "MuTect" + scatterClass = classOf[LocusScatterFunction] + + /** used for debugging, basically exit as soon as we get the reads */ + @Argument(fullName="noop", shortName="", doc="used for debugging, basically exit as soon as we get the reads", required=false, exclusiveOf="", validation="") + var noop: Boolean = _ + + /** add many additional columns of statistics to the output file */ + @Argument(fullName="enable_extended_output", shortName="", doc="add many additional columns of statistics to the output file", required=false, 
exclusiveOf="", validation="") + var enable_extended_output: Boolean = _ + + /** used when running the caller on a normal (as if it were a tumor) to detect artifacts */ + @Argument(fullName="artifact_detection_mode", shortName="", doc="used when running the caller on a normal (as if it were a tumor) to detect artifacts", required=false, exclusiveOf="", validation="") + var artifact_detection_mode: Boolean = _ + + /** name to use for tumor in output files */ + @Argument(fullName="tumor_sample_name", shortName="", doc="name to use for tumor in output files", required=false, exclusiveOf="", validation="") + var tumor_sample_name: String = _ + + /** if the tumor bam contains multiple samples, only use read groups with SM equal to this value */ + @Argument(fullName="bam_tumor_sample_name", shortName="", doc="if the tumor bam contains multiple samples, only use read groups with SM equal to this value", required=false, exclusiveOf="", validation="") + var bam_tumor_sample_name: String = _ + + /** name to use for normal in output files */ + @Argument(fullName="normal_sample_name", shortName="", doc="name to use for normal in output files", required=false, exclusiveOf="", validation="") + var normal_sample_name: String = _ + + /** force output for each site */ + @Argument(fullName="force_output", shortName="", doc="force output for each site", required=false, exclusiveOf="", validation="") + var force_output: Boolean = _ + + /** force output for all alleles at each site */ + @Argument(fullName="force_alleles", shortName="", doc="force output for all alleles at each site", required=false, exclusiveOf="", validation="") + var force_alleles: Boolean = _ + + /** Initial LOD threshold for calling tumor variant */ + @Argument(fullName="initial_tumor_lod", shortName="", doc="Initial LOD threshold for calling tumor variant", required=false, exclusiveOf="", validation="") + var initial_tumor_lod: Option[Float] = None + + /** Format string for initial_tumor_lod */ + 
@Argument(fullName="initial_tumor_lodFormat", shortName="", doc="Format string for initial_tumor_lod", required=false, exclusiveOf="", validation="") + var initial_tumor_lodFormat: String = "%s" + + /** LOD threshold for calling tumor variant */ + @Argument(fullName="tumor_lod", shortName="", doc="LOD threshold for calling tumor variant", required=false, exclusiveOf="", validation="") + var tumor_lod: Option[Float] = None + + /** Format string for tumor_lod */ + @Argument(fullName="tumor_lodFormat", shortName="", doc="Format string for tumor_lod", required=false, exclusiveOf="", validation="") + var tumor_lodFormat: String = "%s" + + /** estimate of fraction (0-1) of physical contamination with other unrelated samples */ + @Argument(fullName="fraction_contamination", shortName="", doc="estimate of fraction (0-1) of physical contamination with other unrelated samples", required=false, exclusiveOf="", validation="") + var fraction_contamination: Option[Float] = None + + /** Format string for fraction_contamination */ + @Argument(fullName="fraction_contaminationFormat", shortName="", doc="Format string for fraction_contamination", required=false, exclusiveOf="", validation="") + var fraction_contaminationFormat: String = "%s" + + /** minimum fraction of cells which are presumed to have a mutation, used to handle non-clonality and contamination */ + @Argument(fullName="minimum_mutation_cell_fraction", shortName="", doc="minimum fraction of cells which are presumed to have a mutation, used to handle non-clonality and contamination", required=false, exclusiveOf="", validation="") + var minimum_mutation_cell_fraction: Option[Float] = None + + /** Format string for minimum_mutation_cell_fraction */ + @Argument(fullName="minimum_mutation_cell_fractionFormat", shortName="", doc="Format string for minimum_mutation_cell_fraction", required=false, exclusiveOf="", validation="") + var minimum_mutation_cell_fractionFormat: String = "%s" + + /** LOD threshold for calling normal 
non-germline */ + @Argument(fullName="normal_lod", shortName="", doc="LOD threshold for calling normal non-germline", required=false, exclusiveOf="", validation="") + var normal_lod: Option[Float] = None + + /** Format string for normal_lod */ + @Argument(fullName="normal_lodFormat", shortName="", doc="Format string for normal_lod", required=false, exclusiveOf="", validation="") + var normal_lodFormat: String = "%s" + + /** LOD threshold for calling normal non-variant */ + @Argument(fullName="normal_artifact_lod", shortName="", doc="LOD threshold for calling normal non-variant", required=false, exclusiveOf="", validation="") + var normal_artifact_lod: Option[Float] = None + + /** Format string for normal_artifact_lod */ + @Argument(fullName="normal_artifact_lodFormat", shortName="", doc="Format string for normal_artifact_lod", required=false, exclusiveOf="", validation="") + var normal_artifact_lodFormat: String = "%s" + + /** LOD threshold for calling strand bias */ + @Argument(fullName="strand_artifact_lod", shortName="", doc="LOD threshold for calling strand bias", required=false, exclusiveOf="", validation="") + var strand_artifact_lod: Option[Float] = None + + /** Format string for strand_artifact_lod */ + @Argument(fullName="strand_artifact_lodFormat", shortName="", doc="Format string for strand_artifact_lod", required=false, exclusiveOf="", validation="") + var strand_artifact_lodFormat: String = "%s" + + /** power threshold for calling strand bias */ + @Argument(fullName="strand_artifact_power_threshold", shortName="", doc="power threshold for calling strand bias", required=false, exclusiveOf="", validation="") + var strand_artifact_power_threshold: Option[Float] = None + + /** Format string for strand_artifact_power_threshold */ + @Argument(fullName="strand_artifact_power_thresholdFormat", shortName="", doc="Format string for strand_artifact_power_threshold", required=false, exclusiveOf="", validation="") + var strand_artifact_power_thresholdFormat: String 
= "%s" + + /** LOD threshold for calling normal non-variant at dbsnp sites */ + @Argument(fullName="dbsnp_normal_lod", shortName="", doc="LOD threshold for calling normal non-variant at dbsnp sites", required=false, exclusiveOf="", validation="") + var dbsnp_normal_lod: Option[Float] = None + + /** Format string for dbsnp_normal_lod */ + @Argument(fullName="dbsnp_normal_lodFormat", shortName="", doc="Format string for dbsnp_normal_lod", required=false, exclusiveOf="", validation="") + var dbsnp_normal_lodFormat: String = "%s" + + /** Power threshold for normal to determine germline vs variant */ + @Argument(fullName="somatic_classification_normal_power_threshold", shortName="", doc="Power threshold for normal to determine germline vs variant", required=false, exclusiveOf="", validation="") + var somatic_classification_normal_power_threshold: Option[Float] = None + + /** Format string for somatic_classification_normal_power_threshold */ + @Argument(fullName="somatic_classification_normal_power_thresholdFormat", shortName="", doc="Format string for somatic_classification_normal_power_threshold", required=false, exclusiveOf="", validation="") + var somatic_classification_normal_power_thresholdFormat: String = "%s" + + /** minimum allele fraction to be considered in normal, useful for normal sample contaminated with tumor */ + @Argument(fullName="minimum_normal_allele_fraction", shortName="", doc="minimum allele fraction to be considered in normal, useful for normal sample contaminated with tumor", required=false, exclusiveOf="", validation="") + var minimum_normal_allele_fraction: Option[Float] = None + + /** Format string for minimum_normal_allele_fraction */ + @Argument(fullName="minimum_normal_allele_fractionFormat", shortName="", doc="Format string for minimum_normal_allele_fraction", required=false, exclusiveOf="", validation="") + var minimum_normal_allele_fractionFormat: String = "%s" + + /** for computational efficiency, reject sites with allelic fraction 
below this threshold */ + @Argument(fullName="tumor_f_pretest", shortName="", doc="for computational efficiency, reject sites with allelic fraction below this threshold", required=false, exclusiveOf="", validation="") + var tumor_f_pretest: Option[Float] = None + + /** Format string for tumor_f_pretest */ + @Argument(fullName="tumor_f_pretestFormat", shortName="", doc="Format string for tumor_f_pretest", required=false, exclusiveOf="", validation="") + var tumor_f_pretestFormat: String = "%s" + + /** threshold for minimum base quality score */ + @Argument(fullName="min_qscore", shortName="", doc="threshold for minimum base quality score", required=false, exclusiveOf="", validation="") + var min_qscore: Option[Int] = None + + /** how many gapped events (ins/del) are allowed in proximity to this candidate */ + @Argument(fullName="gap_events_threshold", shortName="", doc="how many gapped events (ins/del) are allowed in proximity to this candidate", required=false, exclusiveOf="", validation="") + var gap_events_threshold: Option[Int] = None + + /** if this fraction or more of the bases in a read are soft/hard clipped, do not use this read for mutation calling */ + @Argument(fullName="heavily_clipped_read_fraction", shortName="", doc="if this fraction or more of the bases in a read are soft/hard clipped, do not use this read for mutation calling", required=false, exclusiveOf="", validation="") + var heavily_clipped_read_fraction: Option[Float] = None + + /** Format string for heavily_clipped_read_fraction */ + @Argument(fullName="heavily_clipped_read_fractionFormat", shortName="", doc="Format string for heavily_clipped_read_fraction", required=false, exclusiveOf="", validation="") + var heavily_clipped_read_fractionFormat: String = "%s" + + /** pvalue threshold for fishers exact test of clipping bias in mutant reads vs ref reads */ + @Argument(fullName="clipping_bias_pvalue_threshold", shortName="", doc="pvalue threshold for fishers exact test of clipping bias in 
mutant reads vs ref reads", required=false, exclusiveOf="", validation="") + var clipping_bias_pvalue_threshold: Option[Float] = None + + /** Format string for clipping_bias_pvalue_threshold */ + @Argument(fullName="clipping_bias_pvalue_thresholdFormat", shortName="", doc="Format string for clipping_bias_pvalue_threshold", required=false, exclusiveOf="", validation="") + var clipping_bias_pvalue_thresholdFormat: String = "%s" + + /** threshold for determining if there is relatedness between the alt and ref allele read piles */ + @Argument(fullName="fraction_mapq0_threshold", shortName="", doc="threshold for determining if there is relatedness between the alt and ref allele read piles", required=false, exclusiveOf="", validation="") + var fraction_mapq0_threshold: Option[Float] = None + + /** Format string for fraction_mapq0_threshold */ + @Argument(fullName="fraction_mapq0_thresholdFormat", shortName="", doc="Format string for fraction_mapq0_threshold", required=false, exclusiveOf="", validation="") + var fraction_mapq0_thresholdFormat: String = "%s" + + /** threshold for clustered read position artifact median */ + @Argument(fullName="pir_median_threshold", shortName="", doc="threshold for clustered read position artifact median", required=false, exclusiveOf="", validation="") + var pir_median_threshold: Option[Double] = None + + /** Format string for pir_median_threshold */ + @Argument(fullName="pir_median_thresholdFormat", shortName="", doc="Format string for pir_median_threshold", required=false, exclusiveOf="", validation="") + var pir_median_thresholdFormat: String = "%s" + + /** threshold for clustered read position artifact MAD */ + @Argument(fullName="pir_mad_threshold", shortName="", doc="threshold for clustered read position artifact MAD", required=false, exclusiveOf="", validation="") + var pir_mad_threshold: Option[Double] = None + + /** Format string for pir_mad_threshold */ + @Argument(fullName="pir_mad_thresholdFormat", shortName="", doc="Format 
string for pir_mad_threshold", required=false, exclusiveOf="", validation="") + var pir_mad_thresholdFormat: String = "%s" + + /** required minimum value for tumor alt allele maximum mapping quality score */ + @Argument(fullName="required_maximum_alt_allele_mapping_quality_score", shortName="", doc="required minimum value for tumor alt allele maximum mapping quality score", required=false, exclusiveOf="", validation="") + var required_maximum_alt_allele_mapping_quality_score: Option[Int] = None + + /** threshold for maximum alternate allele counts in normal */ + @Argument(fullName="max_alt_alleles_in_normal_count", shortName="", doc="threshold for maximum alternate allele counts in normal", required=false, exclusiveOf="", validation="") + var max_alt_alleles_in_normal_count: Option[Int] = None + + /** threshold for maximum alternate allele quality score sum in normal */ + @Argument(fullName="max_alt_alleles_in_normal_qscore_sum", shortName="", doc="threshold for maximum alternate allele quality score sum in normal", required=false, exclusiveOf="", validation="") + var max_alt_alleles_in_normal_qscore_sum: Option[Int] = None + + /** threshold for maximum alternate allele fraction in normal */ + @Argument(fullName="max_alt_allele_in_normal_fraction", shortName="", doc="threshold for maximum alternate allele fraction in normal", required=false, exclusiveOf="", validation="") + var max_alt_allele_in_normal_fraction: Option[Double] = None + + /** Format string for max_alt_allele_in_normal_fraction */ + @Argument(fullName="max_alt_allele_in_normal_fractionFormat", shortName="", doc="Format string for max_alt_allele_in_normal_fraction", required=false, exclusiveOf="", validation="") + var max_alt_allele_in_normal_fractionFormat: String = "%s" + + /** Phred scale quality score constant to use in power calculations */ + @Argument(fullName="power_constant_qscore", shortName="", doc="Phred scale quality score constant to use in power calculations", required=false, 
exclusiveOf="", validation="") + var power_constant_qscore: Option[Int] = None + + /** Absolute Copy Number Data, as defined by Absolute, to use in power calculations */ + @Argument(fullName="absolute_copy_number_data", shortName="", doc="Absolute Copy Number Data, as defined by Absolute, to use in power calculations", required=false, exclusiveOf="", validation="") + var absolute_copy_number_data: File = _ + + /** Allelic fraction constant to use in power calculations */ + @Argument(fullName="power_constant_af", shortName="", doc="Allelic fraction constant to use in power calculations", required=false, exclusiveOf="", validation="") + var power_constant_af: Option[Double] = None + + /** Format string for power_constant_af */ + @Argument(fullName="power_constant_afFormat", shortName="", doc="Format string for power_constant_af", required=false, exclusiveOf="", validation="") + var power_constant_afFormat: String = "%s" + + /** Call-stats output */ + @Output(fullName="out", shortName="o", doc="Call-stats output", required=false, exclusiveOf="", validation="") + @Gather(classOf[org.broadinstitute.sting.queue.function.scattergather.SimpleTextGatherFunction]) + var out: File = _ + + /** + * Short name of out + * @return Short name of out + */ + def o = this.out + + /** + * Short name of out + * @param value Short name of out + */ + def o_=(value: File) { this.out = value } + + /** VCF file of DBSNP information */ + @Input(fullName="dbsnp", shortName="dbsnp", doc="VCF file of DBSNP information", required=false, exclusiveOf="", validation="") + var dbsnp: Seq[File] = Nil + + /** Dependencies on any indexes of dbsnp */ + @Input(fullName="dbsnpIndexes", shortName="", doc="Dependencies on any indexes of dbsnp", required=false, exclusiveOf="", validation="") + private var dbsnpIndexes: Seq[File] = Nil + + /** VCF file of COSMIC sites */ + @Input(fullName="cosmic", shortName="cosmic", doc="VCF file of COSMIC sites", required=false, exclusiveOf="", validation="") + var cosmic: 
Seq[File] = Nil + + /** Dependencies on any indexes of cosmic */ + @Input(fullName="cosmicIndexes", shortName="", doc="Dependencies on any indexes of cosmic", required=false, exclusiveOf="", validation="") + private var cosmicIndexes: Seq[File] = Nil + + /** VCF file of sites observed in normal */ + @Input(fullName="normal_panel", shortName="normal_panel", doc="VCF file of sites observed in normal", required=false, exclusiveOf="", validation="") + var normal_panel: Seq[File] = Nil + + /** Dependencies on any indexes of normal_panel */ + @Input(fullName="normal_panelIndexes", shortName="", doc="Dependencies on any indexes of normal_panel", required=false, exclusiveOf="", validation="") + private var normal_panelIndexes: Seq[File] = Nil + + /** write out coverage in WIGGLE format to this file */ + @Output(fullName="coverage_file", shortName="cov", doc="write out coverage in WIGGLE format to this file", required=false, exclusiveOf="", validation="") + @Gather(classOf[org.broadinstitute.sting.queue.function.scattergather.SimpleTextGatherFunction]) + var coverage_file: File = _ + + /** + * Short name of coverage_file + * @return Short name of coverage_file + */ + def cov = this.coverage_file + + /** + * Short name of coverage_file + * @param value Short name of coverage_file + */ + def cov_=(value: File) { this.coverage_file = value } + + /** write out 20x of Q20 coverage in WIGGLE format to this file */ + @Output(fullName="coverage_20_q20_file", shortName="cov_q20", doc="write out 20x of Q20 coverage in WIGGLE format to this file", required=false, exclusiveOf="", validation="") + @Gather(classOf[org.broadinstitute.sting.queue.function.scattergather.SimpleTextGatherFunction]) + var coverage_20_q20_file: File = _ + + /** + * Short name of coverage_20_q20_file + * @return Short name of coverage_20_q20_file + */ + def cov_q20 = this.coverage_20_q20_file + + /** + * Short name of coverage_20_q20_file + * @param value Short name of coverage_20_q20_file + */ + def 
cov_q20_=(value: File) { this.coverage_20_q20_file = value } + + /** write out power in WIGGLE format to this file */ + @Output(fullName="power_file", shortName="pow", doc="write out power in WIGGLE format to this file", required=false, exclusiveOf="", validation="") + @Gather(classOf[org.broadinstitute.sting.queue.function.scattergather.SimpleTextGatherFunction]) + var power_file: File = _ + + /** + * Short name of power_file + * @return Short name of power_file + */ + def pow = this.power_file + + /** + * Short name of power_file + * @param value Short name of power_file + */ + def pow_=(value: File) { this.power_file = value } + + /** write out tumor read depth in WIGGLE format to this file */ + @Output(fullName="tumor_depth_file", shortName="tdf", doc="write out tumor read depth in WIGGLE format to this file", required=false, exclusiveOf="", validation="") + @Gather(classOf[org.broadinstitute.sting.queue.function.scattergather.SimpleTextGatherFunction]) + var tumor_depth_file: File = _ + + /** + * Short name of tumor_depth_file + * @return Short name of tumor_depth_file + */ + def tdf = this.tumor_depth_file + + /** + * Short name of tumor_depth_file + * @param value Short name of tumor_depth_file + */ + def tdf_=(value: File) { this.tumor_depth_file = value } + + /** write out normal read depth in WIGGLE format to this file */ + @Output(fullName="normal_depth_file", shortName="ndf", doc="write out normal read depth in WIGGLE format to this file", required=false, exclusiveOf="", validation="") + @Gather(classOf[org.broadinstitute.sting.queue.function.scattergather.SimpleTextGatherFunction]) + var normal_depth_file: File = _ + + /** + * Short name of normal_depth_file + * @return Short name of normal_depth_file + */ + def ndf = this.normal_depth_file + + /** + * Short name of normal_depth_file + * @param value Short name of normal_depth_file + */ + def ndf_=(value: File) { this.normal_depth_file = value } + + /** if a read has mismatching number of bases and 
base qualities, filter out the read instead of blowing up. */ + @Argument(fullName="filter_mismatching_base_and_quals", shortName="filterMBQ", doc="if a read has mismatching number of bases and base qualities, filter out the read instead of blowing up.", required=false, exclusiveOf="", validation="") + var filter_mismatching_base_and_quals: Boolean = _ + + /** + * Short name of filter_mismatching_base_and_quals + * @return Short name of filter_mismatching_base_and_quals + */ + def filterMBQ = this.filter_mismatching_base_and_quals + + /** + * Short name of filter_mismatching_base_and_quals + * @param value Short name of filter_mismatching_base_and_quals + */ + def filterMBQ_=(value: Boolean) { this.filter_mismatching_base_and_quals = value } + + override def freezeFieldValues() { + super.freezeFieldValues() + dbsnpIndexes ++= dbsnp.filter(orig => orig != null).map(orig => new File(orig.getPath + ".idx")) + cosmicIndexes ++= cosmic.filter(orig => orig != null).map(orig => new File(orig.getPath + ".idx")) + normal_panelIndexes ++= normal_panel.filter(orig => orig != null).map(orig => new File(orig.getPath + ".idx")) + } + + override def commandLine = super.commandLine + conditional(noop, "--noop", escape=true, format="%s") + conditional(enable_extended_output, "--enable_extended_output", escape=true, format="%s") + conditional(artifact_detection_mode, "--artifact_detection_mode", escape=true, format="%s") + optional("--tumor_sample_name", tumor_sample_name, spaceSeparated=true, escape=true, format="%s") + optional("--bam_tumor_sample_name", bam_tumor_sample_name, spaceSeparated=true, escape=true, format="%s") + optional("--normal_sample_name", normal_sample_name, spaceSeparated=true, escape=true, format="%s") + conditional(force_output, "--force_output", escape=true, format="%s") + conditional(force_alleles, "--force_alleles", escape=true, format="%s") + optional("--initial_tumor_lod", initial_tumor_lod, spaceSeparated=true, escape=true, 
format=initial_tumor_lodFormat) + optional("--tumor_lod", tumor_lod, spaceSeparated=true, escape=true, format=tumor_lodFormat) + optional("--fraction_contamination", fraction_contamination, spaceSeparated=true, escape=true, format=fraction_contaminationFormat) + optional("--minimum_mutation_cell_fraction", minimum_mutation_cell_fraction, spaceSeparated=true, escape=true, format=minimum_mutation_cell_fractionFormat) + optional("--normal_lod", normal_lod, spaceSeparated=true, escape=true, format=normal_lodFormat) + optional("--normal_artifact_lod", normal_artifact_lod, spaceSeparated=true, escape=true, format=normal_artifact_lodFormat) + optional("--strand_artifact_lod", strand_artifact_lod, spaceSeparated=true, escape=true, format=strand_artifact_lodFormat) + optional("--strand_artifact_power_threshold", strand_artifact_power_threshold, spaceSeparated=true, escape=true, format=strand_artifact_power_thresholdFormat) + optional("--dbsnp_normal_lod", dbsnp_normal_lod, spaceSeparated=true, escape=true, format=dbsnp_normal_lodFormat) + optional("--somatic_classification_normal_power_threshold", somatic_classification_normal_power_threshold, spaceSeparated=true, escape=true, format=somatic_classification_normal_power_thresholdFormat) + optional("--minimum_normal_allele_fraction", minimum_normal_allele_fraction, spaceSeparated=true, escape=true, format=minimum_normal_allele_fractionFormat) + optional("--tumor_f_pretest", tumor_f_pretest, spaceSeparated=true, escape=true, format=tumor_f_pretestFormat) + optional("--min_qscore", min_qscore, spaceSeparated=true, escape=true, format="%s") + optional("--gap_events_threshold", gap_events_threshold, spaceSeparated=true, escape=true, format="%s") + optional("--heavily_clipped_read_fraction", heavily_clipped_read_fraction, spaceSeparated=true, escape=true, format=heavily_clipped_read_fractionFormat) + optional("--clipping_bias_pvalue_threshold", clipping_bias_pvalue_threshold, spaceSeparated=true, escape=true, 
format=clipping_bias_pvalue_thresholdFormat) + optional("--fraction_mapq0_threshold", fraction_mapq0_threshold, spaceSeparated=true, escape=true, format=fraction_mapq0_thresholdFormat) + optional("--pir_median_threshold", pir_median_threshold, spaceSeparated=true, escape=true, format=pir_median_thresholdFormat) + optional("--pir_mad_threshold", pir_mad_threshold, spaceSeparated=true, escape=true, format=pir_mad_thresholdFormat) + optional("--required_maximum_alt_allele_mapping_quality_score", required_maximum_alt_allele_mapping_quality_score, spaceSeparated=true, escape=true, format="%s") + optional("--max_alt_alleles_in_normal_count", max_alt_alleles_in_normal_count, spaceSeparated=true, escape=true, format="%s") + optional("--max_alt_alleles_in_normal_qscore_sum", max_alt_alleles_in_normal_qscore_sum, spaceSeparated=true, escape=true, format="%s") + optional("--max_alt_allele_in_normal_fraction", max_alt_allele_in_normal_fraction, spaceSeparated=true, escape=true, format=max_alt_allele_in_normal_fractionFormat) + optional("--power_constant_qscore", power_constant_qscore, spaceSeparated=true, escape=true, format="%s") + optional("--absolute_copy_number_data", absolute_copy_number_data, spaceSeparated=true, escape=true, format="%s") + optional("--power_constant_af", power_constant_af, spaceSeparated=true, escape=true, format=power_constant_afFormat) + optional("-o", out, spaceSeparated=true, escape=true, format="%s") + repeat("-dbsnp", dbsnp, formatPrefix=TaggedFile.formatCommandLineParameter, spaceSeparated=true, escape=true, format="%s") + repeat("-cosmic", cosmic, formatPrefix=TaggedFile.formatCommandLineParameter, spaceSeparated=true, escape=true, format="%s") + repeat("-normal_panel", normal_panel, formatPrefix=TaggedFile.formatCommandLineParameter, spaceSeparated=true, escape=true, format="%s") + optional("-cov", coverage_file, spaceSeparated=true, escape=true, format="%s") + optional("-cov_q20", coverage_20_q20_file, spaceSeparated=true, escape=true, 
format="%s") + optional("-pow", power_file, spaceSeparated=true, escape=true, format="%s") + optional("-tdf", tumor_depth_file, spaceSeparated=true, escape=true, format="%s") + optional("-ndf", normal_depth_file, spaceSeparated=true, escape=true, format="%s") + conditional(filter_mismatching_base_and_quals, "-filterMBQ", escape=true, format="%s") +} From 0afde9906a1c043d3644e47e45603b9bf9e6a382 Mon Sep 17 00:00:00 2001 From: Kristian Cibulskis Date: Wed, 3 Oct 2012 16:25:34 -0400 Subject: [PATCH 16/90] initial cancer pipeline with mutations and partial indel support --- .../queue/extensions/cancer/MuTect.scala | 378 ++++++++++++++++++ 1 file changed, 378 insertions(+) create mode 100644 public/scala/src/org/broadinstitute/sting/queue/extensions/cancer/MuTect.scala diff --git a/public/scala/src/org/broadinstitute/sting/queue/extensions/cancer/MuTect.scala b/public/scala/src/org/broadinstitute/sting/queue/extensions/cancer/MuTect.scala new file mode 100644 index 000000000..623d397d4 --- /dev/null +++ b/public/scala/src/org/broadinstitute/sting/queue/extensions/cancer/MuTect.scala @@ -0,0 +1,378 @@ +package org.broadinstitute.sting.queue.extensions.cancer + +import java.io.File +import org.broadinstitute.sting.commandline.Argument +import org.broadinstitute.sting.commandline.Gather +import org.broadinstitute.sting.commandline.Input +import org.broadinstitute.sting.commandline.Output +import org.broadinstitute.sting.queue.function.scattergather.ScatterGatherableFunction +import org.broadinstitute.sting.queue.extensions.gatk.{LocusScatterFunction, TaggedFile} + +class MuTect extends org.broadinstitute.sting.queue.extensions.gatk.CommandLineGATK with ScatterGatherableFunction { + analysisName = "MuTect" + analysis_type = "MuTect" + scatterClass = classOf[LocusScatterFunction] + + /** used for debugging, basically exit as soon as we get the reads */ + @Argument(fullName="noop", shortName="", doc="used for debugging, basically exit as soon as we get the reads", 
required=false, exclusiveOf="", validation="") + var noop: Boolean = _ + + /** add many additional columns of statistics to the output file */ + @Argument(fullName="enable_extended_output", shortName="", doc="add many additional columns of statistics to the output file", required=false, exclusiveOf="", validation="") + var enable_extended_output: Boolean = _ + + /** used when running the caller on a normal (as if it were a tumor) to detect artifacts */ + @Argument(fullName="artifact_detection_mode", shortName="", doc="used when running the caller on a normal (as if it were a tumor) to detect artifacts", required=false, exclusiveOf="", validation="") + var artifact_detection_mode: Boolean = _ + + /** name to use for tumor in output files */ + @Argument(fullName="tumor_sample_name", shortName="", doc="name to use for tumor in output files", required=false, exclusiveOf="", validation="") + var tumor_sample_name: String = _ + + /** if the tumor bam contains multiple samples, only use read groups with SM equal to this value */ + @Argument(fullName="bam_tumor_sample_name", shortName="", doc="if the tumor bam contains multiple samples, only use read groups with SM equal to this value", required=false, exclusiveOf="", validation="") + var bam_tumor_sample_name: String = _ + + /** name to use for normal in output files */ + @Argument(fullName="normal_sample_name", shortName="", doc="name to use for normal in output files", required=false, exclusiveOf="", validation="") + var normal_sample_name: String = _ + + /** force output for each site */ + @Argument(fullName="force_output", shortName="", doc="force output for each site", required=false, exclusiveOf="", validation="") + var force_output: Boolean = _ + + /** force output for all alleles at each site */ + @Argument(fullName="force_alleles", shortName="", doc="force output for all alleles at each site", required=false, exclusiveOf="", validation="") + var force_alleles: Boolean = _ + + /** Initial LOD threshold for calling 
tumor variant */ + @Argument(fullName="initial_tumor_lod", shortName="", doc="Initial LOD threshold for calling tumor variant", required=false, exclusiveOf="", validation="") + var initial_tumor_lod: Option[Float] = None + + /** Format string for initial_tumor_lod */ + @Argument(fullName="initial_tumor_lodFormat", shortName="", doc="Format string for initial_tumor_lod", required=false, exclusiveOf="", validation="") + var initial_tumor_lodFormat: String = "%s" + + /** LOD threshold for calling tumor variant */ + @Argument(fullName="tumor_lod", shortName="", doc="LOD threshold for calling tumor variant", required=false, exclusiveOf="", validation="") + var tumor_lod: Option[Float] = None + + /** Format string for tumor_lod */ + @Argument(fullName="tumor_lodFormat", shortName="", doc="Format string for tumor_lod", required=false, exclusiveOf="", validation="") + var tumor_lodFormat: String = "%s" + + /** estimate of fraction (0-1) of physical contamination with other unrelated samples */ + @Argument(fullName="fraction_contamination", shortName="", doc="estimate of fraction (0-1) of physical contamination with other unrelated samples", required=false, exclusiveOf="", validation="") + var fraction_contamination: Option[Float] = None + + /** Format string for fraction_contamination */ + @Argument(fullName="fraction_contaminationFormat", shortName="", doc="Format string for fraction_contamination", required=false, exclusiveOf="", validation="") + var fraction_contaminationFormat: String = "%s" + + /** minimum fraction of cells which are presumed to have a mutation, used to handle non-clonality and contamination */ + @Argument(fullName="minimum_mutation_cell_fraction", shortName="", doc="minimum fraction of cells which are presumed to have a mutation, used to handle non-clonality and contamination", required=false, exclusiveOf="", validation="") + var minimum_mutation_cell_fraction: Option[Float] = None + + /** Format string for minimum_mutation_cell_fraction */ + 
@Argument(fullName="minimum_mutation_cell_fractionFormat", shortName="", doc="Format string for minimum_mutation_cell_fraction", required=false, exclusiveOf="", validation="") + var minimum_mutation_cell_fractionFormat: String = "%s" + + /** LOD threshold for calling normal non-germline */ + @Argument(fullName="normal_lod", shortName="", doc="LOD threshold for calling normal non-germline", required=false, exclusiveOf="", validation="") + var normal_lod: Option[Float] = None + + /** Format string for normal_lod */ + @Argument(fullName="normal_lodFormat", shortName="", doc="Format string for normal_lod", required=false, exclusiveOf="", validation="") + var normal_lodFormat: String = "%s" + + /** LOD threshold for calling normal non-variant */ + @Argument(fullName="normal_artifact_lod", shortName="", doc="LOD threshold for calling normal non-variant", required=false, exclusiveOf="", validation="") + var normal_artifact_lod: Option[Float] = None + + /** Format string for normal_artifact_lod */ + @Argument(fullName="normal_artifact_lodFormat", shortName="", doc="Format string for normal_artifact_lod", required=false, exclusiveOf="", validation="") + var normal_artifact_lodFormat: String = "%s" + + /** LOD threshold for calling strand bias */ + @Argument(fullName="strand_artifact_lod", shortName="", doc="LOD threshold for calling strand bias", required=false, exclusiveOf="", validation="") + var strand_artifact_lod: Option[Float] = None + + /** Format string for strand_artifact_lod */ + @Argument(fullName="strand_artifact_lodFormat", shortName="", doc="Format string for strand_artifact_lod", required=false, exclusiveOf="", validation="") + var strand_artifact_lodFormat: String = "%s" + + /** power threshold for calling strand bias */ + @Argument(fullName="strand_artifact_power_threshold", shortName="", doc="power threshold for calling strand bias", required=false, exclusiveOf="", validation="") + var strand_artifact_power_threshold: Option[Float] = None + + /** Format 
string for strand_artifact_power_threshold */ + @Argument(fullName="strand_artifact_power_thresholdFormat", shortName="", doc="Format string for strand_artifact_power_threshold", required=false, exclusiveOf="", validation="") + var strand_artifact_power_thresholdFormat: String = "%s" + + /** LOD threshold for calling normal non-variant at dbsnp sites */ + @Argument(fullName="dbsnp_normal_lod", shortName="", doc="LOD threshold for calling normal non-variant at dbsnp sites", required=false, exclusiveOf="", validation="") + var dbsnp_normal_lod: Option[Float] = None + + /** Format string for dbsnp_normal_lod */ + @Argument(fullName="dbsnp_normal_lodFormat", shortName="", doc="Format string for dbsnp_normal_lod", required=false, exclusiveOf="", validation="") + var dbsnp_normal_lodFormat: String = "%s" + + /** Power threshold for normal to determine germline vs variant */ + @Argument(fullName="somatic_classification_normal_power_threshold", shortName="", doc="Power threshold for normal to determine germline vs variant", required=false, exclusiveOf="", validation="") + var somatic_classification_normal_power_threshold: Option[Float] = None + + /** Format string for somatic_classification_normal_power_threshold */ + @Argument(fullName="somatic_classification_normal_power_thresholdFormat", shortName="", doc="Format string for somatic_classification_normal_power_threshold", required=false, exclusiveOf="", validation="") + var somatic_classification_normal_power_thresholdFormat: String = "%s" + + /** minimum allele fraction to be considered in normal, useful for normal sample contaminated with tumor */ + @Argument(fullName="minimum_normal_allele_fraction", shortName="", doc="minimum allele fraction to be considered in normal, useful for normal sample contaminated with tumor", required=false, exclusiveOf="", validation="") + var minimum_normal_allele_fraction: Option[Float] = None + + /** Format string for minimum_normal_allele_fraction */ + 
@Argument(fullName="minimum_normal_allele_fractionFormat", shortName="", doc="Format string for minimum_normal_allele_fraction", required=false, exclusiveOf="", validation="") + var minimum_normal_allele_fractionFormat: String = "%s" + + /** for computational efficiency, reject sites with allelic fraction below this threshold */ + @Argument(fullName="tumor_f_pretest", shortName="", doc="for computational efficiency, reject sites with allelic fraction below this threshold", required=false, exclusiveOf="", validation="") + var tumor_f_pretest: Option[Float] = None + + /** Format string for tumor_f_pretest */ + @Argument(fullName="tumor_f_pretestFormat", shortName="", doc="Format string for tumor_f_pretest", required=false, exclusiveOf="", validation="") + var tumor_f_pretestFormat: String = "%s" + + /** threshold for minimum base quality score */ + @Argument(fullName="min_qscore", shortName="", doc="threshold for minimum base quality score", required=false, exclusiveOf="", validation="") + var min_qscore: Option[Int] = None + + /** how many gapped events (ins/del) are allowed in proximity to this candidate */ + @Argument(fullName="gap_events_threshold", shortName="", doc="how many gapped events (ins/del) are allowed in proximity to this candidate", required=false, exclusiveOf="", validation="") + var gap_events_threshold: Option[Int] = None + + /** if this fraction or more of the bases in a read are soft/hard clipped, do not use this read for mutation calling */ + @Argument(fullName="heavily_clipped_read_fraction", shortName="", doc="if this fraction or more of the bases in a read are soft/hard clipped, do not use this read for mutation calling", required=false, exclusiveOf="", validation="") + var heavily_clipped_read_fraction: Option[Float] = None + + /** Format string for heavily_clipped_read_fraction */ + @Argument(fullName="heavily_clipped_read_fractionFormat", shortName="", doc="Format string for heavily_clipped_read_fraction", required=false, exclusiveOf="", 
validation="") + var heavily_clipped_read_fractionFormat: String = "%s" + + /** pvalue threshold for fishers exact test of clipping bias in mutant reads vs ref reads */ + @Argument(fullName="clipping_bias_pvalue_threshold", shortName="", doc="pvalue threshold for fishers exact test of clipping bias in mutant reads vs ref reads", required=false, exclusiveOf="", validation="") + var clipping_bias_pvalue_threshold: Option[Float] = None + + /** Format string for clipping_bias_pvalue_threshold */ + @Argument(fullName="clipping_bias_pvalue_thresholdFormat", shortName="", doc="Format string for clipping_bias_pvalue_threshold", required=false, exclusiveOf="", validation="") + var clipping_bias_pvalue_thresholdFormat: String = "%s" + + /** threshold for determining if there is relatedness between the alt and ref allele read piles */ + @Argument(fullName="fraction_mapq0_threshold", shortName="", doc="threshold for determining if there is relatedness between the alt and ref allele read piles", required=false, exclusiveOf="", validation="") + var fraction_mapq0_threshold: Option[Float] = None + + /** Format string for fraction_mapq0_threshold */ + @Argument(fullName="fraction_mapq0_thresholdFormat", shortName="", doc="Format string for fraction_mapq0_threshold", required=false, exclusiveOf="", validation="") + var fraction_mapq0_thresholdFormat: String = "%s" + + /** threshold for clustered read position artifact median */ + @Argument(fullName="pir_median_threshold", shortName="", doc="threshold for clustered read position artifact median", required=false, exclusiveOf="", validation="") + var pir_median_threshold: Option[Double] = None + + /** Format string for pir_median_threshold */ + @Argument(fullName="pir_median_thresholdFormat", shortName="", doc="Format string for pir_median_threshold", required=false, exclusiveOf="", validation="") + var pir_median_thresholdFormat: String = "%s" + + /** threshold for clustered read position artifact MAD */ + 
@Argument(fullName="pir_mad_threshold", shortName="", doc="threshold for clustered read position artifact MAD", required=false, exclusiveOf="", validation="") + var pir_mad_threshold: Option[Double] = None + + /** Format string for pir_mad_threshold */ + @Argument(fullName="pir_mad_thresholdFormat", shortName="", doc="Format string for pir_mad_threshold", required=false, exclusiveOf="", validation="") + var pir_mad_thresholdFormat: String = "%s" + + /** required minimum value for tumor alt allele maximum mapping quality score */ + @Argument(fullName="required_maximum_alt_allele_mapping_quality_score", shortName="", doc="required minimum value for tumor alt allele maximum mapping quality score", required=false, exclusiveOf="", validation="") + var required_maximum_alt_allele_mapping_quality_score: Option[Int] = None + + /** threshold for maximum alternate allele counts in normal */ + @Argument(fullName="max_alt_alleles_in_normal_count", shortName="", doc="threshold for maximum alternate allele counts in normal", required=false, exclusiveOf="", validation="") + var max_alt_alleles_in_normal_count: Option[Int] = None + + /** threshold for maximum alternate allele quality score sum in normal */ + @Argument(fullName="max_alt_alleles_in_normal_qscore_sum", shortName="", doc="threshold for maximum alternate allele quality score sum in normal", required=false, exclusiveOf="", validation="") + var max_alt_alleles_in_normal_qscore_sum: Option[Int] = None + + /** threshold for maximum alternate allele fraction in normal */ + @Argument(fullName="max_alt_allele_in_normal_fraction", shortName="", doc="threshold for maximum alternate allele fraction in normal", required=false, exclusiveOf="", validation="") + var max_alt_allele_in_normal_fraction: Option[Double] = None + + /** Format string for max_alt_allele_in_normal_fraction */ + @Argument(fullName="max_alt_allele_in_normal_fractionFormat", shortName="", doc="Format string for max_alt_allele_in_normal_fraction", 
required=false, exclusiveOf="", validation="") + var max_alt_allele_in_normal_fractionFormat: String = "%s" + + /** Phred scale quality score constant to use in power calculations */ + @Argument(fullName="power_constant_qscore", shortName="", doc="Phred scale quality score constant to use in power calculations", required=false, exclusiveOf="", validation="") + var power_constant_qscore: Option[Int] = None + + /** Absolute Copy Number Data, as defined by Absolute, to use in power calculations */ + @Argument(fullName="absolute_copy_number_data", shortName="", doc="Absolute Copy Number Data, as defined by Absolute, to use in power calculations", required=false, exclusiveOf="", validation="") + var absolute_copy_number_data: File = _ + + /** Allelic fraction constant to use in power calculations */ + @Argument(fullName="power_constant_af", shortName="", doc="Allelic fraction constant to use in power calculations", required=false, exclusiveOf="", validation="") + var power_constant_af: Option[Double] = None + + /** Format string for power_constant_af */ + @Argument(fullName="power_constant_afFormat", shortName="", doc="Format string for power_constant_af", required=false, exclusiveOf="", validation="") + var power_constant_afFormat: String = "%s" + + /** Call-stats output */ + @Output(fullName="out", shortName="o", doc="Call-stats output", required=false, exclusiveOf="", validation="") + @Gather(classOf[org.broadinstitute.sting.queue.function.scattergather.SimpleTextGatherFunction]) + var out: File = _ + + /** + * Short name of out + * @return Short name of out + */ + def o = this.out + + /** + * Short name of out + * @param value Short name of out + */ + def o_=(value: File) { this.out = value } + + /** VCF file of DBSNP information */ + @Input(fullName="dbsnp", shortName="dbsnp", doc="VCF file of DBSNP information", required=false, exclusiveOf="", validation="") + var dbsnp: Seq[File] = Nil + + /** Dependencies on any indexes of dbsnp */ + 
@Input(fullName="dbsnpIndexes", shortName="", doc="Dependencies on any indexes of dbsnp", required=false, exclusiveOf="", validation="") + private var dbsnpIndexes: Seq[File] = Nil + + /** VCF file of COSMIC sites */ + @Input(fullName="cosmic", shortName="cosmic", doc="VCF file of COSMIC sites", required=false, exclusiveOf="", validation="") + var cosmic: Seq[File] = Nil + + /** Dependencies on any indexes of cosmic */ + @Input(fullName="cosmicIndexes", shortName="", doc="Dependencies on any indexes of cosmic", required=false, exclusiveOf="", validation="") + private var cosmicIndexes: Seq[File] = Nil + + /** VCF file of sites observed in normal */ + @Input(fullName="normal_panel", shortName="normal_panel", doc="VCF file of sites observed in normal", required=false, exclusiveOf="", validation="") + var normal_panel: Seq[File] = Nil + + /** Dependencies on any indexes of normal_panel */ + @Input(fullName="normal_panelIndexes", shortName="", doc="Dependencies on any indexes of normal_panel", required=false, exclusiveOf="", validation="") + private var normal_panelIndexes: Seq[File] = Nil + + /** write out coverage in WIGGLE format to this file */ + @Output(fullName="coverage_file", shortName="cov", doc="write out coverage in WIGGLE format to this file", required=false, exclusiveOf="", validation="") + @Gather(classOf[org.broadinstitute.sting.queue.function.scattergather.SimpleTextGatherFunction]) + var coverage_file: File = _ + + /** + * Short name of coverage_file + * @return Short name of coverage_file + */ + def cov = this.coverage_file + + /** + * Short name of coverage_file + * @param value Short name of coverage_file + */ + def cov_=(value: File) { this.coverage_file = value } + + /** write out 20x of Q20 coverage in WIGGLE format to this file */ + @Output(fullName="coverage_20_q20_file", shortName="cov_q20", doc="write out 20x of Q20 coverage in WIGGLE format to this file", required=false, exclusiveOf="", validation="") + 
@Gather(classOf[org.broadinstitute.sting.queue.function.scattergather.SimpleTextGatherFunction]) + var coverage_20_q20_file: File = _ + + /** + * Short name of coverage_20_q20_file + * @return Short name of coverage_20_q20_file + */ + def cov_q20 = this.coverage_20_q20_file + + /** + * Short name of coverage_20_q20_file + * @param value Short name of coverage_20_q20_file + */ + def cov_q20_=(value: File) { this.coverage_20_q20_file = value } + + /** write out power in WIGGLE format to this file */ + @Output(fullName="power_file", shortName="pow", doc="write out power in WIGGLE format to this file", required=false, exclusiveOf="", validation="") + @Gather(classOf[org.broadinstitute.sting.queue.function.scattergather.SimpleTextGatherFunction]) + var power_file: File = _ + + /** + * Short name of power_file + * @return Short name of power_file + */ + def pow = this.power_file + + /** + * Short name of power_file + * @param value Short name of power_file + */ + def pow_=(value: File) { this.power_file = value } + + /** write out tumor read depth in WIGGLE format to this file */ + @Output(fullName="tumor_depth_file", shortName="tdf", doc="write out tumor read depth in WIGGLE format to this file", required=false, exclusiveOf="", validation="") + @Gather(classOf[org.broadinstitute.sting.queue.function.scattergather.SimpleTextGatherFunction]) + var tumor_depth_file: File = _ + + /** + * Short name of tumor_depth_file + * @return Short name of tumor_depth_file + */ + def tdf = this.tumor_depth_file + + /** + * Short name of tumor_depth_file + * @param value Short name of tumor_depth_file + */ + def tdf_=(value: File) { this.tumor_depth_file = value } + + /** write out normal read depth in WIGGLE format to this file */ + @Output(fullName="normal_depth_file", shortName="ndf", doc="write out normal read depth in WIGGLE format to this file", required=false, exclusiveOf="", validation="") + 
@Gather(classOf[org.broadinstitute.sting.queue.function.scattergather.SimpleTextGatherFunction]) + var normal_depth_file: File = _ + + /** + * Short name of normal_depth_file + * @return Short name of normal_depth_file + */ + def ndf = this.normal_depth_file + + /** + * Short name of normal_depth_file + * @param value Short name of normal_depth_file + */ + def ndf_=(value: File) { this.normal_depth_file = value } + + /** if a read has mismatching number of bases and base qualities, filter out the read instead of blowing up. */ + @Argument(fullName="filter_mismatching_base_and_quals", shortName="filterMBQ", doc="if a read has mismatching number of bases and base qualities, filter out the read instead of blowing up.", required=false, exclusiveOf="", validation="") + var filter_mismatching_base_and_quals: Boolean = _ + + /** + * Short name of filter_mismatching_base_and_quals + * @return Short name of filter_mismatching_base_and_quals + */ + def filterMBQ = this.filter_mismatching_base_and_quals + + /** + * Short name of filter_mismatching_base_and_quals + * @param value Short name of filter_mismatching_base_and_quals + */ + def filterMBQ_=(value: Boolean) { this.filter_mismatching_base_and_quals = value } + + override def freezeFieldValues() { + super.freezeFieldValues() + dbsnpIndexes ++= dbsnp.filter(orig => orig != null).map(orig => new File(orig.getPath + ".idx")) + cosmicIndexes ++= cosmic.filter(orig => orig != null).map(orig => new File(orig.getPath + ".idx")) + normal_panelIndexes ++= normal_panel.filter(orig => orig != null).map(orig => new File(orig.getPath + ".idx")) + } + + override def commandLine = super.commandLine + conditional(noop, "--noop", escape=true, format="%s") + conditional(enable_extended_output, "--enable_extended_output", escape=true, format="%s") + conditional(artifact_detection_mode, "--artifact_detection_mode", escape=true, format="%s") + optional("--tumor_sample_name", tumor_sample_name, spaceSeparated=true, escape=true, format="%s") + 
optional("--bam_tumor_sample_name", bam_tumor_sample_name, spaceSeparated=true, escape=true, format="%s") + optional("--normal_sample_name", normal_sample_name, spaceSeparated=true, escape=true, format="%s") + conditional(force_output, "--force_output", escape=true, format="%s") + conditional(force_alleles, "--force_alleles", escape=true, format="%s") + optional("--initial_tumor_lod", initial_tumor_lod, spaceSeparated=true, escape=true, format=initial_tumor_lodFormat) + optional("--tumor_lod", tumor_lod, spaceSeparated=true, escape=true, format=tumor_lodFormat) + optional("--fraction_contamination", fraction_contamination, spaceSeparated=true, escape=true, format=fraction_contaminationFormat) + optional("--minimum_mutation_cell_fraction", minimum_mutation_cell_fraction, spaceSeparated=true, escape=true, format=minimum_mutation_cell_fractionFormat) + optional("--normal_lod", normal_lod, spaceSeparated=true, escape=true, format=normal_lodFormat) + optional("--normal_artifact_lod", normal_artifact_lod, spaceSeparated=true, escape=true, format=normal_artifact_lodFormat) + optional("--strand_artifact_lod", strand_artifact_lod, spaceSeparated=true, escape=true, format=strand_artifact_lodFormat) + optional("--strand_artifact_power_threshold", strand_artifact_power_threshold, spaceSeparated=true, escape=true, format=strand_artifact_power_thresholdFormat) + optional("--dbsnp_normal_lod", dbsnp_normal_lod, spaceSeparated=true, escape=true, format=dbsnp_normal_lodFormat) + optional("--somatic_classification_normal_power_threshold", somatic_classification_normal_power_threshold, spaceSeparated=true, escape=true, format=somatic_classification_normal_power_thresholdFormat) + optional("--minimum_normal_allele_fraction", minimum_normal_allele_fraction, spaceSeparated=true, escape=true, format=minimum_normal_allele_fractionFormat) + optional("--tumor_f_pretest", tumor_f_pretest, spaceSeparated=true, escape=true, format=tumor_f_pretestFormat) + optional("--min_qscore", min_qscore, 
spaceSeparated=true, escape=true, format="%s") + optional("--gap_events_threshold", gap_events_threshold, spaceSeparated=true, escape=true, format="%s") + optional("--heavily_clipped_read_fraction", heavily_clipped_read_fraction, spaceSeparated=true, escape=true, format=heavily_clipped_read_fractionFormat) + optional("--clipping_bias_pvalue_threshold", clipping_bias_pvalue_threshold, spaceSeparated=true, escape=true, format=clipping_bias_pvalue_thresholdFormat) + optional("--fraction_mapq0_threshold", fraction_mapq0_threshold, spaceSeparated=true, escape=true, format=fraction_mapq0_thresholdFormat) + optional("--pir_median_threshold", pir_median_threshold, spaceSeparated=true, escape=true, format=pir_median_thresholdFormat) + optional("--pir_mad_threshold", pir_mad_threshold, spaceSeparated=true, escape=true, format=pir_mad_thresholdFormat) + optional("--required_maximum_alt_allele_mapping_quality_score", required_maximum_alt_allele_mapping_quality_score, spaceSeparated=true, escape=true, format="%s") + optional("--max_alt_alleles_in_normal_count", max_alt_alleles_in_normal_count, spaceSeparated=true, escape=true, format="%s") + optional("--max_alt_alleles_in_normal_qscore_sum", max_alt_alleles_in_normal_qscore_sum, spaceSeparated=true, escape=true, format="%s") + optional("--max_alt_allele_in_normal_fraction", max_alt_allele_in_normal_fraction, spaceSeparated=true, escape=true, format=max_alt_allele_in_normal_fractionFormat) + optional("--power_constant_qscore", power_constant_qscore, spaceSeparated=true, escape=true, format="%s") + optional("--absolute_copy_number_data", absolute_copy_number_data, spaceSeparated=true, escape=true, format="%s") + optional("--power_constant_af", power_constant_af, spaceSeparated=true, escape=true, format=power_constant_afFormat) + optional("-o", out, spaceSeparated=true, escape=true, format="%s") + repeat("-dbsnp", dbsnp, formatPrefix=TaggedFile.formatCommandLineParameter, spaceSeparated=true, escape=true, format="%s") + 
repeat("-cosmic", cosmic, formatPrefix=TaggedFile.formatCommandLineParameter, spaceSeparated=true, escape=true, format="%s") + repeat("-normal_panel", normal_panel, formatPrefix=TaggedFile.formatCommandLineParameter, spaceSeparated=true, escape=true, format="%s") + optional("-cov", coverage_file, spaceSeparated=true, escape=true, format="%s") + optional("-cov_q20", coverage_20_q20_file, spaceSeparated=true, escape=true, format="%s") + optional("-pow", power_file, spaceSeparated=true, escape=true, format="%s") + optional("-tdf", tumor_depth_file, spaceSeparated=true, escape=true, format="%s") + optional("-ndf", normal_depth_file, spaceSeparated=true, escape=true, format="%s") + conditional(filter_mismatching_base_and_quals, "-filterMBQ", escape=true, format="%s") +} From 3ffba77656606f4378daf0474b18794741b4423f Mon Sep 17 00:00:00 2001 From: Scott Frazer Date: Thu, 4 Oct 2012 11:37:54 -0400 Subject: [PATCH 17/90] Revert "initial cancer pipeline with mutations and partial indel support" This reverts commit 4a2e5b1fcc3ad53dbb26d43eed1220b0257e9901. 
--- .../queue/extensions/cancer/MuTect.scala | 378 ------------------ 1 file changed, 378 deletions(-) delete mode 100644 public/scala/src/org/broadinstitute/sting/queue/extensions/cancer/MuTect.scala diff --git a/public/scala/src/org/broadinstitute/sting/queue/extensions/cancer/MuTect.scala b/public/scala/src/org/broadinstitute/sting/queue/extensions/cancer/MuTect.scala deleted file mode 100644 index 623d397d4..000000000 --- a/public/scala/src/org/broadinstitute/sting/queue/extensions/cancer/MuTect.scala +++ /dev/null @@ -1,378 +0,0 @@ -package org.broadinstitute.sting.queue.extensions.cancer - -import java.io.File -import org.broadinstitute.sting.commandline.Argument -import org.broadinstitute.sting.commandline.Gather -import org.broadinstitute.sting.commandline.Input -import org.broadinstitute.sting.commandline.Output -import org.broadinstitute.sting.queue.function.scattergather.ScatterGatherableFunction -import org.broadinstitute.sting.queue.extensions.gatk.{LocusScatterFunction, TaggedFile} - -class MuTect extends org.broadinstitute.sting.queue.extensions.gatk.CommandLineGATK with ScatterGatherableFunction { - analysisName = "MuTect" - analysis_type = "MuTect" - scatterClass = classOf[LocusScatterFunction] - - /** used for debugging, basically exit as soon as we get the reads */ - @Argument(fullName="noop", shortName="", doc="used for debugging, basically exit as soon as we get the reads", required=false, exclusiveOf="", validation="") - var noop: Boolean = _ - - /** add many additional columns of statistics to the output file */ - @Argument(fullName="enable_extended_output", shortName="", doc="add many additional columns of statistics to the output file", required=false, exclusiveOf="", validation="") - var enable_extended_output: Boolean = _ - - /** used when running the caller on a normal (as if it were a tumor) to detect artifacts */ - @Argument(fullName="artifact_detection_mode", shortName="", doc="used when running the caller on a normal (as if it were a 
tumor) to detect artifacts", required=false, exclusiveOf="", validation="") - var artifact_detection_mode: Boolean = _ - - /** name to use for tumor in output files */ - @Argument(fullName="tumor_sample_name", shortName="", doc="name to use for tumor in output files", required=false, exclusiveOf="", validation="") - var tumor_sample_name: String = _ - - /** if the tumor bam contains multiple samples, only use read groups with SM equal to this value */ - @Argument(fullName="bam_tumor_sample_name", shortName="", doc="if the tumor bam contains multiple samples, only use read groups with SM equal to this value", required=false, exclusiveOf="", validation="") - var bam_tumor_sample_name: String = _ - - /** name to use for normal in output files */ - @Argument(fullName="normal_sample_name", shortName="", doc="name to use for normal in output files", required=false, exclusiveOf="", validation="") - var normal_sample_name: String = _ - - /** force output for each site */ - @Argument(fullName="force_output", shortName="", doc="force output for each site", required=false, exclusiveOf="", validation="") - var force_output: Boolean = _ - - /** force output for all alleles at each site */ - @Argument(fullName="force_alleles", shortName="", doc="force output for all alleles at each site", required=false, exclusiveOf="", validation="") - var force_alleles: Boolean = _ - - /** Initial LOD threshold for calling tumor variant */ - @Argument(fullName="initial_tumor_lod", shortName="", doc="Initial LOD threshold for calling tumor variant", required=false, exclusiveOf="", validation="") - var initial_tumor_lod: Option[Float] = None - - /** Format string for initial_tumor_lod */ - @Argument(fullName="initial_tumor_lodFormat", shortName="", doc="Format string for initial_tumor_lod", required=false, exclusiveOf="", validation="") - var initial_tumor_lodFormat: String = "%s" - - /** LOD threshold for calling tumor variant */ - @Argument(fullName="tumor_lod", shortName="", doc="LOD 
threshold for calling tumor variant", required=false, exclusiveOf="", validation="") - var tumor_lod: Option[Float] = None - - /** Format string for tumor_lod */ - @Argument(fullName="tumor_lodFormat", shortName="", doc="Format string for tumor_lod", required=false, exclusiveOf="", validation="") - var tumor_lodFormat: String = "%s" - - /** estimate of fraction (0-1) of physical contamination with other unrelated samples */ - @Argument(fullName="fraction_contamination", shortName="", doc="estimate of fraction (0-1) of physical contamination with other unrelated samples", required=false, exclusiveOf="", validation="") - var fraction_contamination: Option[Float] = None - - /** Format string for fraction_contamination */ - @Argument(fullName="fraction_contaminationFormat", shortName="", doc="Format string for fraction_contamination", required=false, exclusiveOf="", validation="") - var fraction_contaminationFormat: String = "%s" - - /** minimum fraction of cells which are presumed to have a mutation, used to handle non-clonality and contamination */ - @Argument(fullName="minimum_mutation_cell_fraction", shortName="", doc="minimum fraction of cells which are presumed to have a mutation, used to handle non-clonality and contamination", required=false, exclusiveOf="", validation="") - var minimum_mutation_cell_fraction: Option[Float] = None - - /** Format string for minimum_mutation_cell_fraction */ - @Argument(fullName="minimum_mutation_cell_fractionFormat", shortName="", doc="Format string for minimum_mutation_cell_fraction", required=false, exclusiveOf="", validation="") - var minimum_mutation_cell_fractionFormat: String = "%s" - - /** LOD threshold for calling normal non-germline */ - @Argument(fullName="normal_lod", shortName="", doc="LOD threshold for calling normal non-germline", required=false, exclusiveOf="", validation="") - var normal_lod: Option[Float] = None - - /** Format string for normal_lod */ - @Argument(fullName="normal_lodFormat", shortName="", 
doc="Format string for normal_lod", required=false, exclusiveOf="", validation="") - var normal_lodFormat: String = "%s" - - /** LOD threshold for calling normal non-variant */ - @Argument(fullName="normal_artifact_lod", shortName="", doc="LOD threshold for calling normal non-variant", required=false, exclusiveOf="", validation="") - var normal_artifact_lod: Option[Float] = None - - /** Format string for normal_artifact_lod */ - @Argument(fullName="normal_artifact_lodFormat", shortName="", doc="Format string for normal_artifact_lod", required=false, exclusiveOf="", validation="") - var normal_artifact_lodFormat: String = "%s" - - /** LOD threshold for calling strand bias */ - @Argument(fullName="strand_artifact_lod", shortName="", doc="LOD threshold for calling strand bias", required=false, exclusiveOf="", validation="") - var strand_artifact_lod: Option[Float] = None - - /** Format string for strand_artifact_lod */ - @Argument(fullName="strand_artifact_lodFormat", shortName="", doc="Format string for strand_artifact_lod", required=false, exclusiveOf="", validation="") - var strand_artifact_lodFormat: String = "%s" - - /** power threshold for calling strand bias */ - @Argument(fullName="strand_artifact_power_threshold", shortName="", doc="power threshold for calling strand bias", required=false, exclusiveOf="", validation="") - var strand_artifact_power_threshold: Option[Float] = None - - /** Format string for strand_artifact_power_threshold */ - @Argument(fullName="strand_artifact_power_thresholdFormat", shortName="", doc="Format string for strand_artifact_power_threshold", required=false, exclusiveOf="", validation="") - var strand_artifact_power_thresholdFormat: String = "%s" - - /** LOD threshold for calling normal non-variant at dbsnp sites */ - @Argument(fullName="dbsnp_normal_lod", shortName="", doc="LOD threshold for calling normal non-variant at dbsnp sites", required=false, exclusiveOf="", validation="") - var dbsnp_normal_lod: Option[Float] = None - - 
/** Format string for dbsnp_normal_lod */ - @Argument(fullName="dbsnp_normal_lodFormat", shortName="", doc="Format string for dbsnp_normal_lod", required=false, exclusiveOf="", validation="") - var dbsnp_normal_lodFormat: String = "%s" - - /** Power threshold for normal to determine germline vs variant */ - @Argument(fullName="somatic_classification_normal_power_threshold", shortName="", doc="Power threshold for normal to determine germline vs variant", required=false, exclusiveOf="", validation="") - var somatic_classification_normal_power_threshold: Option[Float] = None - - /** Format string for somatic_classification_normal_power_threshold */ - @Argument(fullName="somatic_classification_normal_power_thresholdFormat", shortName="", doc="Format string for somatic_classification_normal_power_threshold", required=false, exclusiveOf="", validation="") - var somatic_classification_normal_power_thresholdFormat: String = "%s" - - /** minimum allele fraction to be considered in normal, useful for normal sample contaminated with tumor */ - @Argument(fullName="minimum_normal_allele_fraction", shortName="", doc="minimum allele fraction to be considered in normal, useful for normal sample contaminated with tumor", required=false, exclusiveOf="", validation="") - var minimum_normal_allele_fraction: Option[Float] = None - - /** Format string for minimum_normal_allele_fraction */ - @Argument(fullName="minimum_normal_allele_fractionFormat", shortName="", doc="Format string for minimum_normal_allele_fraction", required=false, exclusiveOf="", validation="") - var minimum_normal_allele_fractionFormat: String = "%s" - - /** for computational efficiency, reject sites with allelic fraction below this threshold */ - @Argument(fullName="tumor_f_pretest", shortName="", doc="for computational efficiency, reject sites with allelic fraction below this threshold", required=false, exclusiveOf="", validation="") - var tumor_f_pretest: Option[Float] = None - - /** Format string for 
tumor_f_pretest */ - @Argument(fullName="tumor_f_pretestFormat", shortName="", doc="Format string for tumor_f_pretest", required=false, exclusiveOf="", validation="") - var tumor_f_pretestFormat: String = "%s" - - /** threshold for minimum base quality score */ - @Argument(fullName="min_qscore", shortName="", doc="threshold for minimum base quality score", required=false, exclusiveOf="", validation="") - var min_qscore: Option[Int] = None - - /** how many gapped events (ins/del) are allowed in proximity to this candidate */ - @Argument(fullName="gap_events_threshold", shortName="", doc="how many gapped events (ins/del) are allowed in proximity to this candidate", required=false, exclusiveOf="", validation="") - var gap_events_threshold: Option[Int] = None - - /** if this fraction or more of the bases in a read are soft/hard clipped, do not use this read for mutation calling */ - @Argument(fullName="heavily_clipped_read_fraction", shortName="", doc="if this fraction or more of the bases in a read are soft/hard clipped, do not use this read for mutation calling", required=false, exclusiveOf="", validation="") - var heavily_clipped_read_fraction: Option[Float] = None - - /** Format string for heavily_clipped_read_fraction */ - @Argument(fullName="heavily_clipped_read_fractionFormat", shortName="", doc="Format string for heavily_clipped_read_fraction", required=false, exclusiveOf="", validation="") - var heavily_clipped_read_fractionFormat: String = "%s" - - /** pvalue threshold for fishers exact test of clipping bias in mutant reads vs ref reads */ - @Argument(fullName="clipping_bias_pvalue_threshold", shortName="", doc="pvalue threshold for fishers exact test of clipping bias in mutant reads vs ref reads", required=false, exclusiveOf="", validation="") - var clipping_bias_pvalue_threshold: Option[Float] = None - - /** Format string for clipping_bias_pvalue_threshold */ - @Argument(fullName="clipping_bias_pvalue_thresholdFormat", shortName="", doc="Format string for 
clipping_bias_pvalue_threshold", required=false, exclusiveOf="", validation="") - var clipping_bias_pvalue_thresholdFormat: String = "%s" - - /** threshold for determining if there is relatedness between the alt and ref allele read piles */ - @Argument(fullName="fraction_mapq0_threshold", shortName="", doc="threshold for determining if there is relatedness between the alt and ref allele read piles", required=false, exclusiveOf="", validation="") - var fraction_mapq0_threshold: Option[Float] = None - - /** Format string for fraction_mapq0_threshold */ - @Argument(fullName="fraction_mapq0_thresholdFormat", shortName="", doc="Format string for fraction_mapq0_threshold", required=false, exclusiveOf="", validation="") - var fraction_mapq0_thresholdFormat: String = "%s" - - /** threshold for clustered read position artifact median */ - @Argument(fullName="pir_median_threshold", shortName="", doc="threshold for clustered read position artifact median", required=false, exclusiveOf="", validation="") - var pir_median_threshold: Option[Double] = None - - /** Format string for pir_median_threshold */ - @Argument(fullName="pir_median_thresholdFormat", shortName="", doc="Format string for pir_median_threshold", required=false, exclusiveOf="", validation="") - var pir_median_thresholdFormat: String = "%s" - - /** threshold for clustered read position artifact MAD */ - @Argument(fullName="pir_mad_threshold", shortName="", doc="threshold for clustered read position artifact MAD", required=false, exclusiveOf="", validation="") - var pir_mad_threshold: Option[Double] = None - - /** Format string for pir_mad_threshold */ - @Argument(fullName="pir_mad_thresholdFormat", shortName="", doc="Format string for pir_mad_threshold", required=false, exclusiveOf="", validation="") - var pir_mad_thresholdFormat: String = "%s" - - /** required minimum value for tumor alt allele maximum mapping quality score */ - @Argument(fullName="required_maximum_alt_allele_mapping_quality_score", shortName="", 
doc="required minimum value for tumor alt allele maximum mapping quality score", required=false, exclusiveOf="", validation="") - var required_maximum_alt_allele_mapping_quality_score: Option[Int] = None - - /** threshold for maximum alternate allele counts in normal */ - @Argument(fullName="max_alt_alleles_in_normal_count", shortName="", doc="threshold for maximum alternate allele counts in normal", required=false, exclusiveOf="", validation="") - var max_alt_alleles_in_normal_count: Option[Int] = None - - /** threshold for maximum alternate allele quality score sum in normal */ - @Argument(fullName="max_alt_alleles_in_normal_qscore_sum", shortName="", doc="threshold for maximum alternate allele quality score sum in normal", required=false, exclusiveOf="", validation="") - var max_alt_alleles_in_normal_qscore_sum: Option[Int] = None - - /** threshold for maximum alternate allele fraction in normal */ - @Argument(fullName="max_alt_allele_in_normal_fraction", shortName="", doc="threshold for maximum alternate allele fraction in normal", required=false, exclusiveOf="", validation="") - var max_alt_allele_in_normal_fraction: Option[Double] = None - - /** Format string for max_alt_allele_in_normal_fraction */ - @Argument(fullName="max_alt_allele_in_normal_fractionFormat", shortName="", doc="Format string for max_alt_allele_in_normal_fraction", required=false, exclusiveOf="", validation="") - var max_alt_allele_in_normal_fractionFormat: String = "%s" - - /** Phred scale quality score constant to use in power calculations */ - @Argument(fullName="power_constant_qscore", shortName="", doc="Phred scale quality score constant to use in power calculations", required=false, exclusiveOf="", validation="") - var power_constant_qscore: Option[Int] = None - - /** Absolute Copy Number Data, as defined by Absolute, to use in power calculations */ - @Argument(fullName="absolute_copy_number_data", shortName="", doc="Absolute Copy Number Data, as defined by Absolute, to use in power 
calculations", required=false, exclusiveOf="", validation="") - var absolute_copy_number_data: File = _ - - /** Allelic fraction constant to use in power calculations */ - @Argument(fullName="power_constant_af", shortName="", doc="Allelic fraction constant to use in power calculations", required=false, exclusiveOf="", validation="") - var power_constant_af: Option[Double] = None - - /** Format string for power_constant_af */ - @Argument(fullName="power_constant_afFormat", shortName="", doc="Format string for power_constant_af", required=false, exclusiveOf="", validation="") - var power_constant_afFormat: String = "%s" - - /** Call-stats output */ - @Output(fullName="out", shortName="o", doc="Call-stats output", required=false, exclusiveOf="", validation="") - @Gather(classOf[org.broadinstitute.sting.queue.function.scattergather.SimpleTextGatherFunction]) - var out: File = _ - - /** - * Short name of out - * @return Short name of out - */ - def o = this.out - - /** - * Short name of out - * @param value Short name of out - */ - def o_=(value: File) { this.out = value } - - /** VCF file of DBSNP information */ - @Input(fullName="dbsnp", shortName="dbsnp", doc="VCF file of DBSNP information", required=false, exclusiveOf="", validation="") - var dbsnp: Seq[File] = Nil - - /** Dependencies on any indexes of dbsnp */ - @Input(fullName="dbsnpIndexes", shortName="", doc="Dependencies on any indexes of dbsnp", required=false, exclusiveOf="", validation="") - private var dbsnpIndexes: Seq[File] = Nil - - /** VCF file of COSMIC sites */ - @Input(fullName="cosmic", shortName="cosmic", doc="VCF file of COSMIC sites", required=false, exclusiveOf="", validation="") - var cosmic: Seq[File] = Nil - - /** Dependencies on any indexes of cosmic */ - @Input(fullName="cosmicIndexes", shortName="", doc="Dependencies on any indexes of cosmic", required=false, exclusiveOf="", validation="") - private var cosmicIndexes: Seq[File] = Nil - - /** VCF file of sites observed in normal */ - 
@Input(fullName="normal_panel", shortName="normal_panel", doc="VCF file of sites observed in normal", required=false, exclusiveOf="", validation="") - var normal_panel: Seq[File] = Nil - - /** Dependencies on any indexes of normal_panel */ - @Input(fullName="normal_panelIndexes", shortName="", doc="Dependencies on any indexes of normal_panel", required=false, exclusiveOf="", validation="") - private var normal_panelIndexes: Seq[File] = Nil - - /** write out coverage in WIGGLE format to this file */ - @Output(fullName="coverage_file", shortName="cov", doc="write out coverage in WIGGLE format to this file", required=false, exclusiveOf="", validation="") - @Gather(classOf[org.broadinstitute.sting.queue.function.scattergather.SimpleTextGatherFunction]) - var coverage_file: File = _ - - /** - * Short name of coverage_file - * @return Short name of coverage_file - */ - def cov = this.coverage_file - - /** - * Short name of coverage_file - * @param value Short name of coverage_file - */ - def cov_=(value: File) { this.coverage_file = value } - - /** write out 20x of Q20 coverage in WIGGLE format to this file */ - @Output(fullName="coverage_20_q20_file", shortName="cov_q20", doc="write out 20x of Q20 coverage in WIGGLE format to this file", required=false, exclusiveOf="", validation="") - @Gather(classOf[org.broadinstitute.sting.queue.function.scattergather.SimpleTextGatherFunction]) - var coverage_20_q20_file: File = _ - - /** - * Short name of coverage_20_q20_file - * @return Short name of coverage_20_q20_file - */ - def cov_q20 = this.coverage_20_q20_file - - /** - * Short name of coverage_20_q20_file - * @param value Short name of coverage_20_q20_file - */ - def cov_q20_=(value: File) { this.coverage_20_q20_file = value } - - /** write out power in WIGGLE format to this file */ - @Output(fullName="power_file", shortName="pow", doc="write out power in WIGGLE format to this file", required=false, exclusiveOf="", validation="") - 
@Gather(classOf[org.broadinstitute.sting.queue.function.scattergather.SimpleTextGatherFunction]) - var power_file: File = _ - - /** - * Short name of power_file - * @return Short name of power_file - */ - def pow = this.power_file - - /** - * Short name of power_file - * @param value Short name of power_file - */ - def pow_=(value: File) { this.power_file = value } - - /** write out tumor read depth in WIGGLE format to this file */ - @Output(fullName="tumor_depth_file", shortName="tdf", doc="write out tumor read depth in WIGGLE format to this file", required=false, exclusiveOf="", validation="") - @Gather(classOf[org.broadinstitute.sting.queue.function.scattergather.SimpleTextGatherFunction]) - var tumor_depth_file: File = _ - - /** - * Short name of tumor_depth_file - * @return Short name of tumor_depth_file - */ - def tdf = this.tumor_depth_file - - /** - * Short name of tumor_depth_file - * @param value Short name of tumor_depth_file - */ - def tdf_=(value: File) { this.tumor_depth_file = value } - - /** write out normal read depth in WIGGLE format to this file */ - @Output(fullName="normal_depth_file", shortName="ndf", doc="write out normal read depth in WIGGLE format to this file", required=false, exclusiveOf="", validation="") - @Gather(classOf[org.broadinstitute.sting.queue.function.scattergather.SimpleTextGatherFunction]) - var normal_depth_file: File = _ - - /** - * Short name of normal_depth_file - * @return Short name of normal_depth_file - */ - def ndf = this.normal_depth_file - - /** - * Short name of normal_depth_file - * @param value Short name of normal_depth_file - */ - def ndf_=(value: File) { this.normal_depth_file = value } - - /** if a read has mismatching number of bases and base qualities, filter out the read instead of blowing up. 
*/ - @Argument(fullName="filter_mismatching_base_and_quals", shortName="filterMBQ", doc="if a read has mismatching number of bases and base qualities, filter out the read instead of blowing up.", required=false, exclusiveOf="", validation="") - var filter_mismatching_base_and_quals: Boolean = _ - - /** - * Short name of filter_mismatching_base_and_quals - * @return Short name of filter_mismatching_base_and_quals - */ - def filterMBQ = this.filter_mismatching_base_and_quals - - /** - * Short name of filter_mismatching_base_and_quals - * @param value Short name of filter_mismatching_base_and_quals - */ - def filterMBQ_=(value: Boolean) { this.filter_mismatching_base_and_quals = value } - - override def freezeFieldValues() { - super.freezeFieldValues() - dbsnpIndexes ++= dbsnp.filter(orig => orig != null).map(orig => new File(orig.getPath + ".idx")) - cosmicIndexes ++= cosmic.filter(orig => orig != null).map(orig => new File(orig.getPath + ".idx")) - normal_panelIndexes ++= normal_panel.filter(orig => orig != null).map(orig => new File(orig.getPath + ".idx")) - } - - override def commandLine = super.commandLine + conditional(noop, "--noop", escape=true, format="%s") + conditional(enable_extended_output, "--enable_extended_output", escape=true, format="%s") + conditional(artifact_detection_mode, "--artifact_detection_mode", escape=true, format="%s") + optional("--tumor_sample_name", tumor_sample_name, spaceSeparated=true, escape=true, format="%s") + optional("--bam_tumor_sample_name", bam_tumor_sample_name, spaceSeparated=true, escape=true, format="%s") + optional("--normal_sample_name", normal_sample_name, spaceSeparated=true, escape=true, format="%s") + conditional(force_output, "--force_output", escape=true, format="%s") + conditional(force_alleles, "--force_alleles", escape=true, format="%s") + optional("--initial_tumor_lod", initial_tumor_lod, spaceSeparated=true, escape=true, format=initial_tumor_lodFormat) + optional("--tumor_lod", tumor_lod, 
spaceSeparated=true, escape=true, format=tumor_lodFormat) + optional("--fraction_contamination", fraction_contamination, spaceSeparated=true, escape=true, format=fraction_contaminationFormat) + optional("--minimum_mutation_cell_fraction", minimum_mutation_cell_fraction, spaceSeparated=true, escape=true, format=minimum_mutation_cell_fractionFormat) + optional("--normal_lod", normal_lod, spaceSeparated=true, escape=true, format=normal_lodFormat) + optional("--normal_artifact_lod", normal_artifact_lod, spaceSeparated=true, escape=true, format=normal_artifact_lodFormat) + optional("--strand_artifact_lod", strand_artifact_lod, spaceSeparated=true, escape=true, format=strand_artifact_lodFormat) + optional("--strand_artifact_power_threshold", strand_artifact_power_threshold, spaceSeparated=true, escape=true, format=strand_artifact_power_thresholdFormat) + optional("--dbsnp_normal_lod", dbsnp_normal_lod, spaceSeparated=true, escape=true, format=dbsnp_normal_lodFormat) + optional("--somatic_classification_normal_power_threshold", somatic_classification_normal_power_threshold, spaceSeparated=true, escape=true, format=somatic_classification_normal_power_thresholdFormat) + optional("--minimum_normal_allele_fraction", minimum_normal_allele_fraction, spaceSeparated=true, escape=true, format=minimum_normal_allele_fractionFormat) + optional("--tumor_f_pretest", tumor_f_pretest, spaceSeparated=true, escape=true, format=tumor_f_pretestFormat) + optional("--min_qscore", min_qscore, spaceSeparated=true, escape=true, format="%s") + optional("--gap_events_threshold", gap_events_threshold, spaceSeparated=true, escape=true, format="%s") + optional("--heavily_clipped_read_fraction", heavily_clipped_read_fraction, spaceSeparated=true, escape=true, format=heavily_clipped_read_fractionFormat) + optional("--clipping_bias_pvalue_threshold", clipping_bias_pvalue_threshold, spaceSeparated=true, escape=true, format=clipping_bias_pvalue_thresholdFormat) + optional("--fraction_mapq0_threshold", 
fraction_mapq0_threshold, spaceSeparated=true, escape=true, format=fraction_mapq0_thresholdFormat) + optional("--pir_median_threshold", pir_median_threshold, spaceSeparated=true, escape=true, format=pir_median_thresholdFormat) + optional("--pir_mad_threshold", pir_mad_threshold, spaceSeparated=true, escape=true, format=pir_mad_thresholdFormat) + optional("--required_maximum_alt_allele_mapping_quality_score", required_maximum_alt_allele_mapping_quality_score, spaceSeparated=true, escape=true, format="%s") + optional("--max_alt_alleles_in_normal_count", max_alt_alleles_in_normal_count, spaceSeparated=true, escape=true, format="%s") + optional("--max_alt_alleles_in_normal_qscore_sum", max_alt_alleles_in_normal_qscore_sum, spaceSeparated=true, escape=true, format="%s") + optional("--max_alt_allele_in_normal_fraction", max_alt_allele_in_normal_fraction, spaceSeparated=true, escape=true, format=max_alt_allele_in_normal_fractionFormat) + optional("--power_constant_qscore", power_constant_qscore, spaceSeparated=true, escape=true, format="%s") + optional("--absolute_copy_number_data", absolute_copy_number_data, spaceSeparated=true, escape=true, format="%s") + optional("--power_constant_af", power_constant_af, spaceSeparated=true, escape=true, format=power_constant_afFormat) + optional("-o", out, spaceSeparated=true, escape=true, format="%s") + repeat("-dbsnp", dbsnp, formatPrefix=TaggedFile.formatCommandLineParameter, spaceSeparated=true, escape=true, format="%s") + repeat("-cosmic", cosmic, formatPrefix=TaggedFile.formatCommandLineParameter, spaceSeparated=true, escape=true, format="%s") + repeat("-normal_panel", normal_panel, formatPrefix=TaggedFile.formatCommandLineParameter, spaceSeparated=true, escape=true, format="%s") + optional("-cov", coverage_file, spaceSeparated=true, escape=true, format="%s") + optional("-cov_q20", coverage_20_q20_file, spaceSeparated=true, escape=true, format="%s") + optional("-pow", power_file, spaceSeparated=true, escape=true, format="%s") + 
optional("-tdf", tumor_depth_file, spaceSeparated=true, escape=true, format="%s") + optional("-ndf", normal_depth_file, spaceSeparated=true, escape=true, format="%s") + conditional(filter_mismatching_base_and_quals, "-filterMBQ", escape=true, format="%s") -} From f9095c7ab74d59b35b85750886c99711b44f143c Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Mon, 24 Sep 2012 17:01:17 -0400 Subject: [PATCH 19/90] Generic input file name recognition (still need to implement support to FastQ, but it now can at least accept it) --- .../qscripts/DataProcessingPipeline.scala | 20 +++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala index 56f6460fb..c21db30ce 100755 --- a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala +++ b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala @@ -96,6 +96,7 @@ class DataProcessingPipeline extends QScript { var cleanModelEnum: ConsensusDeterminationModel = ConsensusDeterminationModel.USE_READS + val bwaParameters: String = " -q 5 -l 32 -k 2 -t 4 -o 1 " @@ -165,12 +166,15 @@ class DataProcessingPipeline extends QScript { var realignedBams: Seq[File] = Seq() var index = 1 for (bam <- bams) { - // first revert the BAM file to the original qualities - val saiFile1 = swapExt(bam, ".bam", "." + index + ".1.sai") - val saiFile2 = swapExt(bam, ".bam", "." + index + ".2.sai") - val realignedSamFile = swapExt(bam, ".bam", "." + index + ".realigned.sam") - val realignedBamFile = swapExt(bam, ".bam", "." + index + ".realigned.bam") - val rgRealignedBamFile = swapExt(bam, ".bam", "." + index + ".realigned.rg.bam") + val extension = bam.toString.substring(bam.toString.length - 4) + + + + val saiFile1 = swapExt(bam, extension, "." 
+ index + ".1.sai") + val saiFile2 = swapExt(bam, extension, "." + index + ".2.sai") + val realignedSamFile = swapExt(bam, extension, "." + index + ".realigned.sam") + val realignedBamFile = swapExt(bam, extension, "." + index + ".realigned.bam") + val rgRealignedBamFile = swapExt(bam, extension, "." + index + ".realigned.rg.bam") if (useBWAse) { val revertedBAM = revertBAM(bam, true) @@ -444,7 +448,7 @@ class DataProcessingPipeline extends QScript { case class bwa_aln_se (inBam: File, outSai: File) extends CommandLineFunction with ExternalCommonArgs { @Input(doc="bam file to be aligned") var bam = inBam @Output(doc="output sai file") var sai = outSai - def commandLine = bwaPath + " aln -t " + bwaThreads + " -q 5 " + reference + " -b " + bam + " > " + sai + def commandLine = bwaPath + " aln -t " + bwaThreads + bwaParameters + reference + " -b " + bam + " > " + sai this.analysisName = queueLogDir + outSai + ".bwa_aln_se" this.jobName = queueLogDir + outSai + ".bwa_aln_se" } @@ -452,7 +456,7 @@ class DataProcessingPipeline extends QScript { case class bwa_aln_pe (inBam: File, outSai1: File, index: Int) extends CommandLineFunction with ExternalCommonArgs { @Input(doc="bam file to be aligned") var bam = inBam @Output(doc="output sai file for 1st mating pair") var sai = outSai1 - def commandLine = bwaPath + " aln -t " + bwaThreads + " -q 5 " + reference + " -b" + index + " " + bam + " > " + sai + def commandLine = bwaPath + " aln -t " + bwaThreads + bwaParameters + reference + " -b" + index + " " + bam + " > " + sai this.analysisName = queueLogDir + outSai1 + ".bwa_aln_pe1" this.jobName = queueLogDir + outSai1 + ".bwa_aln_pe1" } From 08b6d1559c2d072541dec3f960c8978e0b952fba Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Tue, 25 Sep 2012 12:02:34 -0400 Subject: [PATCH 20/90] Reverting the DPP to the original version, going to create a new simplified version for CMI in private. 
--- .../qscripts/DataProcessingPipeline.scala | 20 ++++++++----------- 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala index c21db30ce..56f6460fb 100755 --- a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala +++ b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala @@ -96,7 +96,6 @@ class DataProcessingPipeline extends QScript { var cleanModelEnum: ConsensusDeterminationModel = ConsensusDeterminationModel.USE_READS - val bwaParameters: String = " -q 5 -l 32 -k 2 -t 4 -o 1 " @@ -166,15 +165,12 @@ class DataProcessingPipeline extends QScript { var realignedBams: Seq[File] = Seq() var index = 1 for (bam <- bams) { - val extension = bam.toString.substring(bam.toString.length - 4) - - - - val saiFile1 = swapExt(bam, extension, "." + index + ".1.sai") - val saiFile2 = swapExt(bam, extension, "." + index + ".2.sai") - val realignedSamFile = swapExt(bam, extension, "." + index + ".realigned.sam") - val realignedBamFile = swapExt(bam, extension, "." + index + ".realigned.bam") - val rgRealignedBamFile = swapExt(bam, extension, "." + index + ".realigned.rg.bam") + // first revert the BAM file to the original qualities + val saiFile1 = swapExt(bam, ".bam", "." + index + ".1.sai") + val saiFile2 = swapExt(bam, ".bam", "." + index + ".2.sai") + val realignedSamFile = swapExt(bam, ".bam", "." + index + ".realigned.sam") + val realignedBamFile = swapExt(bam, ".bam", "." + index + ".realigned.bam") + val rgRealignedBamFile = swapExt(bam, ".bam", "." 
+ index + ".realigned.rg.bam") if (useBWAse) { val revertedBAM = revertBAM(bam, true) @@ -448,7 +444,7 @@ class DataProcessingPipeline extends QScript { case class bwa_aln_se (inBam: File, outSai: File) extends CommandLineFunction with ExternalCommonArgs { @Input(doc="bam file to be aligned") var bam = inBam @Output(doc="output sai file") var sai = outSai - def commandLine = bwaPath + " aln -t " + bwaThreads + bwaParameters + reference + " -b " + bam + " > " + sai + def commandLine = bwaPath + " aln -t " + bwaThreads + " -q 5 " + reference + " -b " + bam + " > " + sai this.analysisName = queueLogDir + outSai + ".bwa_aln_se" this.jobName = queueLogDir + outSai + ".bwa_aln_se" } @@ -456,7 +452,7 @@ class DataProcessingPipeline extends QScript { case class bwa_aln_pe (inBam: File, outSai1: File, index: Int) extends CommandLineFunction with ExternalCommonArgs { @Input(doc="bam file to be aligned") var bam = inBam @Output(doc="output sai file for 1st mating pair") var sai = outSai1 - def commandLine = bwaPath + " aln -t " + bwaThreads + bwaParameters + reference + " -b" + index + " " + bam + " > " + sai + def commandLine = bwaPath + " aln -t " + bwaThreads + " -q 5 " + reference + " -b" + index + " " + bam + " > " + sai this.analysisName = queueLogDir + outSai1 + ".bwa_aln_pe1" this.jobName = queueLogDir + outSai1 + ".bwa_aln_pe1" } From 0c177092231c623dca8c0e84fb47a4af94092817 Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Tue, 25 Sep 2012 17:13:50 -0400 Subject: [PATCH 21/90] First implementation of a generic 'bundled' Data Processing Pipeline for germline and cancer. not ready for prime time yet! 
--- .../src/org/broadinstitute/sting/queue/util/QScriptUtils.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/public/scala/src/org/broadinstitute/sting/queue/util/QScriptUtils.scala b/public/scala/src/org/broadinstitute/sting/queue/util/QScriptUtils.scala index 1529d9951..f684e533f 100644 --- a/public/scala/src/org/broadinstitute/sting/queue/util/QScriptUtils.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/util/QScriptUtils.scala @@ -57,7 +57,8 @@ object QScriptUtils { for (file <- fromFile(in).getLines()) if (!file.startsWith("#") && !file.isEmpty ) list :+= new File(file.trim()) - list.sortWith(_.compareTo(_) < 0) +// list.sortWith(_.compareTo(_) < 0) + list } /** From 2311606de4addf07c65540735c8b09b1385f30db Mon Sep 17 00:00:00 2001 From: Kristian Cibulskis Date: Wed, 3 Oct 2012 16:25:34 -0400 Subject: [PATCH 28/90] initial cancer pipeline with mutations and partial indel support --- .../queue/extensions/cancer/MuTect.scala | 378 ++++++++++++++++++ 1 file changed, 378 insertions(+) create mode 100644 public/scala/src/org/broadinstitute/sting/queue/extensions/cancer/MuTect.scala diff --git a/public/scala/src/org/broadinstitute/sting/queue/extensions/cancer/MuTect.scala b/public/scala/src/org/broadinstitute/sting/queue/extensions/cancer/MuTect.scala new file mode 100644 index 000000000..623d397d4 --- /dev/null +++ b/public/scala/src/org/broadinstitute/sting/queue/extensions/cancer/MuTect.scala @@ -0,0 +1,378 @@ +package org.broadinstitute.sting.queue.extensions.cancer + +import java.io.File +import org.broadinstitute.sting.commandline.Argument +import org.broadinstitute.sting.commandline.Gather +import org.broadinstitute.sting.commandline.Input +import org.broadinstitute.sting.commandline.Output +import org.broadinstitute.sting.queue.function.scattergather.ScatterGatherableFunction +import org.broadinstitute.sting.queue.extensions.gatk.{LocusScatterFunction, TaggedFile} + +class MuTect extends 
org.broadinstitute.sting.queue.extensions.gatk.CommandLineGATK with ScatterGatherableFunction { + analysisName = "MuTect" + analysis_type = "MuTect" + scatterClass = classOf[LocusScatterFunction] + + /** used for debugging, basically exit as soon as we get the reads */ + @Argument(fullName="noop", shortName="", doc="used for debugging, basically exit as soon as we get the reads", required=false, exclusiveOf="", validation="") + var noop: Boolean = _ + + /** add many additional columns of statistics to the output file */ + @Argument(fullName="enable_extended_output", shortName="", doc="add many additional columns of statistics to the output file", required=false, exclusiveOf="", validation="") + var enable_extended_output: Boolean = _ + + /** used when running the caller on a normal (as if it were a tumor) to detect artifacts */ + @Argument(fullName="artifact_detection_mode", shortName="", doc="used when running the caller on a normal (as if it were a tumor) to detect artifacts", required=false, exclusiveOf="", validation="") + var artifact_detection_mode: Boolean = _ + + /** name to use for tumor in output files */ + @Argument(fullName="tumor_sample_name", shortName="", doc="name to use for tumor in output files", required=false, exclusiveOf="", validation="") + var tumor_sample_name: String = _ + + /** if the tumor bam contains multiple samples, only use read groups with SM equal to this value */ + @Argument(fullName="bam_tumor_sample_name", shortName="", doc="if the tumor bam contains multiple samples, only use read groups with SM equal to this value", required=false, exclusiveOf="", validation="") + var bam_tumor_sample_name: String = _ + + /** name to use for normal in output files */ + @Argument(fullName="normal_sample_name", shortName="", doc="name to use for normal in output files", required=false, exclusiveOf="", validation="") + var normal_sample_name: String = _ + + /** force output for each site */ + @Argument(fullName="force_output", shortName="", 
doc="force output for each site", required=false, exclusiveOf="", validation="") + var force_output: Boolean = _ + + /** force output for all alleles at each site */ + @Argument(fullName="force_alleles", shortName="", doc="force output for all alleles at each site", required=false, exclusiveOf="", validation="") + var force_alleles: Boolean = _ + + /** Initial LOD threshold for calling tumor variant */ + @Argument(fullName="initial_tumor_lod", shortName="", doc="Initial LOD threshold for calling tumor variant", required=false, exclusiveOf="", validation="") + var initial_tumor_lod: Option[Float] = None + + /** Format string for initial_tumor_lod */ + @Argument(fullName="initial_tumor_lodFormat", shortName="", doc="Format string for initial_tumor_lod", required=false, exclusiveOf="", validation="") + var initial_tumor_lodFormat: String = "%s" + + /** LOD threshold for calling tumor variant */ + @Argument(fullName="tumor_lod", shortName="", doc="LOD threshold for calling tumor variant", required=false, exclusiveOf="", validation="") + var tumor_lod: Option[Float] = None + + /** Format string for tumor_lod */ + @Argument(fullName="tumor_lodFormat", shortName="", doc="Format string for tumor_lod", required=false, exclusiveOf="", validation="") + var tumor_lodFormat: String = "%s" + + /** estimate of fraction (0-1) of physical contamination with other unrelated samples */ + @Argument(fullName="fraction_contamination", shortName="", doc="estimate of fraction (0-1) of physical contamination with other unrelated samples", required=false, exclusiveOf="", validation="") + var fraction_contamination: Option[Float] = None + + /** Format string for fraction_contamination */ + @Argument(fullName="fraction_contaminationFormat", shortName="", doc="Format string for fraction_contamination", required=false, exclusiveOf="", validation="") + var fraction_contaminationFormat: String = "%s" + + /** minimum fraction of cells which are presumed to have a mutation, used to handle 
non-clonality and contamination */ + @Argument(fullName="minimum_mutation_cell_fraction", shortName="", doc="minimum fraction of cells which are presumed to have a mutation, used to handle non-clonality and contamination", required=false, exclusiveOf="", validation="") + var minimum_mutation_cell_fraction: Option[Float] = None + + /** Format string for minimum_mutation_cell_fraction */ + @Argument(fullName="minimum_mutation_cell_fractionFormat", shortName="", doc="Format string for minimum_mutation_cell_fraction", required=false, exclusiveOf="", validation="") + var minimum_mutation_cell_fractionFormat: String = "%s" + + /** LOD threshold for calling normal non-germline */ + @Argument(fullName="normal_lod", shortName="", doc="LOD threshold for calling normal non-germline", required=false, exclusiveOf="", validation="") + var normal_lod: Option[Float] = None + + /** Format string for normal_lod */ + @Argument(fullName="normal_lodFormat", shortName="", doc="Format string for normal_lod", required=false, exclusiveOf="", validation="") + var normal_lodFormat: String = "%s" + + /** LOD threshold for calling normal non-variant */ + @Argument(fullName="normal_artifact_lod", shortName="", doc="LOD threshold for calling normal non-variant", required=false, exclusiveOf="", validation="") + var normal_artifact_lod: Option[Float] = None + + /** Format string for normal_artifact_lod */ + @Argument(fullName="normal_artifact_lodFormat", shortName="", doc="Format string for normal_artifact_lod", required=false, exclusiveOf="", validation="") + var normal_artifact_lodFormat: String = "%s" + + /** LOD threshold for calling strand bias */ + @Argument(fullName="strand_artifact_lod", shortName="", doc="LOD threshold for calling strand bias", required=false, exclusiveOf="", validation="") + var strand_artifact_lod: Option[Float] = None + + /** Format string for strand_artifact_lod */ + @Argument(fullName="strand_artifact_lodFormat", shortName="", doc="Format string for 
strand_artifact_lod", required=false, exclusiveOf="", validation="") + var strand_artifact_lodFormat: String = "%s" + + /** power threshold for calling strand bias */ + @Argument(fullName="strand_artifact_power_threshold", shortName="", doc="power threshold for calling strand bias", required=false, exclusiveOf="", validation="") + var strand_artifact_power_threshold: Option[Float] = None + + /** Format string for strand_artifact_power_threshold */ + @Argument(fullName="strand_artifact_power_thresholdFormat", shortName="", doc="Format string for strand_artifact_power_threshold", required=false, exclusiveOf="", validation="") + var strand_artifact_power_thresholdFormat: String = "%s" + + /** LOD threshold for calling normal non-variant at dbsnp sites */ + @Argument(fullName="dbsnp_normal_lod", shortName="", doc="LOD threshold for calling normal non-variant at dbsnp sites", required=false, exclusiveOf="", validation="") + var dbsnp_normal_lod: Option[Float] = None + + /** Format string for dbsnp_normal_lod */ + @Argument(fullName="dbsnp_normal_lodFormat", shortName="", doc="Format string for dbsnp_normal_lod", required=false, exclusiveOf="", validation="") + var dbsnp_normal_lodFormat: String = "%s" + + /** Power threshold for normal to determine germline vs variant */ + @Argument(fullName="somatic_classification_normal_power_threshold", shortName="", doc="Power threshold for normal to determine germline vs variant", required=false, exclusiveOf="", validation="") + var somatic_classification_normal_power_threshold: Option[Float] = None + + /** Format string for somatic_classification_normal_power_threshold */ + @Argument(fullName="somatic_classification_normal_power_thresholdFormat", shortName="", doc="Format string for somatic_classification_normal_power_threshold", required=false, exclusiveOf="", validation="") + var somatic_classification_normal_power_thresholdFormat: String = "%s" + + /** minimum allele fraction to be considered in normal, useful for normal sample 
contaminated with tumor */ + @Argument(fullName="minimum_normal_allele_fraction", shortName="", doc="minimum allele fraction to be considered in normal, useful for normal sample contaminated with tumor", required=false, exclusiveOf="", validation="") + var minimum_normal_allele_fraction: Option[Float] = None + + /** Format string for minimum_normal_allele_fraction */ + @Argument(fullName="minimum_normal_allele_fractionFormat", shortName="", doc="Format string for minimum_normal_allele_fraction", required=false, exclusiveOf="", validation="") + var minimum_normal_allele_fractionFormat: String = "%s" + + /** for computational efficiency, reject sites with allelic fraction below this threshold */ + @Argument(fullName="tumor_f_pretest", shortName="", doc="for computational efficiency, reject sites with allelic fraction below this threshold", required=false, exclusiveOf="", validation="") + var tumor_f_pretest: Option[Float] = None + + /** Format string for tumor_f_pretest */ + @Argument(fullName="tumor_f_pretestFormat", shortName="", doc="Format string for tumor_f_pretest", required=false, exclusiveOf="", validation="") + var tumor_f_pretestFormat: String = "%s" + + /** threshold for minimum base quality score */ + @Argument(fullName="min_qscore", shortName="", doc="threshold for minimum base quality score", required=false, exclusiveOf="", validation="") + var min_qscore: Option[Int] = None + + /** how many gapped events (ins/del) are allowed in proximity to this candidate */ + @Argument(fullName="gap_events_threshold", shortName="", doc="how many gapped events (ins/del) are allowed in proximity to this candidate", required=false, exclusiveOf="", validation="") + var gap_events_threshold: Option[Int] = None + + /** if this fraction or more of the bases in a read are soft/hard clipped, do not use this read for mutation calling */ + @Argument(fullName="heavily_clipped_read_fraction", shortName="", doc="if this fraction or more of the bases in a read are soft/hard 
clipped, do not use this read for mutation calling", required=false, exclusiveOf="", validation="") + var heavily_clipped_read_fraction: Option[Float] = None + + /** Format string for heavily_clipped_read_fraction */ + @Argument(fullName="heavily_clipped_read_fractionFormat", shortName="", doc="Format string for heavily_clipped_read_fraction", required=false, exclusiveOf="", validation="") + var heavily_clipped_read_fractionFormat: String = "%s" + + /** pvalue threshold for fishers exact test of clipping bias in mutant reads vs ref reads */ + @Argument(fullName="clipping_bias_pvalue_threshold", shortName="", doc="pvalue threshold for fishers exact test of clipping bias in mutant reads vs ref reads", required=false, exclusiveOf="", validation="") + var clipping_bias_pvalue_threshold: Option[Float] = None + + /** Format string for clipping_bias_pvalue_threshold */ + @Argument(fullName="clipping_bias_pvalue_thresholdFormat", shortName="", doc="Format string for clipping_bias_pvalue_threshold", required=false, exclusiveOf="", validation="") + var clipping_bias_pvalue_thresholdFormat: String = "%s" + + /** threshold for determining if there is relatedness between the alt and ref allele read piles */ + @Argument(fullName="fraction_mapq0_threshold", shortName="", doc="threshold for determining if there is relatedness between the alt and ref allele read piles", required=false, exclusiveOf="", validation="") + var fraction_mapq0_threshold: Option[Float] = None + + /** Format string for fraction_mapq0_threshold */ + @Argument(fullName="fraction_mapq0_thresholdFormat", shortName="", doc="Format string for fraction_mapq0_threshold", required=false, exclusiveOf="", validation="") + var fraction_mapq0_thresholdFormat: String = "%s" + + /** threshold for clustered read position artifact median */ + @Argument(fullName="pir_median_threshold", shortName="", doc="threshold for clustered read position artifact median", required=false, exclusiveOf="", validation="") + var 
pir_median_threshold: Option[Double] = None + + /** Format string for pir_median_threshold */ + @Argument(fullName="pir_median_thresholdFormat", shortName="", doc="Format string for pir_median_threshold", required=false, exclusiveOf="", validation="") + var pir_median_thresholdFormat: String = "%s" + + /** threshold for clustered read position artifact MAD */ + @Argument(fullName="pir_mad_threshold", shortName="", doc="threshold for clustered read position artifact MAD", required=false, exclusiveOf="", validation="") + var pir_mad_threshold: Option[Double] = None + + /** Format string for pir_mad_threshold */ + @Argument(fullName="pir_mad_thresholdFormat", shortName="", doc="Format string for pir_mad_threshold", required=false, exclusiveOf="", validation="") + var pir_mad_thresholdFormat: String = "%s" + + /** required minimum value for tumor alt allele maximum mapping quality score */ + @Argument(fullName="required_maximum_alt_allele_mapping_quality_score", shortName="", doc="required minimum value for tumor alt allele maximum mapping quality score", required=false, exclusiveOf="", validation="") + var required_maximum_alt_allele_mapping_quality_score: Option[Int] = None + + /** threshold for maximum alternate allele counts in normal */ + @Argument(fullName="max_alt_alleles_in_normal_count", shortName="", doc="threshold for maximum alternate allele counts in normal", required=false, exclusiveOf="", validation="") + var max_alt_alleles_in_normal_count: Option[Int] = None + + /** threshold for maximum alternate allele quality score sum in normal */ + @Argument(fullName="max_alt_alleles_in_normal_qscore_sum", shortName="", doc="threshold for maximum alternate allele quality score sum in normal", required=false, exclusiveOf="", validation="") + var max_alt_alleles_in_normal_qscore_sum: Option[Int] = None + + /** threshold for maximum alternate allele fraction in normal */ + @Argument(fullName="max_alt_allele_in_normal_fraction", shortName="", doc="threshold for 
maximum alternate allele fraction in normal", required=false, exclusiveOf="", validation="") + var max_alt_allele_in_normal_fraction: Option[Double] = None + + /** Format string for max_alt_allele_in_normal_fraction */ + @Argument(fullName="max_alt_allele_in_normal_fractionFormat", shortName="", doc="Format string for max_alt_allele_in_normal_fraction", required=false, exclusiveOf="", validation="") + var max_alt_allele_in_normal_fractionFormat: String = "%s" + + /** Phred scale quality score constant to use in power calculations */ + @Argument(fullName="power_constant_qscore", shortName="", doc="Phred scale quality score constant to use in power calculations", required=false, exclusiveOf="", validation="") + var power_constant_qscore: Option[Int] = None + + /** Absolute Copy Number Data, as defined by Absolute, to use in power calculations */ + @Argument(fullName="absolute_copy_number_data", shortName="", doc="Absolute Copy Number Data, as defined by Absolute, to use in power calculations", required=false, exclusiveOf="", validation="") + var absolute_copy_number_data: File = _ + + /** Allelic fraction constant to use in power calculations */ + @Argument(fullName="power_constant_af", shortName="", doc="Allelic fraction constant to use in power calculations", required=false, exclusiveOf="", validation="") + var power_constant_af: Option[Double] = None + + /** Format string for power_constant_af */ + @Argument(fullName="power_constant_afFormat", shortName="", doc="Format string for power_constant_af", required=false, exclusiveOf="", validation="") + var power_constant_afFormat: String = "%s" + + /** Call-stats output */ + @Output(fullName="out", shortName="o", doc="Call-stats output", required=false, exclusiveOf="", validation="") + @Gather(classOf[org.broadinstitute.sting.queue.function.scattergather.SimpleTextGatherFunction]) + var out: File = _ + + /** + * Short name of out + * @return Short name of out + */ + def o = this.out + + /** + * Short name of out + * 
@param value Short name of out + */ + def o_=(value: File) { this.out = value } + + /** VCF file of DBSNP information */ + @Input(fullName="dbsnp", shortName="dbsnp", doc="VCF file of DBSNP information", required=false, exclusiveOf="", validation="") + var dbsnp: Seq[File] = Nil + + /** Dependencies on any indexes of dbsnp */ + @Input(fullName="dbsnpIndexes", shortName="", doc="Dependencies on any indexes of dbsnp", required=false, exclusiveOf="", validation="") + private var dbsnpIndexes: Seq[File] = Nil + + /** VCF file of COSMIC sites */ + @Input(fullName="cosmic", shortName="cosmic", doc="VCF file of COSMIC sites", required=false, exclusiveOf="", validation="") + var cosmic: Seq[File] = Nil + + /** Dependencies on any indexes of cosmic */ + @Input(fullName="cosmicIndexes", shortName="", doc="Dependencies on any indexes of cosmic", required=false, exclusiveOf="", validation="") + private var cosmicIndexes: Seq[File] = Nil + + /** VCF file of sites observed in normal */ + @Input(fullName="normal_panel", shortName="normal_panel", doc="VCF file of sites observed in normal", required=false, exclusiveOf="", validation="") + var normal_panel: Seq[File] = Nil + + /** Dependencies on any indexes of normal_panel */ + @Input(fullName="normal_panelIndexes", shortName="", doc="Dependencies on any indexes of normal_panel", required=false, exclusiveOf="", validation="") + private var normal_panelIndexes: Seq[File] = Nil + + /** write out coverage in WIGGLE format to this file */ + @Output(fullName="coverage_file", shortName="cov", doc="write out coverage in WIGGLE format to this file", required=false, exclusiveOf="", validation="") + @Gather(classOf[org.broadinstitute.sting.queue.function.scattergather.SimpleTextGatherFunction]) + var coverage_file: File = _ + + /** + * Short name of coverage_file + * @return Short name of coverage_file + */ + def cov = this.coverage_file + + /** + * Short name of coverage_file + * @param value Short name of coverage_file + */ + def 
cov_=(value: File) { this.coverage_file = value } + + /** write out 20x of Q20 coverage in WIGGLE format to this file */ + @Output(fullName="coverage_20_q20_file", shortName="cov_q20", doc="write out 20x of Q20 coverage in WIGGLE format to this file", required=false, exclusiveOf="", validation="") + @Gather(classOf[org.broadinstitute.sting.queue.function.scattergather.SimpleTextGatherFunction]) + var coverage_20_q20_file: File = _ + + /** + * Short name of coverage_20_q20_file + * @return Short name of coverage_20_q20_file + */ + def cov_q20 = this.coverage_20_q20_file + + /** + * Short name of coverage_20_q20_file + * @param value Short name of coverage_20_q20_file + */ + def cov_q20_=(value: File) { this.coverage_20_q20_file = value } + + /** write out power in WIGGLE format to this file */ + @Output(fullName="power_file", shortName="pow", doc="write out power in WIGGLE format to this file", required=false, exclusiveOf="", validation="") + @Gather(classOf[org.broadinstitute.sting.queue.function.scattergather.SimpleTextGatherFunction]) + var power_file: File = _ + + /** + * Short name of power_file + * @return Short name of power_file + */ + def pow = this.power_file + + /** + * Short name of power_file + * @param value Short name of power_file + */ + def pow_=(value: File) { this.power_file = value } + + /** write out tumor read depth in WIGGLE format to this file */ + @Output(fullName="tumor_depth_file", shortName="tdf", doc="write out tumor read depth in WIGGLE format to this file", required=false, exclusiveOf="", validation="") + @Gather(classOf[org.broadinstitute.sting.queue.function.scattergather.SimpleTextGatherFunction]) + var tumor_depth_file: File = _ + + /** + * Short name of tumor_depth_file + * @return Short name of tumor_depth_file + */ + def tdf = this.tumor_depth_file + + /** + * Short name of tumor_depth_file + * @param value Short name of tumor_depth_file + */ + def tdf_=(value: File) { this.tumor_depth_file = value } + + /** write out normal 
read depth in WIGGLE format to this file */ + @Output(fullName="normal_depth_file", shortName="ndf", doc="write out normal read depth in WIGGLE format to this file", required=false, exclusiveOf="", validation="") + @Gather(classOf[org.broadinstitute.sting.queue.function.scattergather.SimpleTextGatherFunction]) + var normal_depth_file: File = _ + + /** + * Short name of normal_depth_file + * @return Short name of normal_depth_file + */ + def ndf = this.normal_depth_file + + /** + * Short name of normal_depth_file + * @param value Short name of normal_depth_file + */ + def ndf_=(value: File) { this.normal_depth_file = value } + + /** if a read has mismatching number of bases and base qualities, filter out the read instead of blowing up. */ + @Argument(fullName="filter_mismatching_base_and_quals", shortName="filterMBQ", doc="if a read has mismatching number of bases and base qualities, filter out the read instead of blowing up.", required=false, exclusiveOf="", validation="") + var filter_mismatching_base_and_quals: Boolean = _ + + /** + * Short name of filter_mismatching_base_and_quals + * @return Short name of filter_mismatching_base_and_quals + */ + def filterMBQ = this.filter_mismatching_base_and_quals + + /** + * Short name of filter_mismatching_base_and_quals + * @param value Short name of filter_mismatching_base_and_quals + */ + def filterMBQ_=(value: Boolean) { this.filter_mismatching_base_and_quals = value } + + override def freezeFieldValues() { + super.freezeFieldValues() + dbsnpIndexes ++= dbsnp.filter(orig => orig != null).map(orig => new File(orig.getPath + ".idx")) + cosmicIndexes ++= cosmic.filter(orig => orig != null).map(orig => new File(orig.getPath + ".idx")) + normal_panelIndexes ++= normal_panel.filter(orig => orig != null).map(orig => new File(orig.getPath + ".idx")) + } + + override def commandLine = super.commandLine + conditional(noop, "--noop", escape=true, format="%s") + conditional(enable_extended_output, "--enable_extended_output", 
escape=true, format="%s") + conditional(artifact_detection_mode, "--artifact_detection_mode", escape=true, format="%s") + optional("--tumor_sample_name", tumor_sample_name, spaceSeparated=true, escape=true, format="%s") + optional("--bam_tumor_sample_name", bam_tumor_sample_name, spaceSeparated=true, escape=true, format="%s") + optional("--normal_sample_name", normal_sample_name, spaceSeparated=true, escape=true, format="%s") + conditional(force_output, "--force_output", escape=true, format="%s") + conditional(force_alleles, "--force_alleles", escape=true, format="%s") + optional("--initial_tumor_lod", initial_tumor_lod, spaceSeparated=true, escape=true, format=initial_tumor_lodFormat) + optional("--tumor_lod", tumor_lod, spaceSeparated=true, escape=true, format=tumor_lodFormat) + optional("--fraction_contamination", fraction_contamination, spaceSeparated=true, escape=true, format=fraction_contaminationFormat) + optional("--minimum_mutation_cell_fraction", minimum_mutation_cell_fraction, spaceSeparated=true, escape=true, format=minimum_mutation_cell_fractionFormat) + optional("--normal_lod", normal_lod, spaceSeparated=true, escape=true, format=normal_lodFormat) + optional("--normal_artifact_lod", normal_artifact_lod, spaceSeparated=true, escape=true, format=normal_artifact_lodFormat) + optional("--strand_artifact_lod", strand_artifact_lod, spaceSeparated=true, escape=true, format=strand_artifact_lodFormat) + optional("--strand_artifact_power_threshold", strand_artifact_power_threshold, spaceSeparated=true, escape=true, format=strand_artifact_power_thresholdFormat) + optional("--dbsnp_normal_lod", dbsnp_normal_lod, spaceSeparated=true, escape=true, format=dbsnp_normal_lodFormat) + optional("--somatic_classification_normal_power_threshold", somatic_classification_normal_power_threshold, spaceSeparated=true, escape=true, format=somatic_classification_normal_power_thresholdFormat) + optional("--minimum_normal_allele_fraction", minimum_normal_allele_fraction, 
spaceSeparated=true, escape=true, format=minimum_normal_allele_fractionFormat) + optional("--tumor_f_pretest", tumor_f_pretest, spaceSeparated=true, escape=true, format=tumor_f_pretestFormat) + optional("--min_qscore", min_qscore, spaceSeparated=true, escape=true, format="%s") + optional("--gap_events_threshold", gap_events_threshold, spaceSeparated=true, escape=true, format="%s") + optional("--heavily_clipped_read_fraction", heavily_clipped_read_fraction, spaceSeparated=true, escape=true, format=heavily_clipped_read_fractionFormat) + optional("--clipping_bias_pvalue_threshold", clipping_bias_pvalue_threshold, spaceSeparated=true, escape=true, format=clipping_bias_pvalue_thresholdFormat) + optional("--fraction_mapq0_threshold", fraction_mapq0_threshold, spaceSeparated=true, escape=true, format=fraction_mapq0_thresholdFormat) + optional("--pir_median_threshold", pir_median_threshold, spaceSeparated=true, escape=true, format=pir_median_thresholdFormat) + optional("--pir_mad_threshold", pir_mad_threshold, spaceSeparated=true, escape=true, format=pir_mad_thresholdFormat) + optional("--required_maximum_alt_allele_mapping_quality_score", required_maximum_alt_allele_mapping_quality_score, spaceSeparated=true, escape=true, format="%s") + optional("--max_alt_alleles_in_normal_count", max_alt_alleles_in_normal_count, spaceSeparated=true, escape=true, format="%s") + optional("--max_alt_alleles_in_normal_qscore_sum", max_alt_alleles_in_normal_qscore_sum, spaceSeparated=true, escape=true, format="%s") + optional("--max_alt_allele_in_normal_fraction", max_alt_allele_in_normal_fraction, spaceSeparated=true, escape=true, format=max_alt_allele_in_normal_fractionFormat) + optional("--power_constant_qscore", power_constant_qscore, spaceSeparated=true, escape=true, format="%s") + optional("--absolute_copy_number_data", absolute_copy_number_data, spaceSeparated=true, escape=true, format="%s") + optional("--power_constant_af", power_constant_af, spaceSeparated=true, escape=true, 
format=power_constant_afFormat) + optional("-o", out, spaceSeparated=true, escape=true, format="%s") + repeat("-dbsnp", dbsnp, formatPrefix=TaggedFile.formatCommandLineParameter, spaceSeparated=true, escape=true, format="%s") + repeat("-cosmic", cosmic, formatPrefix=TaggedFile.formatCommandLineParameter, spaceSeparated=true, escape=true, format="%s") + repeat("-normal_panel", normal_panel, formatPrefix=TaggedFile.formatCommandLineParameter, spaceSeparated=true, escape=true, format="%s") + optional("-cov", coverage_file, spaceSeparated=true, escape=true, format="%s") + optional("-cov_q20", coverage_20_q20_file, spaceSeparated=true, escape=true, format="%s") + optional("-pow", power_file, spaceSeparated=true, escape=true, format="%s") + optional("-tdf", tumor_depth_file, spaceSeparated=true, escape=true, format="%s") + optional("-ndf", normal_depth_file, spaceSeparated=true, escape=true, format="%s") + conditional(filter_mismatching_base_and_quals, "-filterMBQ", escape=true, format="%s") +} From 88297606f019da0e4b9725d5cad58abd1924a2d7 Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Wed, 10 Oct 2012 13:20:30 -0400 Subject: [PATCH 30/90] Adding intellij example configuration files --- intellij_example.tar.bz2 | Bin 0 -> 7520 bytes 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 intellij_example.tar.bz2 diff --git a/intellij_example.tar.bz2 b/intellij_example.tar.bz2 new file mode 100644 index 0000000000000000000000000000000000000000..bce16045cd1cc476305c5e59d07ff9b94b8e5d73 GIT binary patch literal 7520 zcmV-m9iQStT4*^jL0KkKS)7!cJOD4F|M~yi5CDHu|NsC0|M36+|L{No06+`?06+jh z1{h!`o8{-7h4qB5wR>^y>D#MWyS_S4UPrYW)`~XXKBY)=w?n(T?w=>u4%}F@T^prq zM&}*98+P}1CC>JGY3}jFJo}ytO>BKWz24nKq=%-|-tIdTv1Z%Ou|py0B#;mw z0$?VFLqIe|HdE0LQ`8zCr9V{lAE^M-CYlc@B!L1ZL{xsM>S#SrDYXw!Hq`?_27mwn z0006)At_HHdU~eyLFyU;NYu?W zO%G7?YBUW!LqkS@9-*KBcwhGq&L2>c7{2Bv%KPh~XqovPD`P$wqr0wA{OX>F7ZKmy zDsL?$v;dI_RDWe46~zQU)lvw>NPvKQ{r?Z1(}ww2_)M*fQwCl2HJP1e>Ri?dCZpMN zP=M?SA|wk1NPtvh=J3} 
zDw+~D0aFk#a8yBk1VvNe{^Q2@ATLqre5hlqO+ zJH6U_g@n*il^HZ-l$Xlw^mBWD_ji=uWfv%71y1==(-ozyjYp|LKHs?f9BLgry=7~; zQzc6vctrO5Ohh{&v$XFjif1`0L{s*Olu^?q8x}#rqzyaoVB#B4g(81Jx};qlYMjj? z2dYy`yjW&t6?wYx0N9Uh!RRNLa}rLdMi`DPNP-P&$TYbGk_IqjZvxK0qQ|cbe@$>KA+XQo15<95gH&cGF>(8JUi?q#;jjQhX7>?$D_)jV*fSGa99hISqAgCirHOBRlz z5-9I|sR$Ai`Yfa3X$-2y?KsEHOGX z@HXt%cKK^%iU{@ItVyi`Mvy~M#VuYFejK)2%tD1XPotWR1C6g7^>_3=M{GBTR=x6} zL$ie9VdKIYD#p_7X2Ij5c!ACsYkPJWj&|WWp}5G;)cO~hDhd@kB;Qp{=JQuL+IA<+ z9G6K-0B_4%_wIgYeR?@~V~!zKJsytphBh&$h7$B()y02uDWc?iHKJ3Oou2*+MfMx^=gIf{X3|lWppAz z-I7p3R)QX!Idb;|10w?J`_#t4+9oPM5Ep6hKa7!-%b*Z%Ne=e{WH2)WCKzJBSF^U& zK`^`eM3i5TFVExdQsTLk;G@qq9{>}@N65UeiIk4 z@~k{rAI=Uv%!sgaYY$=o+l0!JKSDr9R$?^eCXzi64~|&0Ce%+awpg_DqW1T;JnT{d zru{VON@j0;c#A;BkSsr#?|iam4$XGUx$RTb)H_pMY<00{Yt{q!{n-}(G#x&{RZq!9 z?#T+_N>%`kP|_a@fmo2-OJrXX!d!y~G&Dn3dYM^vWSZJ!#jw;(nuR5VhC?^k`y8?h4xtX)+j?PU}@iN>ljD+(Qmb z8dniBZ(_g=D1mLVH)}(M5H0vcNC?Cv1(;-)C)j(xXY2nS+gLe%zZ*W^p1!|_zv=O} z_Q!+DzkQ#@*8b@JY?61J{_8LQ7Wa7yo_=DdhrF->(?ZCJE#uL2ybpcSMKqaVBwSv) zNU6k^_VtahV8JC~!U6{d!bq&!4p|_$D-~)<6%s3$x=9Tg-hN~ZTTL>hPndt*gp7cg z+wR+roDg$1Hu?=(-&o%#Ayf15RN!Z%05j!NTZ&r35#?&$A)o_Kq_+aG2Tmv-^Ti}kzPUMz}gW$fs9 zJC4FgjTw|ACp)ypVq7XolvK9GOTU706q(I(#Eu(UtE5jUBYT&==_1{Y?P%VlGrzMb zBE|_o9do|DW~w@PU%$xNzAis=G|wxF)AvA^EC+4@aDj~7z-#+Yu_mAdbYOG_n;S_f z2MF%vrdGcJHml~P2xvLQOIQJlnTe)5Z8JtNk{R2)nVMB@y3m5+0?^4LZPW(|mtd*? 
zDv;N1W_Rb+ea4+?lrAx`155T=*|7;B>>wiy;Ml)7#VrgvNDTg;;hzJ_HNI$RTTqf4 zfyMeU1R+7uY;dsXgq*(jmUH&D>*C?4NtOzLb-!$s%bXb_Sue0mN5$wkpEM3kv8IvGA=_?Zzv1%_C1tsyl^1@RKH{a zciZw1@6q0f3Zxbb;ED{RY}JFbgv`jo;vnjTP++40u_0!9X$&Jll(K*-u?XPE#-34@ z(2*iq168t)2LIl%)odnH8EI_Yl*{iaXWjeIC?DX z@F63H#+Z$15ur99k-Q~?3g|8Zc=x8AR4D_lS&{5(Wb(9OWn~6P1uWA@lOmW&8qFIU z7SBN;v4-l*AW>A(2|!UvMJG5MwJuL6M+i`eC_Z*#ky3?khBR#txTdfRi~`O|X#<>q zay;fL4M@CAa10!#6DOq2oZBZ*`QRkS_ry;K+IvB5vHgUgDqb| z3@cbx6TJE0H(;83Aleb%_>debXaSWl^D1it&Z*F@y=N{NvpYj~%SKvwWbjz}(2QH5 zzP2fmjwex!Q0DDKn@@wk;d+8XInO!CB-!he7tkDYlzvB5io3y3~eaI5w=k}~_n&-pi{RD^=|p|INIK%M7^Ju>U^q=8=L%d;bA7r& zav%4cJ$b-?7%3m*P6<7LL;|e%;!{W=n0z(DI%^o;Bei5F?M_70?60j?x99A# z3qM)=cXkVG2kmpPJ8oP#mdXvhiT3f-MRyJ&15Gf(d7`7!iiTN|-|lq+clXLmg&CS~L$9jF&hP0Bm8xWJJK=$OMFB>@>4P6z#Xn3dcffp+LAC z!`V2YWzCFixsCZ_7L;F_QL9T?&p+$c)*hPKS8gSO~VTFj|ro3G{Iuof@swuwn#16JUnNC5#N!PbpVLE zvff%F#RxrAU($v^EeV!2DVBm~xc6xi-$OudTP+k6K2v02fE5E45$L!=gi;#?e7UiT zMhuXoWDk1Yu>gn#4)=DU3)lONdeFJ7$d8PZ2g9kss?ltvQp=qrImnG5r+V3zF4fd( z=B;z~@$<2{hrSQ|A=lv6Q712!p!qTbzdD`k(bs>{2Pp3?yqCWFJqT zzsdD8a3Kh21F_vQhkQ6nV4J6$yI@VnB-^q@1Poy1V4pzHT>!|B!-$_&1xU~WX+}BS| zI+H9JIpulj0|c`K4h+Jv+i8{#sDpzsY3kK&nf6^uxxRKVpRY}nq>h`5p#2eSW1JCB zoQDl9g0bddzNVPmI`lIIC+n3Z-%)6 z)m1D@3`N7B@Sgj)AR36~a4}*AFBJoK>y5WL$p%MGX_9PAb!p#rxvRNmYjZFf_p^MM zS*F2;S$4gmw~WV5MV$7|80zk2IN^|p!(1CUj&QiGuRFLn?P!*xF?*d9(+V$}IXT1} zTha_h(muFR1txBW5H`^S1RVSVru%s#2xv)Akeo8AEDyF+y-8{s&ujwd5cgA z{vZA1IN}ss`1mWy37bexU#GAM?R5hGixGW*YN1F6 zNe^y$@Ot{b0s=j%3iJAec(;rNi2(`#p%tYeftrrHgl%C`y`dwy83`F7-YoY_ox@e$ zL&B&_2$DC2^*`wAL)aSaJF>S~WkZ7Y-Du;z{ zz=U=xW(E3CCn_nOid4kc4!;tEos&Z9t35>kz>s9yLG>gOM({gKT^#n%!cAcT@WPry3G z2V$fF(Bh#$fe>JI^|(v(OqCu~Gq|s3b?7=j5C#Z=5wJo%cpU|9AqFF9LZ;v@MFUXd zs2N){bdrWWIl=TxEeH;2AWyYeEYN~qcFD}6*o@`j5Unt1JJgj_YN^CRhB!bw5Oy*P z1DcHm8GutysIu}&!*1M~s!zPA&Oi}v=#?bEF!B<|)OQu(7P)!*MrromGcLP%0kRC2-f z-Ma&_=fdV8BrOEsXnl*Tw^mqWLHgJM!Kwz2*{PSy3 zu1y0}WCf==J8l%kErz3IvwaFAt0TVM@-$^1MCp&5TKidOUd1p zYf^#|AzIhOeK`tn48Ym)u!~-}_Q+l&S8a}MP 
zqt<;`1h6{%dl7fl4{{^4=5Z9~GklcT&KW~EA3Cu3&;?mZn=%p-SGX&lf>n(5j&iOY zYQ5cz$?+OcwiV^|gbZ zDl{t&8X}C%;tMPtc~k`|q$FrGfm%YMX(X@VI=uFI0&+ldLE%k6`SmVPkC_HWF>Jjk zUDTu0qvTmpsIKA3Q+lPhpdN}HV1^2u*J2-*FWMiW;5$l`j~ao*gwCP$RM5i#_C_Ex zH)zO<;CO6Q=Wwgery}Pl$N)(r*fN-s03O~z4|h@!W6)yYC(`*Uf$0mj(1PL>cZ3#Z z4TbDihB0%-2YpbP;Si{_nnei&VL3_(rX}Qwue;<9@ZzbMbgFgWaWzMX7*HysutU{s z6$yK$fP%s@0>K!oAjF959~anz@iKxw9tVi2%a|O!4?k>B;`D(%h5u(vcUQi+mcKd#qyCaVfaiWCOtfxqGP5gZ9C@~hnd`7h<>b=e_+%r-y-BvC8wl_K{AeW^0CX@V5&D!>>QsSU^i-+=i>^k-=!j-OGaIbfVpkOo zov0DlGYvzkWdki>W|%{31zZ$_CjdJbXwoesiiTpR&LP}<$G(Ags#wwbgBY)fP=_5T zQ4Xh%0B4=sfb<{^vtZvf!~pRs?Q2_tCyTUQVcvR>^r#ARkO%Guv=4>qj_B@o4jqbM zt7aXvQ8X%2q25s!VhE5U6mV4RggF8x`vC+b>2V@PWF#OH>&BrAM934Jfz;}KW^E!s zf_sHg^L}8s>!NOo>yjx%-@oa32x!;2(Ek5!w#C`ezx9s#*X(t`x1;-X6+ z7Tr_H3_VZlT{4sbK3ug=pGr!XjFT71U-2y+_{;q+T4bJi(b= zap}Jvh}lP|*qy&ou8QcP>pMaQngHZp=&Vk$3eem_f*S=$R8;1V4~ZCnf}#?r8iXVQ z0j{X_oxDt(h>0TcAk-a!;E`5M2WU_pz^PFx0iuHO=~{qY8>t!!C^m$obS{f0N*n5P z90a#3kBG3kQg+Y~XP`YbFs-J>`*$M_gdyA-C_|>(hkfw|+l<+w zf}KDpwJ=nwn0Cr`@CL~cY#kjD!*FzDfO#Xz58x^D4{2FCkP+ji120 z`AK?g`I(q&&XIvsdP8c|-6RwyD9PaUmZHX{h)-f%GL}>`AdodPFC>U>q}W4M$7Yrv zXn_GT_PIBf+CkyE8OEjAE9XU38OMGBuvkEl2#MCGQP6b&cqsd};rC<;+96%VZpibB za5i6A1WKt|DOYLN0p46>D$r5;!~>XE;UL^8L$+U03KV+NtWE_Trs&B5hj{M;05y_z( zIB@{aYt}fs#2_Ui8xH+<1|!JJl0XMS2m}_23jO|IA2?LhL%8il_4O)!cmxPzpqfGY zc__a@$5HV6afV(GYNQppH5ZI_4QL$Cz*M(x41|T~>WW%5dc!&O=BaR(D8S87Uc(Wb z4GAcqP$MD;MhMW2k||&T_-I1Sc7quRcUP6)AC5{qas)meG;bs!BEU|AAWLmX=fy+6 z$p=H}qdcCy^{S?BA;kyA^aH=II~+xSl}Oz`ntcF$U;6-Xk#{x0N-aG@bNDK>cMb(tan literal 0 HcmV?d00001 From f085f5d46a79645606fbfc296cdc5aae73c67ae4 Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Mon, 24 Sep 2012 16:15:57 -0400 Subject: [PATCH 31/90] Adding default intellij configuration files --- .idea/.name | 1 + .idea/ant.xml | 15 + .idea/codeStyleSettings.xml | 13 + .idea/compiler.xml | 21 + .idea/copyright/profiles_settings.xml | 5 + .idea/encodings.xml | 5 + .idea/highlighting.xml | 8 + 
.idea/inspectionProfiles/Project_Default.xml | 11 + .../inspectionProfiles/profiles_settings.xml | 7 + .idea/libraries/GATK_libraries.xml | 13 + .idea/misc.xml | 32 ++ .idea/modules.xml | 9 + .idea/scopes/scope_settings.xml | 5 + .idea/uiDesigner.xml | 125 ++++++ .idea/vcs.xml | 10 + .idea/workspace.xml | 386 ++++++++++++++++++ cmi-gatk.iml | 23 ++ 17 files changed, 689 insertions(+) create mode 100644 .idea/.name create mode 100644 .idea/ant.xml create mode 100644 .idea/codeStyleSettings.xml create mode 100644 .idea/compiler.xml create mode 100644 .idea/copyright/profiles_settings.xml create mode 100644 .idea/encodings.xml create mode 100644 .idea/highlighting.xml create mode 100644 .idea/inspectionProfiles/Project_Default.xml create mode 100644 .idea/inspectionProfiles/profiles_settings.xml create mode 100644 .idea/libraries/GATK_libraries.xml create mode 100644 .idea/misc.xml create mode 100644 .idea/modules.xml create mode 100644 .idea/scopes/scope_settings.xml create mode 100644 .idea/uiDesigner.xml create mode 100644 .idea/vcs.xml create mode 100644 .idea/workspace.xml create mode 100644 cmi-gatk.iml diff --git a/.idea/.name b/.idea/.name new file mode 100644 index 000000000..7014f65a5 --- /dev/null +++ b/.idea/.name @@ -0,0 +1 @@ +cmi-gatk \ No newline at end of file diff --git a/.idea/ant.xml b/.idea/ant.xml new file mode 100644 index 000000000..4674eeac9 --- /dev/null +++ b/.idea/ant.xml @@ -0,0 +1,15 @@ + + + + + + + + + + + + + + + diff --git a/.idea/codeStyleSettings.xml b/.idea/codeStyleSettings.xml new file mode 100644 index 000000000..9178b389f --- /dev/null +++ b/.idea/codeStyleSettings.xml @@ -0,0 +1,13 @@ + + + + + + + diff --git a/.idea/compiler.xml b/.idea/compiler.xml new file mode 100644 index 000000000..ded2e9a1d --- /dev/null +++ b/.idea/compiler.xml @@ -0,0 +1,21 @@ + + + + + + diff --git a/.idea/copyright/profiles_settings.xml b/.idea/copyright/profiles_settings.xml new file mode 100644 index 000000000..3572571ad --- /dev/null +++ 
b/.idea/copyright/profiles_settings.xml @@ -0,0 +1,5 @@ + + + + + \ No newline at end of file diff --git a/.idea/encodings.xml b/.idea/encodings.xml new file mode 100644 index 000000000..e206d70d8 --- /dev/null +++ b/.idea/encodings.xml @@ -0,0 +1,5 @@ + + + + + diff --git a/.idea/highlighting.xml b/.idea/highlighting.xml new file mode 100644 index 000000000..f33b64d94 --- /dev/null +++ b/.idea/highlighting.xml @@ -0,0 +1,8 @@ + + + + + + diff --git a/.idea/inspectionProfiles/Project_Default.xml b/.idea/inspectionProfiles/Project_Default.xml new file mode 100644 index 000000000..b8c243dbe --- /dev/null +++ b/.idea/inspectionProfiles/Project_Default.xml @@ -0,0 +1,11 @@ + + + + \ No newline at end of file diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml new file mode 100644 index 000000000..3b312839b --- /dev/null +++ b/.idea/inspectionProfiles/profiles_settings.xml @@ -0,0 +1,7 @@ + + + + \ No newline at end of file diff --git a/.idea/libraries/GATK_libraries.xml b/.idea/libraries/GATK_libraries.xml new file mode 100644 index 000000000..970d0a3dc --- /dev/null +++ b/.idea/libraries/GATK_libraries.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml new file mode 100644 index 000000000..afd7f3778 --- /dev/null +++ b/.idea/misc.xml @@ -0,0 +1,32 @@ + + + + + + + + + + http://www.w3.org/1999/xhtml + + + + + + + diff --git a/.idea/modules.xml b/.idea/modules.xml new file mode 100644 index 000000000..09caa2933 --- /dev/null +++ b/.idea/modules.xml @@ -0,0 +1,9 @@ + + + + + + + + + diff --git a/.idea/scopes/scope_settings.xml b/.idea/scopes/scope_settings.xml new file mode 100644 index 000000000..922003b84 --- /dev/null +++ b/.idea/scopes/scope_settings.xml @@ -0,0 +1,5 @@ + + + + \ No newline at end of file diff --git a/.idea/uiDesigner.xml b/.idea/uiDesigner.xml new file mode 100644 index 000000000..3b0002030 --- /dev/null +++ 
b/.idea/uiDesigner.xml @@ -0,0 +1,125 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/.idea/vcs.xml b/.idea/vcs.xml new file mode 100644 index 000000000..cbc984988 --- /dev/null +++ b/.idea/vcs.xml @@ -0,0 +1,10 @@ + + + + + + + + + diff --git a/.idea/workspace.xml b/.idea/workspace.xml new file mode 100644 index 000000000..87ab79287 --- /dev/null +++ b/.idea/workspace.xml @@ -0,0 +1,386 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + localhost + 5050 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + cmi-gatk + + + + + + + + GATK libraries + + + + + + + + + diff --git a/cmi-gatk.iml b/cmi-gatk.iml new file mode 100644 index 000000000..e63aff535 --- /dev/null +++ b/cmi-gatk.iml @@ -0,0 +1,23 @@ + + + + + + + + + + + + + + + + + + + + + + + From e29bcab42e9ba75276d20b9402d5d881271ce04d Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Tue, 25 Sep 2012 10:51:53 -0400 Subject: [PATCH 32/90] Updating Intellij enviroment and adding Scala --- .idea/libraries/GATK_libraries.xml | 1 - .idea/misc.xml | 2 +- .idea/workspace.xml | 221 ++++++++++++++++++++++++----- cmi-gatk.iml | 10 +- 4 files changed, 192 insertions(+), 42 deletions(-) diff --git a/.idea/libraries/GATK_libraries.xml b/.idea/libraries/GATK_libraries.xml index 970d0a3dc..b363bbe6c 100644 --- a/.idea/libraries/GATK_libraries.xml +++ b/.idea/libraries/GATK_libraries.xml @@ -6,7 +6,6 @@ - diff --git a/.idea/misc.xml b/.idea/misc.xml index afd7f3778..a79280c52 100644 --- 
a/.idea/misc.xml +++ b/.idea/misc.xml @@ -24,7 +24,7 @@ http://www.w3.org/1999/xhtml - + diff --git a/.idea/workspace.xml b/.idea/workspace.xml index 87ab79287..f6d4567fd 100644 --- a/.idea/workspace.xml +++ b/.idea/workspace.xml @@ -1,7 +1,12 @@ - + + + + + + + + + + + + + + - + @@ -112,33 +140,7 @@ - - - - - - - - - - - - - - - - - - - - - + @@ -147,7 +149,7 @@ - + @@ -228,8 +230,9 @@ - + + @@ -286,7 +289,7 @@ + + - + + + + + + + + + + + + + + + + + - + @@ -333,6 +464,18 @@ + + + Detection + + + + + @@ -346,6 +489,7 @@ + 1.6 diff --git a/cmi-gatk.iml b/cmi-gatk.iml index e63aff535..4dbee1336 100644 --- a/cmi-gatk.iml +++ b/cmi-gatk.iml @@ -1,5 +1,13 @@ + + + + + + @@ -17,7 +25,7 @@ - + From fdf29503fb9bb6906d0e0b7ad41b6045aab2f38f Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Tue, 25 Sep 2012 17:18:44 -0400 Subject: [PATCH 33/90] removing annoying xml from IDEA configuration --- .idea/workspace.xml | 529 -------------------------------------------- 1 file changed, 529 deletions(-) delete mode 100644 .idea/workspace.xml diff --git a/.idea/workspace.xml b/.idea/workspace.xml deleted file mode 100644 index f6d4567fd..000000000 --- a/.idea/workspace.xml +++ /dev/null @@ -1,529 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - localhost - 5050 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Detection - - - - - - - - - - - - - - - 1.6 - - - - - - - - cmi-gatk - - - - - - - - GATK libraries - - - - - - - - - From 29195cd3aab9a47118f71516ce55949b979d9967 Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Thu, 27 Sep 2012 11:04:56 -0400 Subject: [PATCH 34/90] Removed the intellij files from the root and made an example 
package for new users. This allows users to start at the same page and then change it as they see fit without interfering with the repo (thanks guillermo!) --- .idea/.name | 1 - .idea/ant.xml | 15 --- .idea/codeStyleSettings.xml | 13 -- .idea/compiler.xml | 21 --- .idea/copyright/profiles_settings.xml | 5 - .idea/encodings.xml | 5 - .idea/highlighting.xml | 8 -- .idea/inspectionProfiles/Project_Default.xml | 11 -- .../inspectionProfiles/profiles_settings.xml | 7 - .idea/libraries/GATK_libraries.xml | 12 -- .idea/misc.xml | 32 ----- .idea/modules.xml | 9 -- .idea/scopes/scope_settings.xml | 5 - .idea/uiDesigner.xml | 125 ------------------ .idea/vcs.xml | 10 -- cmi-gatk.iml | 31 ----- 16 files changed, 310 deletions(-) delete mode 100644 .idea/.name delete mode 100644 .idea/ant.xml delete mode 100644 .idea/codeStyleSettings.xml delete mode 100644 .idea/compiler.xml delete mode 100644 .idea/copyright/profiles_settings.xml delete mode 100644 .idea/encodings.xml delete mode 100644 .idea/highlighting.xml delete mode 100644 .idea/inspectionProfiles/Project_Default.xml delete mode 100644 .idea/inspectionProfiles/profiles_settings.xml delete mode 100644 .idea/libraries/GATK_libraries.xml delete mode 100644 .idea/misc.xml delete mode 100644 .idea/modules.xml delete mode 100644 .idea/scopes/scope_settings.xml delete mode 100644 .idea/uiDesigner.xml delete mode 100644 .idea/vcs.xml delete mode 100644 cmi-gatk.iml diff --git a/.idea/.name b/.idea/.name deleted file mode 100644 index 7014f65a5..000000000 --- a/.idea/.name +++ /dev/null @@ -1 +0,0 @@ -cmi-gatk \ No newline at end of file diff --git a/.idea/ant.xml b/.idea/ant.xml deleted file mode 100644 index 4674eeac9..000000000 --- a/.idea/ant.xml +++ /dev/null @@ -1,15 +0,0 @@ - - - - - - - - - - - - - - - diff --git a/.idea/codeStyleSettings.xml b/.idea/codeStyleSettings.xml deleted file mode 100644 index 9178b389f..000000000 --- a/.idea/codeStyleSettings.xml +++ /dev/null @@ -1,13 +0,0 @@ - - - - - - - diff --git 
a/.idea/compiler.xml b/.idea/compiler.xml deleted file mode 100644 index ded2e9a1d..000000000 --- a/.idea/compiler.xml +++ /dev/null @@ -1,21 +0,0 @@ - - - - - - diff --git a/.idea/copyright/profiles_settings.xml b/.idea/copyright/profiles_settings.xml deleted file mode 100644 index 3572571ad..000000000 --- a/.idea/copyright/profiles_settings.xml +++ /dev/null @@ -1,5 +0,0 @@ - - - - - \ No newline at end of file diff --git a/.idea/encodings.xml b/.idea/encodings.xml deleted file mode 100644 index e206d70d8..000000000 --- a/.idea/encodings.xml +++ /dev/null @@ -1,5 +0,0 @@ - - - - - diff --git a/.idea/highlighting.xml b/.idea/highlighting.xml deleted file mode 100644 index f33b64d94..000000000 --- a/.idea/highlighting.xml +++ /dev/null @@ -1,8 +0,0 @@ - - - - - - diff --git a/.idea/inspectionProfiles/Project_Default.xml b/.idea/inspectionProfiles/Project_Default.xml deleted file mode 100644 index b8c243dbe..000000000 --- a/.idea/inspectionProfiles/Project_Default.xml +++ /dev/null @@ -1,11 +0,0 @@ - - - - \ No newline at end of file diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml deleted file mode 100644 index 3b312839b..000000000 --- a/.idea/inspectionProfiles/profiles_settings.xml +++ /dev/null @@ -1,7 +0,0 @@ - - - - \ No newline at end of file diff --git a/.idea/libraries/GATK_libraries.xml b/.idea/libraries/GATK_libraries.xml deleted file mode 100644 index b363bbe6c..000000000 --- a/.idea/libraries/GATK_libraries.xml +++ /dev/null @@ -1,12 +0,0 @@ - - - - - - - - - - - - \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml deleted file mode 100644 index a79280c52..000000000 --- a/.idea/misc.xml +++ /dev/null @@ -1,32 +0,0 @@ - - - - - - - - - - http://www.w3.org/1999/xhtml - - - - - - - diff --git a/.idea/modules.xml b/.idea/modules.xml deleted file mode 100644 index 09caa2933..000000000 --- a/.idea/modules.xml +++ /dev/null @@ -1,9 +0,0 @@ - - - - - - - - - diff --git 
a/.idea/scopes/scope_settings.xml b/.idea/scopes/scope_settings.xml deleted file mode 100644 index 922003b84..000000000 --- a/.idea/scopes/scope_settings.xml +++ /dev/null @@ -1,5 +0,0 @@ - - - - \ No newline at end of file diff --git a/.idea/uiDesigner.xml b/.idea/uiDesigner.xml deleted file mode 100644 index 3b0002030..000000000 --- a/.idea/uiDesigner.xml +++ /dev/null @@ -1,125 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/.idea/vcs.xml b/.idea/vcs.xml deleted file mode 100644 index cbc984988..000000000 --- a/.idea/vcs.xml +++ /dev/null @@ -1,10 +0,0 @@ - - - - - - - - - diff --git a/cmi-gatk.iml b/cmi-gatk.iml deleted file mode 100644 index 4dbee1336..000000000 --- a/cmi-gatk.iml +++ /dev/null @@ -1,31 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - From e9eaa33c0b3699472da7287a2c6e23cc6b1ac08f Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Thu, 27 Sep 2012 11:09:41 -0400 Subject: [PATCH 35/90] adding some directories to gitignore --- .gitignore | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/.gitignore b/.gitignore index 456794cea..927caf98d 100644 --- a/.gitignore +++ b/.gitignore @@ -18,10 +18,8 @@ queueScatterGather /bar* integrationtests/ public/testdata/onTheFlyOutputTest.vcf -private/testdata/onTheFlyOutputTest.vcf -lib -html -gatkdocs -dist -build -resources +build/ +dist/ +dump/ +lib/ +out/ From af5a6fdaced7814b7af3d6e858897c6b0eadb8ed Mon Sep 17 00:00:00 2001 From: Guillermo del Angel Date: Thu, 11 Oct 2012 11:09:49 -0400 Subject: [PATCH 36/90] Resolve [DEV-7]: add single-sample VCF calling at end of FASTQ-BAM pipeline. 
Initial steps of [DEV-4]: queue extensions for Picard QC metrics --- .../picard/CalculateHsMetrics.scala | 60 +++++++++++++++++++ .../picard/CollectGcBiasMetrics.scala | 32 ++++++++++ .../picard/CollectMultipleMetrics.scala | 36 +++++++++++ 3 files changed, 128 insertions(+) create mode 100644 public/scala/src/org/broadinstitute/sting/queue/extensions/picard/CalculateHsMetrics.scala create mode 100644 public/scala/src/org/broadinstitute/sting/queue/extensions/picard/CollectGcBiasMetrics.scala create mode 100644 public/scala/src/org/broadinstitute/sting/queue/extensions/picard/CollectMultipleMetrics.scala diff --git a/public/scala/src/org/broadinstitute/sting/queue/extensions/picard/CalculateHsMetrics.scala b/public/scala/src/org/broadinstitute/sting/queue/extensions/picard/CalculateHsMetrics.scala new file mode 100644 index 000000000..75e9300dc --- /dev/null +++ b/public/scala/src/org/broadinstitute/sting/queue/extensions/picard/CalculateHsMetrics.scala @@ -0,0 +1,60 @@ +package org.broadinstitute.sting.queue.extensions.picard + +import org.broadinstitute.sting.commandline.{Argument, Output, Input} +import java.io.File + +/** + * Created with IntelliJ IDEA. + * User: delangel + * Date: 10/9/12 + * Time: 5:59 PM + * To change this template use File | Settings | File Templates. + */ +class CalculateHsMetrics extends org.broadinstitute.sting.queue.function.JavaCommandLineFunction with PicardBamFunction { + analysisName = "CalculateHsMetrics" + javaMainClass = "net.sf.picard.sam.CalculateHsMetrics" + + @Input(doc="The input SAM or BAM files to analyze. 
Must be coordinate sorted.", shortName = "input", fullName = "input_bam_files", required = true) + var input: Seq[File] = Nil + + @Output(doc="The output file to write statistics to", shortName = "output", fullName = "output_file", required = true) + var output: File = _ + + @Argument(doc="Interval list with targets", shortName = "targets", fullName = "target_list", required = true) + var targets: File = _ + + @Argument(doc="Interval list with baits", shortName = "baits", fullName = "bait_list", required = true) + var baits: File = _ + + @Argument(doc="Reference file", shortName = "reference", fullName = "reference", required = true) + var reference: File = _ + /* + @Argument(doc = "Maximum number of file handles to keep open when spilling read ends to disk. Set this number a little lower than the per-process maximum number of file that may be open. This number can be found by executing the 'ulimit -n' command on a Unix system.", shortName = "max_file_handles", fullName ="max_file_handles_for_read_ends_maps", required=false) + var MAX_FILE_HANDLES_FOR_READ_ENDS_MAP: Int = -1; + + @Argument(doc = "This number, plus the maximum RAM available to the JVM, determine the memory footprint used by some of the sorting collections. 
If you are running out of memory, try reducing this number.", shortName = "sorting_ratio", fullName = "sorting_collection_size_ratio", required = false) + var SORTING_COLLECTION_SIZE_RATIO: Double = -1 + */ + override def freezeFieldValues() { + super.freezeFieldValues() +// if (outputIndex == null && output != null) + // outputIndex = new File(output.getName.stripSuffix(".bam") + ".bai") + } + + val level = "SAMPLE" + + override def inputBams = input + override def outputBam = output + //this.sortOrder = null + //this.createIndex = Some(true) + override def commandLine = super.commandLine + + required("BAIT_INTERVALS=" + baits) + + required("TARGET_INTERVALS=" + targets) + + required("REFERENCE_SEQUENCE=" + reference) + + optional("METRIC_ACCUMULATION_LEVEL="+level)/*+ + conditional(REMOVE_DUPLICATES, "REMOVE_DUPLICATES=true") + + conditional(MAX_FILE_HANDLES_FOR_READ_ENDS_MAP > 0, "MAX_FILE_HANDLES_FOR_READ_ENDS_MAP=" + MAX_FILE_HANDLES_FOR_READ_ENDS_MAP.toString) + + conditional(SORTING_COLLECTION_SIZE_RATIO > 0, "SORTING_COLLECTION_SIZE_RATIO=" + SORTING_COLLECTION_SIZE_RATIO.toString) */ + + +} diff --git a/public/scala/src/org/broadinstitute/sting/queue/extensions/picard/CollectGcBiasMetrics.scala b/public/scala/src/org/broadinstitute/sting/queue/extensions/picard/CollectGcBiasMetrics.scala new file mode 100644 index 000000000..de2b0af9e --- /dev/null +++ b/public/scala/src/org/broadinstitute/sting/queue/extensions/picard/CollectGcBiasMetrics.scala @@ -0,0 +1,32 @@ +package org.broadinstitute.sting.queue.extensions.picard + +import org.broadinstitute.sting.commandline.{Argument, Output, Input} +import java.io.File + +/** + * Created with IntelliJ IDEA. + * User: delangel + * Date: 10/10/12 + * Time: 10:37 AM + * To change this template use File | Settings | File Templates. 
+ */ +class CollectGcBiasMetrics extends org.broadinstitute.sting.queue.function.JavaCommandLineFunction with PicardBamFunction { + analysisName = "CalculateGcMetrics" + javaMainClass = "net.sf.picard.sam.CalculateGcMetrics" + + @Input(doc="The input SAM or BAM files to analyze. Must be coordinate sorted.", shortName = "input", fullName = "input_bam_files", required = true) + var input: Seq[File] = Nil + + @Output(doc="The output file to write statistics to", shortName = "output", fullName = "output_file", required = true) + var output: File = _ + + @Argument(doc="Reference file", shortName = "reference", fullName = "reference", required = true) + var reference: File = _ + + override def inputBams = input + override def outputBam = output + override def commandLine = super.commandLine + + required("CHART_OUTPUT=" + output+".pdf") + + required("REFERENCE_SEQUENCE=" + reference) + + required("ASSUME_SORTED=true") +} diff --git a/public/scala/src/org/broadinstitute/sting/queue/extensions/picard/CollectMultipleMetrics.scala b/public/scala/src/org/broadinstitute/sting/queue/extensions/picard/CollectMultipleMetrics.scala new file mode 100644 index 000000000..a9af4e858 --- /dev/null +++ b/public/scala/src/org/broadinstitute/sting/queue/extensions/picard/CollectMultipleMetrics.scala @@ -0,0 +1,36 @@ +package org.broadinstitute.sting.queue.extensions.picard + +import org.broadinstitute.sting.commandline.{Argument, Output, Input} +import java.io.File + +/** + * Created with IntelliJ IDEA. + * User: delangel + * Date: 10/10/12 + * Time: 10:37 AM + * To change this template use File | Settings | File Templates. + */ +class CollectMultipleMetrics extends org.broadinstitute.sting.queue.function.JavaCommandLineFunction with PicardBamFunction{ + analysisName = "CalculateMultipleMetrics" + javaMainClass = "net.sf.picard.sam.CalculateMultipleMetrics" + + @Input(doc="The input SAM or BAM files to analyze. 
Must be coordinate sorted.", shortName = "input", fullName = "input_bam_files", required = true) + var input: Seq[File] = Nil + + @Output(doc="The output file to write statistics to", shortName = "output", fullName = "output_file", required = true) + var output: File = _ + + @Argument(doc="Reference file", shortName = "reference", fullName = "reference", required = true) + var reference: File = _ + + override def inputBams = input + override def outputBam = output + override def commandLine = super.commandLine + + required("REFERENCE_SEQUENCE=" + reference) + + required("ASSUME_SORTED=true") + + required("PROGRAM=QualityScoreDistribution") + + required("PROGRAM=MeanQualityByCycle") + + required("PROGRAM=CollectAlignmentSummaryMetrics" ) + + +} From c1706ef0ef42bd6a7986009e7664c453313e8cbc Mon Sep 17 00:00:00 2001 From: Kristian Cibulskis Date: Fri, 12 Oct 2012 14:18:12 -0400 Subject: [PATCH 38/90] upgraded mutation caller with VCF output raw indel calls (non filtered,non vcf) --- .../queue/extensions/cancer/MuTect.scala | 33 +++++++++++++++++-- 1 file changed, 31 insertions(+), 2 deletions(-) diff --git a/public/scala/src/org/broadinstitute/sting/queue/extensions/cancer/MuTect.scala b/public/scala/src/org/broadinstitute/sting/queue/extensions/cancer/MuTect.scala index 623d397d4..1193e7dec 100644 --- a/public/scala/src/org/broadinstitute/sting/queue/extensions/cancer/MuTect.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/extensions/cancer/MuTect.scala @@ -6,7 +6,7 @@ import org.broadinstitute.sting.commandline.Gather import org.broadinstitute.sting.commandline.Input import org.broadinstitute.sting.commandline.Output import org.broadinstitute.sting.queue.function.scattergather.ScatterGatherableFunction -import org.broadinstitute.sting.queue.extensions.gatk.{LocusScatterFunction, TaggedFile} +import org.broadinstitute.sting.queue.extensions.gatk.{TaggedFile, VcfGatherFunction, LocusScatterFunction} class MuTect extends 
org.broadinstitute.sting.queue.extensions.gatk.CommandLineGATK with ScatterGatherableFunction { analysisName = "MuTect" @@ -45,6 +45,10 @@ class MuTect extends org.broadinstitute.sting.queue.extensions.gatk.CommandLineG @Argument(fullName="force_alleles", shortName="", doc="force output for all alleles at each site", required=false, exclusiveOf="", validation="") var force_alleles: Boolean = _ + /** only emit passing calls */ + @Argument(fullName="only_passing_calls", shortName="", doc="only emit passing calls", required=false, exclusiveOf="", validation="") + var only_passing_calls: Boolean = _ + /** Initial LOD threshold for calling tumor variant */ @Argument(fullName="initial_tumor_lod", shortName="", doc="Initial LOD threshold for calling tumor variant", required=false, exclusiveOf="", validation="") var initial_tumor_lod: Option[Float] = None @@ -242,6 +246,28 @@ class MuTect extends org.broadinstitute.sting.queue.extensions.gatk.CommandLineG */ def o_=(value: File) { this.out = value } + /** VCF output of mutation candidates */ + @Output(fullName="vcf", shortName="vcf", doc="VCF output of mutation candidates", required=false, exclusiveOf="", validation="") + @Gather(classOf[VcfGatherFunction]) + var vcf: File = _ + + /** Automatically generated index for vcf */ + @Output(fullName="vcfIndex", shortName="", doc="Automatically generated index for vcf", required=false, exclusiveOf="", validation="") + @Gather(enabled=false) + private var vcfIndex: File = _ + + /** Don't output the usual VCF header tag with the command line. FOR DEBUGGING PURPOSES ONLY. This option is required in order to pass integration tests. */ + @Argument(fullName="no_cmdline_in_header", shortName="no_cmdline_in_header", doc="Don't output the usual VCF header tag with the command line. FOR DEBUGGING PURPOSES ONLY. 
This option is required in order to pass integration tests.", required=false, exclusiveOf="", validation="") + var no_cmdline_in_header: Boolean = _ + + /** Just output sites without genotypes (i.e. only the first 8 columns of the VCF) */ + @Argument(fullName="sites_only", shortName="sites_only", doc="Just output sites without genotypes (i.e. only the first 8 columns of the VCF)", required=false, exclusiveOf="", validation="") + var sites_only: Boolean = _ + + /** force BCF output, regardless of the file's extension */ + @Argument(fullName="bcf", shortName="bcf", doc="force BCF output, regardless of the file's extension", required=false, exclusiveOf="", validation="") + var bcf: Boolean = _ + /** VCF file of DBSNP information */ @Input(fullName="dbsnp", shortName="dbsnp", doc="VCF file of DBSNP information", required=false, exclusiveOf="", validation="") var dbsnp: Seq[File] = Nil @@ -369,10 +395,13 @@ class MuTect extends org.broadinstitute.sting.queue.extensions.gatk.CommandLineG override def freezeFieldValues() { super.freezeFieldValues() + if (vcf != null && !org.broadinstitute.sting.utils.io.IOUtils.isSpecialFile(vcf)) + if (!org.broadinstitute.sting.gatk.io.stubs.VCFWriterArgumentTypeDescriptor.isCompressed(vcf.getPath)) + vcfIndex = new File(vcf.getPath + ".idx") dbsnpIndexes ++= dbsnp.filter(orig => orig != null).map(orig => new File(orig.getPath + ".idx")) cosmicIndexes ++= cosmic.filter(orig => orig != null).map(orig => new File(orig.getPath + ".idx")) normal_panelIndexes ++= normal_panel.filter(orig => orig != null).map(orig => new File(orig.getPath + ".idx")) } - override def commandLine = super.commandLine + conditional(noop, "--noop", escape=true, format="%s") + conditional(enable_extended_output, "--enable_extended_output", escape=true, format="%s") + conditional(artifact_detection_mode, "--artifact_detection_mode", escape=true, format="%s") + optional("--tumor_sample_name", tumor_sample_name, spaceSeparated=true, escape=true, format="%s") + 
optional("--bam_tumor_sample_name", bam_tumor_sample_name, spaceSeparated=true, escape=true, format="%s") + optional("--normal_sample_name", normal_sample_name, spaceSeparated=true, escape=true, format="%s") + conditional(force_output, "--force_output", escape=true, format="%s") + conditional(force_alleles, "--force_alleles", escape=true, format="%s") + optional("--initial_tumor_lod", initial_tumor_lod, spaceSeparated=true, escape=true, format=initial_tumor_lodFormat) + optional("--tumor_lod", tumor_lod, spaceSeparated=true, escape=true, format=tumor_lodFormat) + optional("--fraction_contamination", fraction_contamination, spaceSeparated=true, escape=true, format=fraction_contaminationFormat) + optional("--minimum_mutation_cell_fraction", minimum_mutation_cell_fraction, spaceSeparated=true, escape=true, format=minimum_mutation_cell_fractionFormat) + optional("--normal_lod", normal_lod, spaceSeparated=true, escape=true, format=normal_lodFormat) + optional("--normal_artifact_lod", normal_artifact_lod, spaceSeparated=true, escape=true, format=normal_artifact_lodFormat) + optional("--strand_artifact_lod", strand_artifact_lod, spaceSeparated=true, escape=true, format=strand_artifact_lodFormat) + optional("--strand_artifact_power_threshold", strand_artifact_power_threshold, spaceSeparated=true, escape=true, format=strand_artifact_power_thresholdFormat) + optional("--dbsnp_normal_lod", dbsnp_normal_lod, spaceSeparated=true, escape=true, format=dbsnp_normal_lodFormat) + optional("--somatic_classification_normal_power_threshold", somatic_classification_normal_power_threshold, spaceSeparated=true, escape=true, format=somatic_classification_normal_power_thresholdFormat) + optional("--minimum_normal_allele_fraction", minimum_normal_allele_fraction, spaceSeparated=true, escape=true, format=minimum_normal_allele_fractionFormat) + optional("--tumor_f_pretest", tumor_f_pretest, spaceSeparated=true, escape=true, format=tumor_f_pretestFormat) + optional("--min_qscore", min_qscore, 
spaceSeparated=true, escape=true, format="%s") + optional("--gap_events_threshold", gap_events_threshold, spaceSeparated=true, escape=true, format="%s") + optional("--heavily_clipped_read_fraction", heavily_clipped_read_fraction, spaceSeparated=true, escape=true, format=heavily_clipped_read_fractionFormat) + optional("--clipping_bias_pvalue_threshold", clipping_bias_pvalue_threshold, spaceSeparated=true, escape=true, format=clipping_bias_pvalue_thresholdFormat) + optional("--fraction_mapq0_threshold", fraction_mapq0_threshold, spaceSeparated=true, escape=true, format=fraction_mapq0_thresholdFormat) + optional("--pir_median_threshold", pir_median_threshold, spaceSeparated=true, escape=true, format=pir_median_thresholdFormat) + optional("--pir_mad_threshold", pir_mad_threshold, spaceSeparated=true, escape=true, format=pir_mad_thresholdFormat) + optional("--required_maximum_alt_allele_mapping_quality_score", required_maximum_alt_allele_mapping_quality_score, spaceSeparated=true, escape=true, format="%s") + optional("--max_alt_alleles_in_normal_count", max_alt_alleles_in_normal_count, spaceSeparated=true, escape=true, format="%s") + optional("--max_alt_alleles_in_normal_qscore_sum", max_alt_alleles_in_normal_qscore_sum, spaceSeparated=true, escape=true, format="%s") + optional("--max_alt_allele_in_normal_fraction", max_alt_allele_in_normal_fraction, spaceSeparated=true, escape=true, format=max_alt_allele_in_normal_fractionFormat) + optional("--power_constant_qscore", power_constant_qscore, spaceSeparated=true, escape=true, format="%s") + optional("--absolute_copy_number_data", absolute_copy_number_data, spaceSeparated=true, escape=true, format="%s") + optional("--power_constant_af", power_constant_af, spaceSeparated=true, escape=true, format=power_constant_afFormat) + optional("-o", out, spaceSeparated=true, escape=true, format="%s") + repeat("-dbsnp", dbsnp, formatPrefix=TaggedFile.formatCommandLineParameter, spaceSeparated=true, escape=true, format="%s") + 
repeat("-cosmic", cosmic, formatPrefix=TaggedFile.formatCommandLineParameter, spaceSeparated=true, escape=true, format="%s") + repeat("-normal_panel", normal_panel, formatPrefix=TaggedFile.formatCommandLineParameter, spaceSeparated=true, escape=true, format="%s") + optional("-cov", coverage_file, spaceSeparated=true, escape=true, format="%s") + optional("-cov_q20", coverage_20_q20_file, spaceSeparated=true, escape=true, format="%s") + optional("-pow", power_file, spaceSeparated=true, escape=true, format="%s") + optional("-tdf", tumor_depth_file, spaceSeparated=true, escape=true, format="%s") + optional("-ndf", normal_depth_file, spaceSeparated=true, escape=true, format="%s") + conditional(filter_mismatching_base_and_quals, "-filterMBQ", escape=true, format="%s") + override def commandLine = super.commandLine + conditional(noop, "--noop", escape=true, format="%s") + conditional(enable_extended_output, "--enable_extended_output", escape=true, format="%s") + conditional(artifact_detection_mode, "--artifact_detection_mode", escape=true, format="%s") + optional("--tumor_sample_name", tumor_sample_name, spaceSeparated=true, escape=true, format="%s") + optional("--bam_tumor_sample_name", bam_tumor_sample_name, spaceSeparated=true, escape=true, format="%s") + optional("--normal_sample_name", normal_sample_name, spaceSeparated=true, escape=true, format="%s") + conditional(force_output, "--force_output", escape=true, format="%s") + conditional(force_alleles, "--force_alleles", escape=true, format="%s") + conditional(only_passing_calls, "--only_passing_calls", escape=true, format="%s") + optional("--initial_tumor_lod", initial_tumor_lod, spaceSeparated=true, escape=true, format=initial_tumor_lodFormat) + optional("--tumor_lod", tumor_lod, spaceSeparated=true, escape=true, format=tumor_lodFormat) + optional("--fraction_contamination", fraction_contamination, spaceSeparated=true, escape=true, format=fraction_contaminationFormat) + optional("--minimum_mutation_cell_fraction", 
minimum_mutation_cell_fraction, spaceSeparated=true, escape=true, format=minimum_mutation_cell_fractionFormat) + optional("--normal_lod", normal_lod, spaceSeparated=true, escape=true, format=normal_lodFormat) + optional("--normal_artifact_lod", normal_artifact_lod, spaceSeparated=true, escape=true, format=normal_artifact_lodFormat) + optional("--strand_artifact_lod", strand_artifact_lod, spaceSeparated=true, escape=true, format=strand_artifact_lodFormat) + optional("--strand_artifact_power_threshold", strand_artifact_power_threshold, spaceSeparated=true, escape=true, format=strand_artifact_power_thresholdFormat) + optional("--dbsnp_normal_lod", dbsnp_normal_lod, spaceSeparated=true, escape=true, format=dbsnp_normal_lodFormat) + optional("--somatic_classification_normal_power_threshold", somatic_classification_normal_power_threshold, spaceSeparated=true, escape=true, format=somatic_classification_normal_power_thresholdFormat) + optional("--minimum_normal_allele_fraction", minimum_normal_allele_fraction, spaceSeparated=true, escape=true, format=minimum_normal_allele_fractionFormat) + optional("--tumor_f_pretest", tumor_f_pretest, spaceSeparated=true, escape=true, format=tumor_f_pretestFormat) + optional("--min_qscore", min_qscore, spaceSeparated=true, escape=true, format="%s") + optional("--gap_events_threshold", gap_events_threshold, spaceSeparated=true, escape=true, format="%s") + optional("--heavily_clipped_read_fraction", heavily_clipped_read_fraction, spaceSeparated=true, escape=true, format=heavily_clipped_read_fractionFormat) + optional("--clipping_bias_pvalue_threshold", clipping_bias_pvalue_threshold, spaceSeparated=true, escape=true, format=clipping_bias_pvalue_thresholdFormat) + optional("--fraction_mapq0_threshold", fraction_mapq0_threshold, spaceSeparated=true, escape=true, format=fraction_mapq0_thresholdFormat) + optional("--pir_median_threshold", pir_median_threshold, spaceSeparated=true, escape=true, format=pir_median_thresholdFormat) + 
optional("--pir_mad_threshold", pir_mad_threshold, spaceSeparated=true, escape=true, format=pir_mad_thresholdFormat) + optional("--required_maximum_alt_allele_mapping_quality_score", required_maximum_alt_allele_mapping_quality_score, spaceSeparated=true, escape=true, format="%s") + optional("--max_alt_alleles_in_normal_count", max_alt_alleles_in_normal_count, spaceSeparated=true, escape=true, format="%s") + optional("--max_alt_alleles_in_normal_qscore_sum", max_alt_alleles_in_normal_qscore_sum, spaceSeparated=true, escape=true, format="%s") + optional("--max_alt_allele_in_normal_fraction", max_alt_allele_in_normal_fraction, spaceSeparated=true, escape=true, format=max_alt_allele_in_normal_fractionFormat) + optional("--power_constant_qscore", power_constant_qscore, spaceSeparated=true, escape=true, format="%s") + optional("--absolute_copy_number_data", absolute_copy_number_data, spaceSeparated=true, escape=true, format="%s") + optional("--power_constant_af", power_constant_af, spaceSeparated=true, escape=true, format=power_constant_afFormat) + optional("-o", out, spaceSeparated=true, escape=true, format="%s") + optional("-vcf", vcf, spaceSeparated=true, escape=true, format="%s") + conditional(no_cmdline_in_header, "-no_cmdline_in_header", escape=true, format="%s") + conditional(sites_only, "-sites_only", escape=true, format="%s") + conditional(bcf, "-bcf", escape=true, format="%s") + repeat("-dbsnp", dbsnp, formatPrefix=TaggedFile.formatCommandLineParameter, spaceSeparated=true, escape=true, format="%s") + repeat("-cosmic", cosmic, formatPrefix=TaggedFile.formatCommandLineParameter, spaceSeparated=true, escape=true, format="%s") + repeat("-normal_panel", normal_panel, formatPrefix=TaggedFile.formatCommandLineParameter, spaceSeparated=true, escape=true, format="%s") + optional("-cov", coverage_file, spaceSeparated=true, escape=true, format="%s") + optional("-cov_q20", coverage_20_q20_file, spaceSeparated=true, escape=true, format="%s") + optional("-pow", power_file, 
spaceSeparated=true, escape=true, format="%s") + optional("-tdf", tumor_depth_file, spaceSeparated=true, escape=true, format="%s") + optional("-ndf", normal_depth_file, spaceSeparated=true, escape=true, format="%s") + conditional(filter_mismatching_base_and_quals, "-filterMBQ", escape=true, format="%s") } From 05111eeaef41d3d3d5c2483b16728a76fc8f8a6e Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Wed, 10 Oct 2012 15:00:17 -0400 Subject: [PATCH 40/90] Making nContigs parameter hidden in ReduceReads For now, the het reduction should only be performed for diploids (n=2). We haven't really tested it for other ploidy so it should remain hidden until someone braves it out. --- .../compression/reducereads/ReduceReads.java | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReads.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReads.java index 1b3e68647..5810bc94f 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReads.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReads.java @@ -181,15 +181,6 @@ public class ReduceReads extends ReadWalker, ReduceRea @Argument(fullName = "minimum_del_proportion_to_trigger_variant", shortName = "mindel", doc = "", required = false) private double minIndelProportionToTriggerVariant = 0.05; - /** - * Minimum proportion of indels in a site to trigger a variant region. Anything below this will be - * considered consensus. - */ - @Argument(fullName = "contigs", shortName = "ctg", doc = "", required = false) - private int nContigs = 2; - - - /** * Downsamples the coverage of a variable region approximately (guarantees the minimum to be equal to this). * A value of 0 turns downsampling off. 
@@ -197,6 +188,14 @@ public class ReduceReads extends ReadWalker, ReduceRea @Argument(fullName = "downsample_coverage", shortName = "ds", doc = "", required = false) private int downsampleCoverage = 250; + /** + * Number of chromosomes in the sample (this is used for the polyploid consensus compression). Only + * tested for humans (or organisms with n=2). Use at your own risk! + */ + @Hidden + @Argument(fullName = "contigs", shortName = "ctg", doc = "", required = false) + private int nContigs = 2; + @Hidden @Argument(fullName = "", shortName = "dl", doc = "", required = false) private int debugLevel = 0; From 274ac4836f3357a9cc0d0d37a0f9c6f98050542f Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Fri, 12 Oct 2012 13:50:10 -0400 Subject: [PATCH 41/90] Allowing the GATK to have non-required outputs Modified the SAMFileWriterArgumentTypeDescriptor to accept output bam files that are null if they're not required (in the @Output annotation). This change enables the nWayOut parameter for the IndelRealigner and ReduceReads to operate optionally while maintaining the original single way out.
[#DEV-10 transition:31 resolution:1] --- .../SAMFileWriterArgumentTypeDescriptor.java | 36 +++++++++---------- .../gatk/walkers/indels/IndelRealigner.java | 4 +-- .../indels/IndelRealignerIntegrationTest.java | 10 ++++++ 3 files changed, 27 insertions(+), 23 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileWriterArgumentTypeDescriptor.java b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileWriterArgumentTypeDescriptor.java index 8566f6c63..dcf2704f5 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileWriterArgumentTypeDescriptor.java +++ b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileWriterArgumentTypeDescriptor.java @@ -124,32 +124,28 @@ public class SAMFileWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor // This parser has been passed a null filename and the GATK is not responsible for creating a type default for the object; // therefore, the user must have failed to specify a type default - if(writerFileName == null) { - if(!source.isRequired()) - throw new MissingArgumentValueException(bamArgumentDefinition); - if(generateMD5) + if(writerFileName == null && generateMD5) throw new ArgumentException("MD5 generation specified, but no output file specified. If md5 generation is desired, please specify a BAM output file and an md5 file will be written alongside."); - } // Create the stub and set parameters. 
- SAMFileWriterStub stub; - if ( writerFileName != null ) + SAMFileWriterStub stub = null; // stub = new SAMFileWriterStub(engine, defaultOutputStream); + + if ( writerFileName != null ) { stub = new SAMFileWriterStub(engine, new File(writerFileName)); - else - stub = new SAMFileWriterStub(engine, defaultOutputStream); - if ( compressionLevel != null ) - stub.setCompressionLevel(compressionLevel); - if ( indexOnTheFly ) - stub.setIndexOnTheFly(indexOnTheFly); - if ( generateMD5 ) - stub.setGenerateMD5(generateMD5); - if ( simplifyBAM ) - stub.setSimplifyBAM(simplifyBAM); + if ( compressionLevel != null ) + stub.setCompressionLevel(compressionLevel); + if ( indexOnTheFly ) + stub.setIndexOnTheFly(indexOnTheFly); + if ( generateMD5 ) + stub.setGenerateMD5(generateMD5); + if ( simplifyBAM ) + stub.setSimplifyBAM(simplifyBAM); - // WARNING: Side effects required by engine! - parsingEngine.addTags(stub,getArgumentTags(matches)); - engine.addOutput(stub); + // WARNING: Side effects required by engine! + parsingEngine.addTags(stub,getArgumentTags(matches)); + engine.addOutput(stub); + } return stub; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java index 76d8d85c2..998894fbf 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java @@ -370,8 +370,6 @@ public class IndelRealigner extends ReadWalker { currentInterval = intervals.hasNext() ? 
intervals.next() : null; - writerToUse = writer; - if ( N_WAY_OUT != null ) { boolean createIndex = true; @@ -383,9 +381,9 @@ public class IndelRealigner extends ReadWalker { createIndex, generateMD5s,createProgramRecord(),KEEP_ALL_PG_RECORDS); } } else { - // set up the output writer setupWriter(getToolkit().getSAMFileHeader()); + writerToUse = writer; } manager = new ConstrainedMateFixingManager(writerToUse, getToolkit().getGenomeLocParser(), MAX_ISIZE_FOR_MOVEMENT, MAX_POS_MOVE_ALLOWED, MAX_RECORDS_IN_MEMORY); diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/IndelRealignerIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/IndelRealignerIntegrationTest.java index 040845828..9b464cfec 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/IndelRealignerIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/IndelRealignerIntegrationTest.java @@ -113,4 +113,14 @@ public class IndelRealignerIntegrationTest extends WalkerTest { executeTest(String.format("realigner [%s]", entry.getKey()), spec); } } + + @Test + public void testNWayOut() { + WalkerTestSpec spec1 = new WalkerTestSpec( + baseCommandPrefix + " -nWayOut .clean.bam ", + 1, + Arrays.asList("d41d8cd98f00b204e9800998ecf8427e")); + executeTest("test realigner nWayOut", spec1); + } + } From 25be94fbb8d7f762e1576d75c7c0a76d46bb45ef Mon Sep 17 00:00:00 2001 From: Ryan Poplin Date: Mon, 15 Oct 2012 13:24:32 -0400 Subject: [PATCH 47/90] Increasing the precision of MathUtils.approximateLog10SumLog10 from 1E-3 to 1E-4. Genotyper integration tests change as a result. Expanding the unit tests of MathUtils.log10sumLog10. 
--- ...GenotyperGeneralPloidyIntegrationTest.java | 8 +- .../HaplotypeCallerIntegrationTest.java | 12 +- .../broadinstitute/sting/utils/MathUtils.java | 4 +- .../UnifiedGenotyperIntegrationTest.java | 22 +-- .../sting/utils/MathUtilsUnitTest.java | 153 +++++++++++------- 5 files changed, 117 insertions(+), 82 deletions(-) diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidyIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidyIntegrationTest.java index 4de3cd887..219c36a05 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidyIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidyIntegrationTest.java @@ -60,22 +60,22 @@ public class UnifiedGenotyperGeneralPloidyIntegrationTest extends WalkerTest { @Test(enabled = true) public void testBOTH_GGA_Pools() { - PC_LSV_Test(String.format(" -maxAltAlleles 2 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",LSV_ALLELES),"LSV_BOTH_GGA","BOTH","6d60d9f3dfe8e1580214be0d170b0fff"); + PC_LSV_Test(String.format(" -maxAltAlleles 2 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",LSV_ALLELES),"LSV_BOTH_GGA","BOTH","67dabdbf1e6ed8a83d2e85766558a20a"); } @Test(enabled = true) public void testINDEL_GGA_Pools() { - PC_LSV_Test(String.format(" -maxAltAlleles 1 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",LSV_ALLELES),"LSV_INDEL_GGA","INDEL","30abf3c1868a61145edbc08fe35c8150"); + PC_LSV_Test(String.format(" -maxAltAlleles 1 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",LSV_ALLELES),"LSV_INDEL_GGA","INDEL","d4bfae27f1b07923f381d708d8a34cf4"); } @Test(enabled = true) public void testINDEL_maxAltAlleles2_ploidy3_Pools_noRef() { - PC_LSV_Test_NoRef(" -maxAltAlleles 
2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","INDEL","ef99bc0513d3267f43b84cb88a324376"); + PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","INDEL","7d6f319b9edcb1ff8c290fef150a2df8"); } @Test(enabled = true) public void testINDEL_maxAltAlleles2_ploidy1_Pools_noRef() { - PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 1","LSV_INDEL_DISC_NOREF_p1","INDEL","8ca07270717641385fe5d2e07e530782"); + PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 1","LSV_INDEL_DISC_NOREF_p1","INDEL","dd02890123e07e7412a49475cb6280f1"); } @Test(enabled = true) diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java index 3450725c8..be8fd2fb2 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java @@ -21,17 +21,17 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerMultiSample() { - HCTest(CEUTRIO_BAM, "", "a305107a5ec889152aa2efbe90b249d7"); + HCTest(CEUTRIO_BAM, "", "8c52c0955099cca3215a0d78fd455894"); } @Test public void testHaplotypeCallerSingleSample() { - HCTest(NA12878_BAM, "", "0c2217ec81f19790a6d1f98ebf8cf70d"); + HCTest(NA12878_BAM, "", "01367428c26d3eaf9297c58bf8677dd3"); } @Test public void testHaplotypeCallerMultiSampleGGA() { - HCTest(CEUTRIO_BAM, "--max_alternate_alleles_for_indels 3 -gt_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf", "0396c7352ab8ab98b03dca36299a0ddf"); + HCTest(CEUTRIO_BAM, "--max_alternate_alleles_for_indels 3 -gt_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf", "61c1a0fb62d909229af6b5a91dad8b35"); } private 
void HCTestComplexVariants(String bam, String args, String md5) { @@ -42,7 +42,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerMultiSampleComplex() { - HCTestComplexVariants(CEUTRIO_BAM, "", "2cfb7d830d5a7eb7bc754b5f688a27a5"); + HCTestComplexVariants(CEUTRIO_BAM, "", "30598abeeb0b0ae5816ffdbf0c4044fd"); } private void HCTestSymbolicVariants(String bam, String args, String md5) { @@ -53,7 +53,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerSingleSampleSymbolic() { - HCTestSymbolicVariants(NA12878_CHR20_BAM, "", "d511848a46083c0d0b2495f65f162c2e"); + HCTestSymbolicVariants(NA12878_CHR20_BAM, "", "6eb9c1026225b38ba7bd3c4c218f8269"); } private void HCTestIndelQualityScores(String bam, String args, String md5) { @@ -70,7 +70,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void HCTestProblematicReadsModifiedInActiveRegions() { final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, privateTestDir + "haplotype-problem-4.bam") + " --no_cmdline_in_header -o %s -minPruning 3"; - final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("7e112ea4623617f1f7f8f562f54aa2aa")); + final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("fa5c5eb996e95aed12c50d70e6dd74d7")); executeTest("HCTestProblematicReadsModifiedInActiveRegions: ", spec); } diff --git a/public/java/src/org/broadinstitute/sting/utils/MathUtils.java b/public/java/src/org/broadinstitute/sting/utils/MathUtils.java index 8aa727be8..a1d6907a2 100644 --- a/public/java/src/org/broadinstitute/sting/utils/MathUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/MathUtils.java @@ -51,8 +51,8 @@ public class MathUtils { public static final double[] log10Cache; public static final double[] log10FactorialCache; private static final double[] jacobianLogTable; - private static final double JACOBIAN_LOG_TABLE_STEP = 0.001; - 
private static final double JACOBIAN_LOG_TABLE_INV_STEP = 1.0 / 0.001; + private static final double JACOBIAN_LOG_TABLE_STEP = 0.0001; + private static final double JACOBIAN_LOG_TABLE_INV_STEP = 1.0 / JACOBIAN_LOG_TABLE_STEP; private static final double MAX_JACOBIAN_TOLERANCE = 8.0; private static final int JACOBIAN_LOG_TABLE_SIZE = (int) (MAX_JACOBIAN_TOLERANCE / JACOBIAN_LOG_TABLE_STEP) + 1; private static final int MAXN = 50000; diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java index 905ceef0f..e2ea47d9c 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java @@ -28,7 +28,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testMultiSamplePilot1() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( baseCommand + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10,022,000-10,025,000", 1, - Arrays.asList("fe9c0e9e4b4ee4677145748cdd2285ff")); + Arrays.asList("b3abf320f7d02d0e3b2883833419130e")); executeTest("test MultiSample Pilot1", spec); } @@ -60,7 +60,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testMultipleSNPAlleles() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R " + b37KGReference + " -nosl --no_cmdline_in_header -glm BOTH --dbsnp " + b37dbSNP129 + " -I " + privateTestDir + "multiallelic.snps.bam -o %s -L " + privateTestDir + "multiallelic.snps.intervals", 1, - Arrays.asList("772e14d8c908044c04053d204bad69ef")); + Arrays.asList("26af30187316f742878c85f0ed091837")); executeTest("test Multiple SNP alleles", spec); } @@ -76,7 +76,7 @@ public class 
UnifiedGenotyperIntegrationTest extends WalkerTest { public void testReverseTrim() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R " + b37KGReference + " -nosl --no_cmdline_in_header -glm INDEL -I " + validationDataLocation + "CEUTrio.HiSeq.b37.chr20.10_11mb.bam -o %s -L 20:10289124 -L 20:10090289", 1, - Arrays.asList("1fb69aa3857e921191997daa73f1b687")); + Arrays.asList("aa9cf96ab8f5aa844387e3aef1f27249")); executeTest("test reverse trim", spec); } @@ -249,7 +249,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { " -o %s" + " -L 1:10,000,000-10,100,000", 1, - Arrays.asList("5d19e3077e0cabbb364f68676a09ebe0")); + Arrays.asList("04a87b87ee4323eba853c78f25551d1a")); executeTest(String.format("test multiple technologies"), spec); } @@ -268,7 +268,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { " -L 1:10,000,000-10,100,000" + " -baq CALCULATE_AS_NECESSARY", 1, - Arrays.asList("8a1931095f70523ad11cb99b30df7b84")); + Arrays.asList("950fb032cc9902ae48bd21f272d2fd52")); executeTest(String.format("test calling with BAQ"), spec); } @@ -287,7 +287,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { " -o %s" + " -L 1:10,000,000-10,500,000", 1, - Arrays.asList("64a491b5276fd5d1cd04260ea3e63cf7")); + Arrays.asList("b3df138254ed141b61a758df87757e0d")); executeTest(String.format("test indel caller in SLX"), spec); } @@ -302,7 +302,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { " -minIndelCnt 1" + " -L 1:10,000,000-10,100,000", 1, - Arrays.asList("f63a8b8061e6c5999408d34798061895")); + Arrays.asList("63fd9488daadd4baaef0a98f02916996")); executeTest(String.format("test indel caller in SLX with low min allele count"), spec); } @@ -315,7 +315,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { " -o %s" + " -L 1:10,000,000-10,500,000", 1, - Arrays.asList("c9d684ff2f2a9083480db6e962d612a9")); + 
Arrays.asList("52b5a432092995c92fe71e1942689ba8")); executeTest(String.format("test indel calling, multiple technologies"), spec); } @@ -325,7 +325,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + privateTestDir + "indelAllelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1, - Arrays.asList("833fd97c6f32d7af6c9c088a78e51f68")); + Arrays.asList("7e3f67bf371112be5dbadb4fe6faa52a")); executeTest("test MultiSample Pilot2 indels with alleles passed in", spec); } @@ -335,7 +335,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { baseCommandIndels + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + privateTestDir + "indelAllelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1, - Arrays.asList("95b73c24c68dc475516571d9f49dfb1e")); + Arrays.asList("bc31c4977cb7e563ddf9c8dea27f3f4f")); executeTest("test MultiSample Pilot2 indels with alleles passed in and emitting all sites", spec); } @@ -343,7 +343,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testMultiSampleIndels1() { WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec( baseCommandIndels + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10450700-10551000", 1, - Arrays.asList("3bdbf48de30bac58f3bcbc5bf3aa63aa")); + Arrays.asList("7fc488fe16dea9f023bfcfdaa908a548")); List result = executeTest("test MultiSample Pilot1 CEU indels", spec1).getFirst(); WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec( diff --git a/public/java/test/org/broadinstitute/sting/utils/MathUtilsUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/MathUtilsUnitTest.java index 04b0199d8..fc2b2638b 100755 
--- a/public/java/test/org/broadinstitute/sting/utils/MathUtilsUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/MathUtilsUnitTest.java @@ -225,65 +225,67 @@ public class MathUtilsUnitTest extends BaseTest { @Test public void testApproximateLog10SumLog10() { + + final double requiredPrecision = 1E-4; - Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[] {0.0}), 0.0, 1e-3); - Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[] {-5.15}), -5.15, 1e-3); - Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[] {130.0}), 130.0, 1e-3); - Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[] {-0.145}), -0.145, 1e-3); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[] {0.0}), 0.0, requiredPrecision); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[] {-5.15}), -5.15, requiredPrecision); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[] {130.0}), 130.0, requiredPrecision); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[] {-0.145}), -0.145, requiredPrecision); - Assert.assertEquals(MathUtils.approximateLog10SumLog10(0.0, 0.0), Math.log10(Math.pow(10.0, 0.0) + Math.pow(10.0, 0.0)), 1e-3); - Assert.assertEquals(MathUtils.approximateLog10SumLog10(-1.0, 0.0), Math.log10(Math.pow(10.0, -1.0) + Math.pow(10.0, 0.0)), 1e-3); - Assert.assertEquals(MathUtils.approximateLog10SumLog10(0.0, -1.0), Math.log10(Math.pow(10.0, 0.0) + Math.pow(10.0, -1.0)), 1e-3); - Assert.assertEquals(MathUtils.approximateLog10SumLog10(-2.2, -3.5), Math.log10(Math.pow(10.0, -2.2) + Math.pow(10.0, -3.5)), 1e-3); - Assert.assertEquals(MathUtils.approximateLog10SumLog10(-1.0, -7.1), Math.log10(Math.pow(10.0, -1.0) + Math.pow(10.0, -7.1)), 1e-3); - Assert.assertEquals(MathUtils.approximateLog10SumLog10(5.0, 6.2), Math.log10(Math.pow(10.0, 5.0) + Math.pow(10.0, 6.2)), 1e-3); - Assert.assertEquals(MathUtils.approximateLog10SumLog10(38.1, 16.2), 
Math.log10(Math.pow(10.0, 38.1) + Math.pow(10.0, 16.2)), 1e-3); - Assert.assertEquals(MathUtils.approximateLog10SumLog10(-38.1, 6.2), Math.log10(Math.pow(10.0, -38.1) + Math.pow(10.0, 6.2)), 1e-3); - Assert.assertEquals(MathUtils.approximateLog10SumLog10(-19.1, -37.1), Math.log10(Math.pow(10.0, -19.1) + Math.pow(10.0, -37.1)), 1e-3); - Assert.assertEquals(MathUtils.approximateLog10SumLog10(-29.1, -27.6), Math.log10(Math.pow(10.0, -29.1) + Math.pow(10.0, -27.6)), 1e-3); - Assert.assertEquals(MathUtils.approximateLog10SumLog10(-0.12345, -0.23456), Math.log10(Math.pow(10.0, -0.12345) + Math.pow(10.0, -0.23456)), 1e-3); - Assert.assertEquals(MathUtils.approximateLog10SumLog10(-15.7654, -17.0101), Math.log10(Math.pow(10.0, -15.7654) + Math.pow(10.0, -17.0101)), 1e-3); - Assert.assertEquals(MathUtils.approximateLog10SumLog10(-0.12345, Double.NEGATIVE_INFINITY), -0.12345, 1e-3); - Assert.assertEquals(MathUtils.approximateLog10SumLog10(-15.7654, Double.NEGATIVE_INFINITY), -15.7654, 1e-3); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(0.0, 0.0), Math.log10(Math.pow(10.0, 0.0) + Math.pow(10.0, 0.0)), requiredPrecision); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(-1.0, 0.0), Math.log10(Math.pow(10.0, -1.0) + Math.pow(10.0, 0.0)), requiredPrecision); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(0.0, -1.0), Math.log10(Math.pow(10.0, 0.0) + Math.pow(10.0, -1.0)), requiredPrecision); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(-2.2, -3.5), Math.log10(Math.pow(10.0, -2.2) + Math.pow(10.0, -3.5)), requiredPrecision); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(-1.0, -7.1), Math.log10(Math.pow(10.0, -1.0) + Math.pow(10.0, -7.1)), requiredPrecision); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(5.0, 6.2), Math.log10(Math.pow(10.0, 5.0) + Math.pow(10.0, 6.2)), requiredPrecision); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(38.1, 16.2), Math.log10(Math.pow(10.0, 38.1) + Math.pow(10.0, 16.2)), 
requiredPrecision); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(-38.1, 6.2), Math.log10(Math.pow(10.0, -38.1) + Math.pow(10.0, 6.2)), requiredPrecision); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(-19.1, -37.1), Math.log10(Math.pow(10.0, -19.1) + Math.pow(10.0, -37.1)), requiredPrecision); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(-29.1, -27.6), Math.log10(Math.pow(10.0, -29.1) + Math.pow(10.0, -27.6)), requiredPrecision); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(-0.12345, -0.23456), Math.log10(Math.pow(10.0, -0.12345) + Math.pow(10.0, -0.23456)), requiredPrecision); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(-15.7654, -17.0101), Math.log10(Math.pow(10.0, -15.7654) + Math.pow(10.0, -17.0101)), requiredPrecision); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(-0.12345, Double.NEGATIVE_INFINITY), -0.12345, requiredPrecision); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(-15.7654, Double.NEGATIVE_INFINITY), -15.7654, requiredPrecision); - Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[] {0.0, 0.0}), Math.log10(Math.pow(10.0, 0.0) + Math.pow(10.0, 0.0)), 1e-3); - Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[] {-1.0, 0.0}), Math.log10(Math.pow(10.0, -1.0) + Math.pow(10.0, 0.0)), 1e-3); - Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[] {0.0, -1.0}), Math.log10(Math.pow(10.0, 0.0) + Math.pow(10.0, -1.0)), 1e-3); - Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[] {-2.2, -3.5}), Math.log10(Math.pow(10.0, -2.2) + Math.pow(10.0, -3.5)), 1e-3); - Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[] {-1.0, -7.1}), Math.log10(Math.pow(10.0, -1.0) + Math.pow(10.0, -7.1)), 1e-3); - Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[] {5.0, 6.2}), Math.log10(Math.pow(10.0, 5.0) + Math.pow(10.0, 6.2)), 1e-3); - Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[] 
{38.1, 16.2}), Math.log10(Math.pow(10.0, 38.1) + Math.pow(10.0, 16.2)), 1e-3); - Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[] {-38.1, 6.2}), Math.log10(Math.pow(10.0, -38.1) + Math.pow(10.0, 6.2)), 1e-3); - Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[] {-19.1, -37.1}), Math.log10(Math.pow(10.0, -19.1) + Math.pow(10.0, -37.1)), 1e-3); - Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[] {-29.1, -27.6}), Math.log10(Math.pow(10.0, -29.1) + Math.pow(10.0, -27.6)), 1e-3); - Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[] {-0.12345, -0.23456}), Math.log10(Math.pow(10.0, -0.12345) + Math.pow(10.0, -0.23456)), 1e-3); - Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[] {-15.7654, -17.0101}), Math.log10(Math.pow(10.0, -15.7654) + Math.pow(10.0, -17.0101)), 1e-3); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[] {0.0, 0.0}), Math.log10(Math.pow(10.0, 0.0) + Math.pow(10.0, 0.0)), requiredPrecision); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[] {-1.0, 0.0}), Math.log10(Math.pow(10.0, -1.0) + Math.pow(10.0, 0.0)), requiredPrecision); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[] {0.0, -1.0}), Math.log10(Math.pow(10.0, 0.0) + Math.pow(10.0, -1.0)), requiredPrecision); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[] {-2.2, -3.5}), Math.log10(Math.pow(10.0, -2.2) + Math.pow(10.0, -3.5)), requiredPrecision); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[] {-1.0, -7.1}), Math.log10(Math.pow(10.0, -1.0) + Math.pow(10.0, -7.1)), requiredPrecision); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[] {5.0, 6.2}), Math.log10(Math.pow(10.0, 5.0) + Math.pow(10.0, 6.2)), requiredPrecision); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[] {38.1, 16.2}), Math.log10(Math.pow(10.0, 38.1) + Math.pow(10.0, 16.2)), requiredPrecision); + 
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[] {-38.1, 6.2}), Math.log10(Math.pow(10.0, -38.1) + Math.pow(10.0, 6.2)), requiredPrecision); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[] {-19.1, -37.1}), Math.log10(Math.pow(10.0, -19.1) + Math.pow(10.0, -37.1)), requiredPrecision); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[] {-29.1, -27.6}), Math.log10(Math.pow(10.0, -29.1) + Math.pow(10.0, -27.6)), requiredPrecision); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[] {-0.12345, -0.23456}), Math.log10(Math.pow(10.0, -0.12345) + Math.pow(10.0, -0.23456)), requiredPrecision); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[] {-15.7654, -17.0101}), Math.log10(Math.pow(10.0, -15.7654) + Math.pow(10.0, -17.0101)), requiredPrecision); - Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[] {0.0, 0.0, 0.0}), Math.log10(Math.pow(10.0, 0.0) + Math.pow(10.0, 0.0) + Math.pow(10.0, 0.0)), 1e-3); - Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[] {-1.0, 0.0, 0.0}), Math.log10(Math.pow(10.0, -1.0) + Math.pow(10.0, 0.0) + Math.pow(10.0, 0.0)), 1e-3); - Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[] {0.0, -1.0, -2.5}), Math.log10(Math.pow(10.0, 0.0) + Math.pow(10.0, -1.0) + Math.pow(10.0, -2.5)), 1e-3); - Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[] {-2.2, -3.5, -1.1}), Math.log10(Math.pow(10.0, -2.2) + Math.pow(10.0, -3.5) + Math.pow(10.0, -1.1)), 1e-3); - Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[] {-1.0, -7.1, 0.5}), Math.log10(Math.pow(10.0, -1.0) + Math.pow(10.0, -7.1) + Math.pow(10.0, 0.5)), 1e-3); - Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[] {5.0, 6.2, 1.3}), Math.log10(Math.pow(10.0, 5.0) + Math.pow(10.0, 6.2) + Math.pow(10.0, 1.3)), 1e-3); - Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[] {38.1, 16.2, 18.1}), 
Math.log10(Math.pow(10.0, 38.1) + Math.pow(10.0, 16.2) + Math.pow(10.0, 18.1)), 1e-3); - Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[] {-38.1, 6.2, 26.6}), Math.log10(Math.pow(10.0, -38.1) + Math.pow(10.0, 6.2) + Math.pow(10.0, 26.6)), 1e-3); - Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[] {-19.1, -37.1, -45.1}), Math.log10(Math.pow(10.0, -19.1) + Math.pow(10.0, -37.1) + Math.pow(10.0, -45.1)), 1e-3); - Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[] {-29.1, -27.6, -26.2}), Math.log10(Math.pow(10.0, -29.1) + Math.pow(10.0, -27.6) + Math.pow(10.0, -26.2)), 1e-3); - Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[] {-0.12345, -0.23456, -0.34567}), Math.log10(Math.pow(10.0, -0.12345) + Math.pow(10.0, -0.23456) + Math.pow(10.0, -0.34567)), 1e-3); - Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[] {-15.7654, -17.0101, -17.9341}), Math.log10(Math.pow(10.0, -15.7654) + Math.pow(10.0, -17.0101) + Math.pow(10.0, -17.9341)), 1e-3); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[] {0.0, 0.0, 0.0}), Math.log10(Math.pow(10.0, 0.0) + Math.pow(10.0, 0.0) + Math.pow(10.0, 0.0)), requiredPrecision); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[] {-1.0, 0.0, 0.0}), Math.log10(Math.pow(10.0, -1.0) + Math.pow(10.0, 0.0) + Math.pow(10.0, 0.0)), requiredPrecision); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[] {0.0, -1.0, -2.5}), Math.log10(Math.pow(10.0, 0.0) + Math.pow(10.0, -1.0) + Math.pow(10.0, -2.5)), requiredPrecision); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[] {-2.2, -3.5, -1.1}), Math.log10(Math.pow(10.0, -2.2) + Math.pow(10.0, -3.5) + Math.pow(10.0, -1.1)), requiredPrecision); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[] {-1.0, -7.1, 0.5}), Math.log10(Math.pow(10.0, -1.0) + Math.pow(10.0, -7.1) + Math.pow(10.0, 0.5)), requiredPrecision); + 
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[] {5.0, 6.2, 1.3}), Math.log10(Math.pow(10.0, 5.0) + Math.pow(10.0, 6.2) + Math.pow(10.0, 1.3)), requiredPrecision); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[] {38.1, 16.2, 18.1}), Math.log10(Math.pow(10.0, 38.1) + Math.pow(10.0, 16.2) + Math.pow(10.0, 18.1)), requiredPrecision); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[] {-38.1, 6.2, 26.6}), Math.log10(Math.pow(10.0, -38.1) + Math.pow(10.0, 6.2) + Math.pow(10.0, 26.6)), requiredPrecision); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[] {-19.1, -37.1, -45.1}), Math.log10(Math.pow(10.0, -19.1) + Math.pow(10.0, -37.1) + Math.pow(10.0, -45.1)), requiredPrecision); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[] {-29.1, -27.6, -26.2}), Math.log10(Math.pow(10.0, -29.1) + Math.pow(10.0, -27.6) + Math.pow(10.0, -26.2)), requiredPrecision); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[] {-0.12345, -0.23456, -0.34567}), Math.log10(Math.pow(10.0, -0.12345) + Math.pow(10.0, -0.23456) + Math.pow(10.0, -0.34567)), requiredPrecision); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[] {-15.7654, -17.0101, -17.9341}), Math.log10(Math.pow(10.0, -15.7654) + Math.pow(10.0, -17.0101) + Math.pow(10.0, -17.9341)), requiredPrecision); - Assert.assertEquals(MathUtils.approximateLog10SumLog10(0.0, 0.0, 0.0), Math.log10(Math.pow(10.0, 0.0) + Math.pow(10.0, 0.0) + Math.pow(10.0, 0.0)), 1e-3); - Assert.assertEquals(MathUtils.approximateLog10SumLog10(-1.0, 0.0, 0.0), Math.log10(Math.pow(10.0, -1.0) + Math.pow(10.0, 0.0) + Math.pow(10.0, 0.0)), 1e-3); - Assert.assertEquals(MathUtils.approximateLog10SumLog10(0.0, -1.0, -2.5), Math.log10(Math.pow(10.0, 0.0) + Math.pow(10.0, -1.0) + Math.pow(10.0, -2.5)), 1e-3); - Assert.assertEquals(MathUtils.approximateLog10SumLog10(-2.2, -3.5, -1.1), Math.log10(Math.pow(10.0, -2.2) + Math.pow(10.0, -3.5) + 
Math.pow(10.0, -1.1)), 1e-3); - Assert.assertEquals(MathUtils.approximateLog10SumLog10(-1.0, -7.1, 0.5), Math.log10(Math.pow(10.0, -1.0) + Math.pow(10.0, -7.1) + Math.pow(10.0, 0.5)), 1e-3); - Assert.assertEquals(MathUtils.approximateLog10SumLog10(5.0, 6.2, 1.3), Math.log10(Math.pow(10.0, 5.0) + Math.pow(10.0, 6.2) + Math.pow(10.0, 1.3)), 1e-3); - Assert.assertEquals(MathUtils.approximateLog10SumLog10(38.1, 16.2, 18.1), Math.log10(Math.pow(10.0, 38.1) + Math.pow(10.0, 16.2) + Math.pow(10.0, 18.1)), 1e-3); - Assert.assertEquals(MathUtils.approximateLog10SumLog10(-38.1, 6.2, 26.6), Math.log10(Math.pow(10.0, -38.1) + Math.pow(10.0, 6.2) + Math.pow(10.0, 26.6)), 1e-3); - Assert.assertEquals(MathUtils.approximateLog10SumLog10(-19.1, -37.1, -45.1), Math.log10(Math.pow(10.0, -19.1) + Math.pow(10.0, -37.1) + Math.pow(10.0, -45.1)), 1e-3); - Assert.assertEquals(MathUtils.approximateLog10SumLog10(-29.1, -27.6, -26.2), Math.log10(Math.pow(10.0, -29.1) + Math.pow(10.0, -27.6) + Math.pow(10.0, -26.2)), 1e-3); - Assert.assertEquals(MathUtils.approximateLog10SumLog10(-0.12345, -0.23456, -0.34567), Math.log10(Math.pow(10.0, -0.12345) + Math.pow(10.0, -0.23456) + Math.pow(10.0, -0.34567)), 1e-3); - Assert.assertEquals(MathUtils.approximateLog10SumLog10(-15.7654, -17.0101, -17.9341), Math.log10(Math.pow(10.0, -15.7654) + Math.pow(10.0, -17.0101) + Math.pow(10.0, -17.9341)), 1e-3); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(0.0, 0.0, 0.0), Math.log10(Math.pow(10.0, 0.0) + Math.pow(10.0, 0.0) + Math.pow(10.0, 0.0)), requiredPrecision); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(-1.0, 0.0, 0.0), Math.log10(Math.pow(10.0, -1.0) + Math.pow(10.0, 0.0) + Math.pow(10.0, 0.0)), requiredPrecision); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(0.0, -1.0, -2.5), Math.log10(Math.pow(10.0, 0.0) + Math.pow(10.0, -1.0) + Math.pow(10.0, -2.5)), requiredPrecision); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(-2.2, -3.5, -1.1), 
Math.log10(Math.pow(10.0, -2.2) + Math.pow(10.0, -3.5) + Math.pow(10.0, -1.1)), requiredPrecision); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(-1.0, -7.1, 0.5), Math.log10(Math.pow(10.0, -1.0) + Math.pow(10.0, -7.1) + Math.pow(10.0, 0.5)), requiredPrecision); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(5.0, 6.2, 1.3), Math.log10(Math.pow(10.0, 5.0) + Math.pow(10.0, 6.2) + Math.pow(10.0, 1.3)), requiredPrecision); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(38.1, 16.2, 18.1), Math.log10(Math.pow(10.0, 38.1) + Math.pow(10.0, 16.2) + Math.pow(10.0, 18.1)), requiredPrecision); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(-38.1, 6.2, 26.6), Math.log10(Math.pow(10.0, -38.1) + Math.pow(10.0, 6.2) + Math.pow(10.0, 26.6)), requiredPrecision); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(-19.1, -37.1, -45.1), Math.log10(Math.pow(10.0, -19.1) + Math.pow(10.0, -37.1) + Math.pow(10.0, -45.1)), requiredPrecision); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(-29.1, -27.6, -26.2), Math.log10(Math.pow(10.0, -29.1) + Math.pow(10.0, -27.6) + Math.pow(10.0, -26.2)), requiredPrecision); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(-0.12345, -0.23456, -0.34567), Math.log10(Math.pow(10.0, -0.12345) + Math.pow(10.0, -0.23456) + Math.pow(10.0, -0.34567)), requiredPrecision); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(-15.7654, -17.0101, -17.9341), Math.log10(Math.pow(10.0, -15.7654) + Math.pow(10.0, -17.0101) + Math.pow(10.0, -17.9341)), requiredPrecision); } @Test @@ -299,14 +301,47 @@ public class MathUtilsUnitTest extends BaseTest { @Test public void testLog10sumLog10() { + final double requiredPrecision = 1E-14; + final double log3 = 0.477121254719662; - Assert.assertEquals(MathUtils.compareDoubles(MathUtils.log10sumLog10(new double[]{0.0, 0.0, 0.0}), log3), 0); - Assert.assertEquals(MathUtils.compareDoubles(MathUtils.log10sumLog10(new double[] {0.0, 0.0, 0.0}, 0), log3), 0); - 
Assert.assertEquals(MathUtils.compareDoubles(MathUtils.log10sumLog10(new double[]{0.0, 0.0, 0.0}, 0, 3), log3), 0); + Assert.assertEquals(MathUtils.log10sumLog10(new double[]{0.0, 0.0, 0.0}), log3, requiredPrecision); + Assert.assertEquals(MathUtils.log10sumLog10(new double[] {0.0, 0.0, 0.0}, 0), log3, requiredPrecision); + Assert.assertEquals(MathUtils.log10sumLog10(new double[]{0.0, 0.0, 0.0}, 0, 3), log3, requiredPrecision); final double log2 = 0.301029995663981; - Assert.assertEquals(MathUtils.compareDoubles(MathUtils.log10sumLog10(new double[] {0.0, 0.0, 0.0}, 0, 2), log2), 0); - Assert.assertEquals(MathUtils.compareDoubles(MathUtils.log10sumLog10(new double[] {0.0, 0.0, 0.0}, 0, 1), 0.0), 0); + Assert.assertEquals(MathUtils.log10sumLog10(new double[] {0.0, 0.0, 0.0}, 0, 2), log2, requiredPrecision); + Assert.assertEquals(MathUtils.log10sumLog10(new double[] {0.0, 0.0, 0.0}, 0, 1), 0.0, requiredPrecision); + + Assert.assertEquals(MathUtils.log10sumLog10(new double[] {0.0}), 0.0, requiredPrecision); + Assert.assertEquals(MathUtils.log10sumLog10(new double[] {-5.15}), -5.15, requiredPrecision); + Assert.assertEquals(MathUtils.log10sumLog10(new double[] {130.0}), 130.0, requiredPrecision); + Assert.assertEquals(MathUtils.log10sumLog10(new double[] {-0.145}), -0.145, requiredPrecision); + + Assert.assertEquals(MathUtils.log10sumLog10(new double[] {0.0, 0.0}), Math.log10(Math.pow(10.0, 0.0) + Math.pow(10.0, 0.0)), requiredPrecision); + Assert.assertEquals(MathUtils.log10sumLog10(new double[] {-1.0, 0.0}), Math.log10(Math.pow(10.0, -1.0) + Math.pow(10.0, 0.0)), requiredPrecision); + Assert.assertEquals(MathUtils.log10sumLog10(new double[] {0.0, -1.0}), Math.log10(Math.pow(10.0, 0.0) + Math.pow(10.0, -1.0)), requiredPrecision); + Assert.assertEquals(MathUtils.log10sumLog10(new double[] {-2.2, -3.5}), Math.log10(Math.pow(10.0, -2.2) + Math.pow(10.0, -3.5)), requiredPrecision); + Assert.assertEquals(MathUtils.log10sumLog10(new double[] {-1.0, -7.1}), 
Math.log10(Math.pow(10.0, -1.0) + Math.pow(10.0, -7.1)), requiredPrecision); + Assert.assertEquals(MathUtils.log10sumLog10(new double[] {5.0, 6.2}), Math.log10(Math.pow(10.0, 5.0) + Math.pow(10.0, 6.2)), requiredPrecision); + Assert.assertEquals(MathUtils.log10sumLog10(new double[] {38.1, 16.2}), Math.log10(Math.pow(10.0, 38.1) + Math.pow(10.0, 16.2)), requiredPrecision); + Assert.assertEquals(MathUtils.log10sumLog10(new double[] {-38.1, 6.2}), Math.log10(Math.pow(10.0, -38.1) + Math.pow(10.0, 6.2)), requiredPrecision); + Assert.assertEquals(MathUtils.log10sumLog10(new double[] {-19.1, -37.1}), Math.log10(Math.pow(10.0, -19.1) + Math.pow(10.0, -37.1)), requiredPrecision); + Assert.assertEquals(MathUtils.log10sumLog10(new double[] {-29.1, -27.6}), Math.log10(Math.pow(10.0, -29.1) + Math.pow(10.0, -27.6)), requiredPrecision); + Assert.assertEquals(MathUtils.log10sumLog10(new double[] {-0.12345, -0.23456}), Math.log10(Math.pow(10.0, -0.12345) + Math.pow(10.0, -0.23456)), requiredPrecision); + Assert.assertEquals(MathUtils.log10sumLog10(new double[] {-15.7654, -17.0101}), Math.log10(Math.pow(10.0, -15.7654) + Math.pow(10.0, -17.0101)), requiredPrecision); + + Assert.assertEquals(MathUtils.log10sumLog10(new double[] {0.0, 0.0, 0.0}), Math.log10(Math.pow(10.0, 0.0) + Math.pow(10.0, 0.0) + Math.pow(10.0, 0.0)), requiredPrecision); + Assert.assertEquals(MathUtils.log10sumLog10(new double[] {-1.0, 0.0, 0.0}), Math.log10(Math.pow(10.0, -1.0) + Math.pow(10.0, 0.0) + Math.pow(10.0, 0.0)), requiredPrecision); + Assert.assertEquals(MathUtils.log10sumLog10(new double[] {0.0, -1.0, -2.5}), Math.log10(Math.pow(10.0, 0.0) + Math.pow(10.0, -1.0) + Math.pow(10.0, -2.5)), requiredPrecision); + Assert.assertEquals(MathUtils.log10sumLog10(new double[] {-2.2, -3.5, -1.1}), Math.log10(Math.pow(10.0, -2.2) + Math.pow(10.0, -3.5) + Math.pow(10.0, -1.1)), requiredPrecision); + Assert.assertEquals(MathUtils.log10sumLog10(new double[] {-1.0, -7.1, 0.5}), Math.log10(Math.pow(10.0, -1.0) + 
Math.pow(10.0, -7.1) + Math.pow(10.0, 0.5)), requiredPrecision); + Assert.assertEquals(MathUtils.log10sumLog10(new double[] {5.0, 6.2, 1.3}), Math.log10(Math.pow(10.0, 5.0) + Math.pow(10.0, 6.2) + Math.pow(10.0, 1.3)), requiredPrecision); + Assert.assertEquals(MathUtils.log10sumLog10(new double[] {38.1, 16.2, 18.1}), Math.log10(Math.pow(10.0, 38.1) + Math.pow(10.0, 16.2) + Math.pow(10.0, 18.1)), requiredPrecision); + Assert.assertEquals(MathUtils.log10sumLog10(new double[] {-38.1, 6.2, 26.6}), Math.log10(Math.pow(10.0, -38.1) + Math.pow(10.0, 6.2) + Math.pow(10.0, 26.6)), requiredPrecision); + Assert.assertEquals(MathUtils.log10sumLog10(new double[] {-19.1, -37.1, -45.1}), Math.log10(Math.pow(10.0, -19.1) + Math.pow(10.0, -37.1) + Math.pow(10.0, -45.1)), requiredPrecision); + Assert.assertEquals(MathUtils.log10sumLog10(new double[] {-29.1, -27.6, -26.2}), Math.log10(Math.pow(10.0, -29.1) + Math.pow(10.0, -27.6) + Math.pow(10.0, -26.2)), requiredPrecision); + Assert.assertEquals(MathUtils.log10sumLog10(new double[] {-0.12345, -0.23456, -0.34567}), Math.log10(Math.pow(10.0, -0.12345) + Math.pow(10.0, -0.23456) + Math.pow(10.0, -0.34567)), requiredPrecision); + Assert.assertEquals(MathUtils.log10sumLog10(new double[] {-15.7654, -17.0101, -17.9341}), Math.log10(Math.pow(10.0, -15.7654) + Math.pow(10.0, -17.0101) + Math.pow(10.0, -17.9341)), requiredPrecision); } @Test From 429c96e72356aeb7554b305c73e38f486eb4a436 Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Mon, 24 Sep 2012 17:01:17 -0400 Subject: [PATCH 48/90] Generic input file name recognition (still need to implement support to FastQ, but it now can at least accept it) --- .../qscripts/DataProcessingPipeline.scala | 20 +++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala index 56f6460fb..c21db30ce 
100755 --- a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala +++ b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala @@ -96,6 +96,7 @@ class DataProcessingPipeline extends QScript { var cleanModelEnum: ConsensusDeterminationModel = ConsensusDeterminationModel.USE_READS + val bwaParameters: String = " -q 5 -l 32 -k 2 -t 4 -o 1 " @@ -165,12 +166,15 @@ class DataProcessingPipeline extends QScript { var realignedBams: Seq[File] = Seq() var index = 1 for (bam <- bams) { - // first revert the BAM file to the original qualities - val saiFile1 = swapExt(bam, ".bam", "." + index + ".1.sai") - val saiFile2 = swapExt(bam, ".bam", "." + index + ".2.sai") - val realignedSamFile = swapExt(bam, ".bam", "." + index + ".realigned.sam") - val realignedBamFile = swapExt(bam, ".bam", "." + index + ".realigned.bam") - val rgRealignedBamFile = swapExt(bam, ".bam", "." + index + ".realigned.rg.bam") + val extension = bam.toString.substring(bam.toString.length - 4) + + + + val saiFile1 = swapExt(bam, extension, "." + index + ".1.sai") + val saiFile2 = swapExt(bam, extension, "." + index + ".2.sai") + val realignedSamFile = swapExt(bam, extension, "." + index + ".realigned.sam") + val realignedBamFile = swapExt(bam, extension, "." + index + ".realigned.bam") + val rgRealignedBamFile = swapExt(bam, extension, "." 
+ index + ".realigned.rg.bam") if (useBWAse) { val revertedBAM = revertBAM(bam, true) @@ -444,7 +448,7 @@ class DataProcessingPipeline extends QScript { case class bwa_aln_se (inBam: File, outSai: File) extends CommandLineFunction with ExternalCommonArgs { @Input(doc="bam file to be aligned") var bam = inBam @Output(doc="output sai file") var sai = outSai - def commandLine = bwaPath + " aln -t " + bwaThreads + " -q 5 " + reference + " -b " + bam + " > " + sai + def commandLine = bwaPath + " aln -t " + bwaThreads + bwaParameters + reference + " -b " + bam + " > " + sai this.analysisName = queueLogDir + outSai + ".bwa_aln_se" this.jobName = queueLogDir + outSai + ".bwa_aln_se" } @@ -452,7 +456,7 @@ class DataProcessingPipeline extends QScript { case class bwa_aln_pe (inBam: File, outSai1: File, index: Int) extends CommandLineFunction with ExternalCommonArgs { @Input(doc="bam file to be aligned") var bam = inBam @Output(doc="output sai file for 1st mating pair") var sai = outSai1 - def commandLine = bwaPath + " aln -t " + bwaThreads + " -q 5 " + reference + " -b" + index + " " + bam + " > " + sai + def commandLine = bwaPath + " aln -t " + bwaThreads + bwaParameters + reference + " -b" + index + " " + bam + " > " + sai this.analysisName = queueLogDir + outSai1 + ".bwa_aln_pe1" this.jobName = queueLogDir + outSai1 + ".bwa_aln_pe1" } From f1fb51b222d2f81091389367d600c7ea2b4f913d Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Tue, 25 Sep 2012 12:02:34 -0400 Subject: [PATCH 49/90] Reverting the DPP to the original version, going to create a new simplified version for CMI in private. 
--- .../qscripts/DataProcessingPipeline.scala | 20 ++++++++----------- 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala index c21db30ce..56f6460fb 100755 --- a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala +++ b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala @@ -96,7 +96,6 @@ class DataProcessingPipeline extends QScript { var cleanModelEnum: ConsensusDeterminationModel = ConsensusDeterminationModel.USE_READS - val bwaParameters: String = " -q 5 -l 32 -k 2 -t 4 -o 1 " @@ -166,15 +165,12 @@ class DataProcessingPipeline extends QScript { var realignedBams: Seq[File] = Seq() var index = 1 for (bam <- bams) { - val extension = bam.toString.substring(bam.toString.length - 4) - - - - val saiFile1 = swapExt(bam, extension, "." + index + ".1.sai") - val saiFile2 = swapExt(bam, extension, "." + index + ".2.sai") - val realignedSamFile = swapExt(bam, extension, "." + index + ".realigned.sam") - val realignedBamFile = swapExt(bam, extension, "." + index + ".realigned.bam") - val rgRealignedBamFile = swapExt(bam, extension, "." + index + ".realigned.rg.bam") + // first revert the BAM file to the original qualities + val saiFile1 = swapExt(bam, ".bam", "." + index + ".1.sai") + val saiFile2 = swapExt(bam, ".bam", "." + index + ".2.sai") + val realignedSamFile = swapExt(bam, ".bam", "." + index + ".realigned.sam") + val realignedBamFile = swapExt(bam, ".bam", "." + index + ".realigned.bam") + val rgRealignedBamFile = swapExt(bam, ".bam", "." 
+ index + ".realigned.rg.bam") if (useBWAse) { val revertedBAM = revertBAM(bam, true) @@ -448,7 +444,7 @@ class DataProcessingPipeline extends QScript { case class bwa_aln_se (inBam: File, outSai: File) extends CommandLineFunction with ExternalCommonArgs { @Input(doc="bam file to be aligned") var bam = inBam @Output(doc="output sai file") var sai = outSai - def commandLine = bwaPath + " aln -t " + bwaThreads + bwaParameters + reference + " -b " + bam + " > " + sai + def commandLine = bwaPath + " aln -t " + bwaThreads + " -q 5 " + reference + " -b " + bam + " > " + sai this.analysisName = queueLogDir + outSai + ".bwa_aln_se" this.jobName = queueLogDir + outSai + ".bwa_aln_se" } @@ -456,7 +452,7 @@ class DataProcessingPipeline extends QScript { case class bwa_aln_pe (inBam: File, outSai1: File, index: Int) extends CommandLineFunction with ExternalCommonArgs { @Input(doc="bam file to be aligned") var bam = inBam @Output(doc="output sai file for 1st mating pair") var sai = outSai1 - def commandLine = bwaPath + " aln -t " + bwaThreads + bwaParameters + reference + " -b" + index + " " + bam + " > " + sai + def commandLine = bwaPath + " aln -t " + bwaThreads + " -q 5 " + reference + " -b" + index + " " + bam + " > " + sai this.analysisName = queueLogDir + outSai1 + ".bwa_aln_pe1" this.jobName = queueLogDir + outSai1 + ".bwa_aln_pe1" } From 322ea1262c29d0b125cd69844a5abd15ef88928b Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Tue, 25 Sep 2012 17:13:50 -0400 Subject: [PATCH 50/90] First implementation of a generic 'bundled' Data Processing Pipeline for germline and cancer. not ready for prime time yet! 
--- .../src/org/broadinstitute/sting/queue/util/QScriptUtils.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/public/scala/src/org/broadinstitute/sting/queue/util/QScriptUtils.scala b/public/scala/src/org/broadinstitute/sting/queue/util/QScriptUtils.scala index 1529d9951..f684e533f 100644 --- a/public/scala/src/org/broadinstitute/sting/queue/util/QScriptUtils.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/util/QScriptUtils.scala @@ -57,7 +57,8 @@ object QScriptUtils { for (file <- fromFile(in).getLines()) if (!file.startsWith("#") && !file.isEmpty ) list :+= new File(file.trim()) - list.sortWith(_.compareTo(_) < 0) +// list.sortWith(_.compareTo(_) < 0) + list } /** From 658f3551712aa5e2f8cdc0ba78458c685a900a65 Mon Sep 17 00:00:00 2001 From: Kristian Cibulskis Date: Wed, 3 Oct 2012 16:25:34 -0400 Subject: [PATCH 57/90] initial cancer pipeline with mutations and partial indel support --- .../queue/extensions/cancer/MuTect.scala | 378 ++++++++++++++++++ 1 file changed, 378 insertions(+) create mode 100644 public/scala/src/org/broadinstitute/sting/queue/extensions/cancer/MuTect.scala diff --git a/public/scala/src/org/broadinstitute/sting/queue/extensions/cancer/MuTect.scala b/public/scala/src/org/broadinstitute/sting/queue/extensions/cancer/MuTect.scala new file mode 100644 index 000000000..623d397d4 --- /dev/null +++ b/public/scala/src/org/broadinstitute/sting/queue/extensions/cancer/MuTect.scala @@ -0,0 +1,378 @@ +package org.broadinstitute.sting.queue.extensions.cancer + +import java.io.File +import org.broadinstitute.sting.commandline.Argument +import org.broadinstitute.sting.commandline.Gather +import org.broadinstitute.sting.commandline.Input +import org.broadinstitute.sting.commandline.Output +import org.broadinstitute.sting.queue.function.scattergather.ScatterGatherableFunction +import org.broadinstitute.sting.queue.extensions.gatk.{LocusScatterFunction, TaggedFile} + +class MuTect extends 
org.broadinstitute.sting.queue.extensions.gatk.CommandLineGATK with ScatterGatherableFunction { + analysisName = "MuTect" + analysis_type = "MuTect" + scatterClass = classOf[LocusScatterFunction] + + /** used for debugging, basically exit as soon as we get the reads */ + @Argument(fullName="noop", shortName="", doc="used for debugging, basically exit as soon as we get the reads", required=false, exclusiveOf="", validation="") + var noop: Boolean = _ + + /** add many additional columns of statistics to the output file */ + @Argument(fullName="enable_extended_output", shortName="", doc="add many additional columns of statistics to the output file", required=false, exclusiveOf="", validation="") + var enable_extended_output: Boolean = _ + + /** used when running the caller on a normal (as if it were a tumor) to detect artifacts */ + @Argument(fullName="artifact_detection_mode", shortName="", doc="used when running the caller on a normal (as if it were a tumor) to detect artifacts", required=false, exclusiveOf="", validation="") + var artifact_detection_mode: Boolean = _ + + /** name to use for tumor in output files */ + @Argument(fullName="tumor_sample_name", shortName="", doc="name to use for tumor in output files", required=false, exclusiveOf="", validation="") + var tumor_sample_name: String = _ + + /** if the tumor bam contains multiple samples, only use read groups with SM equal to this value */ + @Argument(fullName="bam_tumor_sample_name", shortName="", doc="if the tumor bam contains multiple samples, only use read groups with SM equal to this value", required=false, exclusiveOf="", validation="") + var bam_tumor_sample_name: String = _ + + /** name to use for normal in output files */ + @Argument(fullName="normal_sample_name", shortName="", doc="name to use for normal in output files", required=false, exclusiveOf="", validation="") + var normal_sample_name: String = _ + + /** force output for each site */ + @Argument(fullName="force_output", shortName="", 
doc="force output for each site", required=false, exclusiveOf="", validation="") + var force_output: Boolean = _ + + /** force output for all alleles at each site */ + @Argument(fullName="force_alleles", shortName="", doc="force output for all alleles at each site", required=false, exclusiveOf="", validation="") + var force_alleles: Boolean = _ + + /** Initial LOD threshold for calling tumor variant */ + @Argument(fullName="initial_tumor_lod", shortName="", doc="Initial LOD threshold for calling tumor variant", required=false, exclusiveOf="", validation="") + var initial_tumor_lod: Option[Float] = None + + /** Format string for initial_tumor_lod */ + @Argument(fullName="initial_tumor_lodFormat", shortName="", doc="Format string for initial_tumor_lod", required=false, exclusiveOf="", validation="") + var initial_tumor_lodFormat: String = "%s" + + /** LOD threshold for calling tumor variant */ + @Argument(fullName="tumor_lod", shortName="", doc="LOD threshold for calling tumor variant", required=false, exclusiveOf="", validation="") + var tumor_lod: Option[Float] = None + + /** Format string for tumor_lod */ + @Argument(fullName="tumor_lodFormat", shortName="", doc="Format string for tumor_lod", required=false, exclusiveOf="", validation="") + var tumor_lodFormat: String = "%s" + + /** estimate of fraction (0-1) of physical contamination with other unrelated samples */ + @Argument(fullName="fraction_contamination", shortName="", doc="estimate of fraction (0-1) of physical contamination with other unrelated samples", required=false, exclusiveOf="", validation="") + var fraction_contamination: Option[Float] = None + + /** Format string for fraction_contamination */ + @Argument(fullName="fraction_contaminationFormat", shortName="", doc="Format string for fraction_contamination", required=false, exclusiveOf="", validation="") + var fraction_contaminationFormat: String = "%s" + + /** minimum fraction of cells which are presumed to have a mutation, used to handle 
non-clonality and contamination */ + @Argument(fullName="minimum_mutation_cell_fraction", shortName="", doc="minimum fraction of cells which are presumed to have a mutation, used to handle non-clonality and contamination", required=false, exclusiveOf="", validation="") + var minimum_mutation_cell_fraction: Option[Float] = None + + /** Format string for minimum_mutation_cell_fraction */ + @Argument(fullName="minimum_mutation_cell_fractionFormat", shortName="", doc="Format string for minimum_mutation_cell_fraction", required=false, exclusiveOf="", validation="") + var minimum_mutation_cell_fractionFormat: String = "%s" + + /** LOD threshold for calling normal non-germline */ + @Argument(fullName="normal_lod", shortName="", doc="LOD threshold for calling normal non-germline", required=false, exclusiveOf="", validation="") + var normal_lod: Option[Float] = None + + /** Format string for normal_lod */ + @Argument(fullName="normal_lodFormat", shortName="", doc="Format string for normal_lod", required=false, exclusiveOf="", validation="") + var normal_lodFormat: String = "%s" + + /** LOD threshold for calling normal non-variant */ + @Argument(fullName="normal_artifact_lod", shortName="", doc="LOD threshold for calling normal non-variant", required=false, exclusiveOf="", validation="") + var normal_artifact_lod: Option[Float] = None + + /** Format string for normal_artifact_lod */ + @Argument(fullName="normal_artifact_lodFormat", shortName="", doc="Format string for normal_artifact_lod", required=false, exclusiveOf="", validation="") + var normal_artifact_lodFormat: String = "%s" + + /** LOD threshold for calling strand bias */ + @Argument(fullName="strand_artifact_lod", shortName="", doc="LOD threshold for calling strand bias", required=false, exclusiveOf="", validation="") + var strand_artifact_lod: Option[Float] = None + + /** Format string for strand_artifact_lod */ + @Argument(fullName="strand_artifact_lodFormat", shortName="", doc="Format string for 
strand_artifact_lod", required=false, exclusiveOf="", validation="") + var strand_artifact_lodFormat: String = "%s" + + /** power threshold for calling strand bias */ + @Argument(fullName="strand_artifact_power_threshold", shortName="", doc="power threshold for calling strand bias", required=false, exclusiveOf="", validation="") + var strand_artifact_power_threshold: Option[Float] = None + + /** Format string for strand_artifact_power_threshold */ + @Argument(fullName="strand_artifact_power_thresholdFormat", shortName="", doc="Format string for strand_artifact_power_threshold", required=false, exclusiveOf="", validation="") + var strand_artifact_power_thresholdFormat: String = "%s" + + /** LOD threshold for calling normal non-variant at dbsnp sites */ + @Argument(fullName="dbsnp_normal_lod", shortName="", doc="LOD threshold for calling normal non-variant at dbsnp sites", required=false, exclusiveOf="", validation="") + var dbsnp_normal_lod: Option[Float] = None + + /** Format string for dbsnp_normal_lod */ + @Argument(fullName="dbsnp_normal_lodFormat", shortName="", doc="Format string for dbsnp_normal_lod", required=false, exclusiveOf="", validation="") + var dbsnp_normal_lodFormat: String = "%s" + + /** Power threshold for normal to determine germline vs variant */ + @Argument(fullName="somatic_classification_normal_power_threshold", shortName="", doc="Power threshold for normal to determine germline vs variant", required=false, exclusiveOf="", validation="") + var somatic_classification_normal_power_threshold: Option[Float] = None + + /** Format string for somatic_classification_normal_power_threshold */ + @Argument(fullName="somatic_classification_normal_power_thresholdFormat", shortName="", doc="Format string for somatic_classification_normal_power_threshold", required=false, exclusiveOf="", validation="") + var somatic_classification_normal_power_thresholdFormat: String = "%s" + + /** minimum allele fraction to be considered in normal, useful for normal sample 
contaminated with tumor */ + @Argument(fullName="minimum_normal_allele_fraction", shortName="", doc="minimum allele fraction to be considered in normal, useful for normal sample contaminated with tumor", required=false, exclusiveOf="", validation="") + var minimum_normal_allele_fraction: Option[Float] = None + + /** Format string for minimum_normal_allele_fraction */ + @Argument(fullName="minimum_normal_allele_fractionFormat", shortName="", doc="Format string for minimum_normal_allele_fraction", required=false, exclusiveOf="", validation="") + var minimum_normal_allele_fractionFormat: String = "%s" + + /** for computational efficiency, reject sites with allelic fraction below this threshold */ + @Argument(fullName="tumor_f_pretest", shortName="", doc="for computational efficiency, reject sites with allelic fraction below this threshold", required=false, exclusiveOf="", validation="") + var tumor_f_pretest: Option[Float] = None + + /** Format string for tumor_f_pretest */ + @Argument(fullName="tumor_f_pretestFormat", shortName="", doc="Format string for tumor_f_pretest", required=false, exclusiveOf="", validation="") + var tumor_f_pretestFormat: String = "%s" + + /** threshold for minimum base quality score */ + @Argument(fullName="min_qscore", shortName="", doc="threshold for minimum base quality score", required=false, exclusiveOf="", validation="") + var min_qscore: Option[Int] = None + + /** how many gapped events (ins/del) are allowed in proximity to this candidate */ + @Argument(fullName="gap_events_threshold", shortName="", doc="how many gapped events (ins/del) are allowed in proximity to this candidate", required=false, exclusiveOf="", validation="") + var gap_events_threshold: Option[Int] = None + + /** if this fraction or more of the bases in a read are soft/hard clipped, do not use this read for mutation calling */ + @Argument(fullName="heavily_clipped_read_fraction", shortName="", doc="if this fraction or more of the bases in a read are soft/hard 
clipped, do not use this read for mutation calling", required=false, exclusiveOf="", validation="") + var heavily_clipped_read_fraction: Option[Float] = None + + /** Format string for heavily_clipped_read_fraction */ + @Argument(fullName="heavily_clipped_read_fractionFormat", shortName="", doc="Format string for heavily_clipped_read_fraction", required=false, exclusiveOf="", validation="") + var heavily_clipped_read_fractionFormat: String = "%s" + + /** pvalue threshold for fishers exact test of clipping bias in mutant reads vs ref reads */ + @Argument(fullName="clipping_bias_pvalue_threshold", shortName="", doc="pvalue threshold for fishers exact test of clipping bias in mutant reads vs ref reads", required=false, exclusiveOf="", validation="") + var clipping_bias_pvalue_threshold: Option[Float] = None + + /** Format string for clipping_bias_pvalue_threshold */ + @Argument(fullName="clipping_bias_pvalue_thresholdFormat", shortName="", doc="Format string for clipping_bias_pvalue_threshold", required=false, exclusiveOf="", validation="") + var clipping_bias_pvalue_thresholdFormat: String = "%s" + + /** threshold for determining if there is relatedness between the alt and ref allele read piles */ + @Argument(fullName="fraction_mapq0_threshold", shortName="", doc="threshold for determining if there is relatedness between the alt and ref allele read piles", required=false, exclusiveOf="", validation="") + var fraction_mapq0_threshold: Option[Float] = None + + /** Format string for fraction_mapq0_threshold */ + @Argument(fullName="fraction_mapq0_thresholdFormat", shortName="", doc="Format string for fraction_mapq0_threshold", required=false, exclusiveOf="", validation="") + var fraction_mapq0_thresholdFormat: String = "%s" + + /** threshold for clustered read position artifact median */ + @Argument(fullName="pir_median_threshold", shortName="", doc="threshold for clustered read position artifact median", required=false, exclusiveOf="", validation="") + var 
pir_median_threshold: Option[Double] = None + + /** Format string for pir_median_threshold */ + @Argument(fullName="pir_median_thresholdFormat", shortName="", doc="Format string for pir_median_threshold", required=false, exclusiveOf="", validation="") + var pir_median_thresholdFormat: String = "%s" + + /** threshold for clustered read position artifact MAD */ + @Argument(fullName="pir_mad_threshold", shortName="", doc="threshold for clustered read position artifact MAD", required=false, exclusiveOf="", validation="") + var pir_mad_threshold: Option[Double] = None + + /** Format string for pir_mad_threshold */ + @Argument(fullName="pir_mad_thresholdFormat", shortName="", doc="Format string for pir_mad_threshold", required=false, exclusiveOf="", validation="") + var pir_mad_thresholdFormat: String = "%s" + + /** required minimum value for tumor alt allele maximum mapping quality score */ + @Argument(fullName="required_maximum_alt_allele_mapping_quality_score", shortName="", doc="required minimum value for tumor alt allele maximum mapping quality score", required=false, exclusiveOf="", validation="") + var required_maximum_alt_allele_mapping_quality_score: Option[Int] = None + + /** threshold for maximum alternate allele counts in normal */ + @Argument(fullName="max_alt_alleles_in_normal_count", shortName="", doc="threshold for maximum alternate allele counts in normal", required=false, exclusiveOf="", validation="") + var max_alt_alleles_in_normal_count: Option[Int] = None + + /** threshold for maximum alternate allele quality score sum in normal */ + @Argument(fullName="max_alt_alleles_in_normal_qscore_sum", shortName="", doc="threshold for maximum alternate allele quality score sum in normal", required=false, exclusiveOf="", validation="") + var max_alt_alleles_in_normal_qscore_sum: Option[Int] = None + + /** threshold for maximum alternate allele fraction in normal */ + @Argument(fullName="max_alt_allele_in_normal_fraction", shortName="", doc="threshold for 
maximum alternate allele fraction in normal", required=false, exclusiveOf="", validation="") + var max_alt_allele_in_normal_fraction: Option[Double] = None + + /** Format string for max_alt_allele_in_normal_fraction */ + @Argument(fullName="max_alt_allele_in_normal_fractionFormat", shortName="", doc="Format string for max_alt_allele_in_normal_fraction", required=false, exclusiveOf="", validation="") + var max_alt_allele_in_normal_fractionFormat: String = "%s" + + /** Phred scale quality score constant to use in power calculations */ + @Argument(fullName="power_constant_qscore", shortName="", doc="Phred scale quality score constant to use in power calculations", required=false, exclusiveOf="", validation="") + var power_constant_qscore: Option[Int] = None + + /** Absolute Copy Number Data, as defined by Absolute, to use in power calculations */ + @Argument(fullName="absolute_copy_number_data", shortName="", doc="Absolute Copy Number Data, as defined by Absolute, to use in power calculations", required=false, exclusiveOf="", validation="") + var absolute_copy_number_data: File = _ + + /** Allelic fraction constant to use in power calculations */ + @Argument(fullName="power_constant_af", shortName="", doc="Allelic fraction constant to use in power calculations", required=false, exclusiveOf="", validation="") + var power_constant_af: Option[Double] = None + + /** Format string for power_constant_af */ + @Argument(fullName="power_constant_afFormat", shortName="", doc="Format string for power_constant_af", required=false, exclusiveOf="", validation="") + var power_constant_afFormat: String = "%s" + + /** Call-stats output */ + @Output(fullName="out", shortName="o", doc="Call-stats output", required=false, exclusiveOf="", validation="") + @Gather(classOf[org.broadinstitute.sting.queue.function.scattergather.SimpleTextGatherFunction]) + var out: File = _ + + /** + * Short name of out + * @return Short name of out + */ + def o = this.out + + /** + * Short name of out + * 
@param value Short name of out + */ + def o_=(value: File) { this.out = value } + + /** VCF file of DBSNP information */ + @Input(fullName="dbsnp", shortName="dbsnp", doc="VCF file of DBSNP information", required=false, exclusiveOf="", validation="") + var dbsnp: Seq[File] = Nil + + /** Dependencies on any indexes of dbsnp */ + @Input(fullName="dbsnpIndexes", shortName="", doc="Dependencies on any indexes of dbsnp", required=false, exclusiveOf="", validation="") + private var dbsnpIndexes: Seq[File] = Nil + + /** VCF file of COSMIC sites */ + @Input(fullName="cosmic", shortName="cosmic", doc="VCF file of COSMIC sites", required=false, exclusiveOf="", validation="") + var cosmic: Seq[File] = Nil + + /** Dependencies on any indexes of cosmic */ + @Input(fullName="cosmicIndexes", shortName="", doc="Dependencies on any indexes of cosmic", required=false, exclusiveOf="", validation="") + private var cosmicIndexes: Seq[File] = Nil + + /** VCF file of sites observed in normal */ + @Input(fullName="normal_panel", shortName="normal_panel", doc="VCF file of sites observed in normal", required=false, exclusiveOf="", validation="") + var normal_panel: Seq[File] = Nil + + /** Dependencies on any indexes of normal_panel */ + @Input(fullName="normal_panelIndexes", shortName="", doc="Dependencies on any indexes of normal_panel", required=false, exclusiveOf="", validation="") + private var normal_panelIndexes: Seq[File] = Nil + + /** write out coverage in WIGGLE format to this file */ + @Output(fullName="coverage_file", shortName="cov", doc="write out coverage in WIGGLE format to this file", required=false, exclusiveOf="", validation="") + @Gather(classOf[org.broadinstitute.sting.queue.function.scattergather.SimpleTextGatherFunction]) + var coverage_file: File = _ + + /** + * Short name of coverage_file + * @return Short name of coverage_file + */ + def cov = this.coverage_file + + /** + * Short name of coverage_file + * @param value Short name of coverage_file + */ + def 
cov_=(value: File) { this.coverage_file = value } + + /** write out 20x of Q20 coverage in WIGGLE format to this file */ + @Output(fullName="coverage_20_q20_file", shortName="cov_q20", doc="write out 20x of Q20 coverage in WIGGLE format to this file", required=false, exclusiveOf="", validation="") + @Gather(classOf[org.broadinstitute.sting.queue.function.scattergather.SimpleTextGatherFunction]) + var coverage_20_q20_file: File = _ + + /** + * Short name of coverage_20_q20_file + * @return Short name of coverage_20_q20_file + */ + def cov_q20 = this.coverage_20_q20_file + + /** + * Short name of coverage_20_q20_file + * @param value Short name of coverage_20_q20_file + */ + def cov_q20_=(value: File) { this.coverage_20_q20_file = value } + + /** write out power in WIGGLE format to this file */ + @Output(fullName="power_file", shortName="pow", doc="write out power in WIGGLE format to this file", required=false, exclusiveOf="", validation="") + @Gather(classOf[org.broadinstitute.sting.queue.function.scattergather.SimpleTextGatherFunction]) + var power_file: File = _ + + /** + * Short name of power_file + * @return Short name of power_file + */ + def pow = this.power_file + + /** + * Short name of power_file + * @param value Short name of power_file + */ + def pow_=(value: File) { this.power_file = value } + + /** write out tumor read depth in WIGGLE format to this file */ + @Output(fullName="tumor_depth_file", shortName="tdf", doc="write out tumor read depth in WIGGLE format to this file", required=false, exclusiveOf="", validation="") + @Gather(classOf[org.broadinstitute.sting.queue.function.scattergather.SimpleTextGatherFunction]) + var tumor_depth_file: File = _ + + /** + * Short name of tumor_depth_file + * @return Short name of tumor_depth_file + */ + def tdf = this.tumor_depth_file + + /** + * Short name of tumor_depth_file + * @param value Short name of tumor_depth_file + */ + def tdf_=(value: File) { this.tumor_depth_file = value } + + /** write out normal 
read depth in WIGGLE format to this file */ + @Output(fullName="normal_depth_file", shortName="ndf", doc="write out normal read depth in WIGGLE format to this file", required=false, exclusiveOf="", validation="") + @Gather(classOf[org.broadinstitute.sting.queue.function.scattergather.SimpleTextGatherFunction]) + var normal_depth_file: File = _ + + /** + * Short name of normal_depth_file + * @return Short name of normal_depth_file + */ + def ndf = this.normal_depth_file + + /** + * Short name of normal_depth_file + * @param value Short name of normal_depth_file + */ + def ndf_=(value: File) { this.normal_depth_file = value } + + /** if a read has mismatching number of bases and base qualities, filter out the read instead of blowing up. */ + @Argument(fullName="filter_mismatching_base_and_quals", shortName="filterMBQ", doc="if a read has mismatching number of bases and base qualities, filter out the read instead of blowing up.", required=false, exclusiveOf="", validation="") + var filter_mismatching_base_and_quals: Boolean = _ + + /** + * Short name of filter_mismatching_base_and_quals + * @return Short name of filter_mismatching_base_and_quals + */ + def filterMBQ = this.filter_mismatching_base_and_quals + + /** + * Short name of filter_mismatching_base_and_quals + * @param value Short name of filter_mismatching_base_and_quals + */ + def filterMBQ_=(value: Boolean) { this.filter_mismatching_base_and_quals = value } + + override def freezeFieldValues() { + super.freezeFieldValues() + dbsnpIndexes ++= dbsnp.filter(orig => orig != null).map(orig => new File(orig.getPath + ".idx")) + cosmicIndexes ++= cosmic.filter(orig => orig != null).map(orig => new File(orig.getPath + ".idx")) + normal_panelIndexes ++= normal_panel.filter(orig => orig != null).map(orig => new File(orig.getPath + ".idx")) + } + + override def commandLine = super.commandLine + conditional(noop, "--noop", escape=true, format="%s") + conditional(enable_extended_output, "--enable_extended_output", 
escape=true, format="%s") + conditional(artifact_detection_mode, "--artifact_detection_mode", escape=true, format="%s") + optional("--tumor_sample_name", tumor_sample_name, spaceSeparated=true, escape=true, format="%s") + optional("--bam_tumor_sample_name", bam_tumor_sample_name, spaceSeparated=true, escape=true, format="%s") + optional("--normal_sample_name", normal_sample_name, spaceSeparated=true, escape=true, format="%s") + conditional(force_output, "--force_output", escape=true, format="%s") + conditional(force_alleles, "--force_alleles", escape=true, format="%s") + optional("--initial_tumor_lod", initial_tumor_lod, spaceSeparated=true, escape=true, format=initial_tumor_lodFormat) + optional("--tumor_lod", tumor_lod, spaceSeparated=true, escape=true, format=tumor_lodFormat) + optional("--fraction_contamination", fraction_contamination, spaceSeparated=true, escape=true, format=fraction_contaminationFormat) + optional("--minimum_mutation_cell_fraction", minimum_mutation_cell_fraction, spaceSeparated=true, escape=true, format=minimum_mutation_cell_fractionFormat) + optional("--normal_lod", normal_lod, spaceSeparated=true, escape=true, format=normal_lodFormat) + optional("--normal_artifact_lod", normal_artifact_lod, spaceSeparated=true, escape=true, format=normal_artifact_lodFormat) + optional("--strand_artifact_lod", strand_artifact_lod, spaceSeparated=true, escape=true, format=strand_artifact_lodFormat) + optional("--strand_artifact_power_threshold", strand_artifact_power_threshold, spaceSeparated=true, escape=true, format=strand_artifact_power_thresholdFormat) + optional("--dbsnp_normal_lod", dbsnp_normal_lod, spaceSeparated=true, escape=true, format=dbsnp_normal_lodFormat) + optional("--somatic_classification_normal_power_threshold", somatic_classification_normal_power_threshold, spaceSeparated=true, escape=true, format=somatic_classification_normal_power_thresholdFormat) + optional("--minimum_normal_allele_fraction", minimum_normal_allele_fraction, 
spaceSeparated=true, escape=true, format=minimum_normal_allele_fractionFormat) + optional("--tumor_f_pretest", tumor_f_pretest, spaceSeparated=true, escape=true, format=tumor_f_pretestFormat) + optional("--min_qscore", min_qscore, spaceSeparated=true, escape=true, format="%s") + optional("--gap_events_threshold", gap_events_threshold, spaceSeparated=true, escape=true, format="%s") + optional("--heavily_clipped_read_fraction", heavily_clipped_read_fraction, spaceSeparated=true, escape=true, format=heavily_clipped_read_fractionFormat) + optional("--clipping_bias_pvalue_threshold", clipping_bias_pvalue_threshold, spaceSeparated=true, escape=true, format=clipping_bias_pvalue_thresholdFormat) + optional("--fraction_mapq0_threshold", fraction_mapq0_threshold, spaceSeparated=true, escape=true, format=fraction_mapq0_thresholdFormat) + optional("--pir_median_threshold", pir_median_threshold, spaceSeparated=true, escape=true, format=pir_median_thresholdFormat) + optional("--pir_mad_threshold", pir_mad_threshold, spaceSeparated=true, escape=true, format=pir_mad_thresholdFormat) + optional("--required_maximum_alt_allele_mapping_quality_score", required_maximum_alt_allele_mapping_quality_score, spaceSeparated=true, escape=true, format="%s") + optional("--max_alt_alleles_in_normal_count", max_alt_alleles_in_normal_count, spaceSeparated=true, escape=true, format="%s") + optional("--max_alt_alleles_in_normal_qscore_sum", max_alt_alleles_in_normal_qscore_sum, spaceSeparated=true, escape=true, format="%s") + optional("--max_alt_allele_in_normal_fraction", max_alt_allele_in_normal_fraction, spaceSeparated=true, escape=true, format=max_alt_allele_in_normal_fractionFormat) + optional("--power_constant_qscore", power_constant_qscore, spaceSeparated=true, escape=true, format="%s") + optional("--absolute_copy_number_data", absolute_copy_number_data, spaceSeparated=true, escape=true, format="%s") + optional("--power_constant_af", power_constant_af, spaceSeparated=true, escape=true, 
format=power_constant_afFormat) + optional("-o", out, spaceSeparated=true, escape=true, format="%s") + repeat("-dbsnp", dbsnp, formatPrefix=TaggedFile.formatCommandLineParameter, spaceSeparated=true, escape=true, format="%s") + repeat("-cosmic", cosmic, formatPrefix=TaggedFile.formatCommandLineParameter, spaceSeparated=true, escape=true, format="%s") + repeat("-normal_panel", normal_panel, formatPrefix=TaggedFile.formatCommandLineParameter, spaceSeparated=true, escape=true, format="%s") + optional("-cov", coverage_file, spaceSeparated=true, escape=true, format="%s") + optional("-cov_q20", coverage_20_q20_file, spaceSeparated=true, escape=true, format="%s") + optional("-pow", power_file, spaceSeparated=true, escape=true, format="%s") + optional("-tdf", tumor_depth_file, spaceSeparated=true, escape=true, format="%s") + optional("-ndf", normal_depth_file, spaceSeparated=true, escape=true, format="%s") + conditional(filter_mismatching_base_and_quals, "-filterMBQ", escape=true, format="%s") +} From 22b79fb4dda0c75d9c2a868bc2c0f0b8daa7f504 Mon Sep 17 00:00:00 2001 From: Guillermo del Angel Date: Thu, 11 Oct 2012 11:09:49 -0400 Subject: [PATCH 59/90] Resolve [DEV-7]: add single-sample VCF calling at end of FASTQ-BAM pipeline. 
Initial steps of [DEV-4]: queue extensions for Picard QC metrics --- .../picard/CalculateHsMetrics.scala | 60 +++++++++++++++++++ .../picard/CollectGcBiasMetrics.scala | 32 ++++++++++ .../picard/CollectMultipleMetrics.scala | 36 +++++++++++ 3 files changed, 128 insertions(+) create mode 100644 public/scala/src/org/broadinstitute/sting/queue/extensions/picard/CalculateHsMetrics.scala create mode 100644 public/scala/src/org/broadinstitute/sting/queue/extensions/picard/CollectGcBiasMetrics.scala create mode 100644 public/scala/src/org/broadinstitute/sting/queue/extensions/picard/CollectMultipleMetrics.scala diff --git a/public/scala/src/org/broadinstitute/sting/queue/extensions/picard/CalculateHsMetrics.scala b/public/scala/src/org/broadinstitute/sting/queue/extensions/picard/CalculateHsMetrics.scala new file mode 100644 index 000000000..75e9300dc --- /dev/null +++ b/public/scala/src/org/broadinstitute/sting/queue/extensions/picard/CalculateHsMetrics.scala @@ -0,0 +1,60 @@ +package org.broadinstitute.sting.queue.extensions.picard + +import org.broadinstitute.sting.commandline.{Argument, Output, Input} +import java.io.File + +/** + * Created with IntelliJ IDEA. + * User: delangel + * Date: 10/9/12 + * Time: 5:59 PM + * To change this template use File | Settings | File Templates. + */ +class CalculateHsMetrics extends org.broadinstitute.sting.queue.function.JavaCommandLineFunction with PicardBamFunction { + analysisName = "CalculateHsMetrics" + javaMainClass = "net.sf.picard.sam.CalculateHsMetrics" + + @Input(doc="The input SAM or BAM files to analyze. 
Must be coordinate sorted.", shortName = "input", fullName = "input_bam_files", required = true) + var input: Seq[File] = Nil + + @Output(doc="The output file to write statistics to", shortName = "output", fullName = "output_file", required = true) + var output: File = _ + + @Argument(doc="Interval list with targets", shortName = "targets", fullName = "target_list", required = true) + var targets: File = _ + + @Argument(doc="Interval list with baits", shortName = "baits", fullName = "bait_list", required = true) + var baits: File = _ + + @Argument(doc="Reference file", shortName = "reference", fullName = "reference", required = true) + var reference: File = _ + /* + @Argument(doc = "Maximum number of file handles to keep open when spilling read ends to disk. Set this number a little lower than the per-process maximum number of file that may be open. This number can be found by executing the 'ulimit -n' command on a Unix system.", shortName = "max_file_handles", fullName ="max_file_handles_for_read_ends_maps", required=false) + var MAX_FILE_HANDLES_FOR_READ_ENDS_MAP: Int = -1; + + @Argument(doc = "This number, plus the maximum RAM available to the JVM, determine the memory footprint used by some of the sorting collections. 
If you are running out of memory, try reducing this number.", shortName = "sorting_ratio", fullName = "sorting_collection_size_ratio", required = false) + var SORTING_COLLECTION_SIZE_RATIO: Double = -1 + */ + override def freezeFieldValues() { + super.freezeFieldValues() +// if (outputIndex == null && output != null) + // outputIndex = new File(output.getName.stripSuffix(".bam") + ".bai") + } + + val level = "SAMPLE" + + override def inputBams = input + override def outputBam = output + //this.sortOrder = null + //this.createIndex = Some(true) + override def commandLine = super.commandLine + + required("BAIT_INTERVALS=" + baits) + + required("TARGET_INTERVALS=" + targets) + + required("REFERENCE_SEQUENCE=" + reference) + + optional("METRIC_ACCUMULATION_LEVEL="+level)/*+ + conditional(REMOVE_DUPLICATES, "REMOVE_DUPLICATES=true") + + conditional(MAX_FILE_HANDLES_FOR_READ_ENDS_MAP > 0, "MAX_FILE_HANDLES_FOR_READ_ENDS_MAP=" + MAX_FILE_HANDLES_FOR_READ_ENDS_MAP.toString) + + conditional(SORTING_COLLECTION_SIZE_RATIO > 0, "SORTING_COLLECTION_SIZE_RATIO=" + SORTING_COLLECTION_SIZE_RATIO.toString) */ + + +} diff --git a/public/scala/src/org/broadinstitute/sting/queue/extensions/picard/CollectGcBiasMetrics.scala b/public/scala/src/org/broadinstitute/sting/queue/extensions/picard/CollectGcBiasMetrics.scala new file mode 100644 index 000000000..de2b0af9e --- /dev/null +++ b/public/scala/src/org/broadinstitute/sting/queue/extensions/picard/CollectGcBiasMetrics.scala @@ -0,0 +1,32 @@ +package org.broadinstitute.sting.queue.extensions.picard + +import org.broadinstitute.sting.commandline.{Argument, Output, Input} +import java.io.File + +/** + * Created with IntelliJ IDEA. + * User: delangel + * Date: 10/10/12 + * Time: 10:37 AM + * To change this template use File | Settings | File Templates. 
+ */ +class CollectGcBiasMetrics extends org.broadinstitute.sting.queue.function.JavaCommandLineFunction with PicardBamFunction { + analysisName = "CalculateGcMetrics" + javaMainClass = "net.sf.picard.sam.CalculateGcMetrics" + + @Input(doc="The input SAM or BAM files to analyze. Must be coordinate sorted.", shortName = "input", fullName = "input_bam_files", required = true) + var input: Seq[File] = Nil + + @Output(doc="The output file to write statistics to", shortName = "output", fullName = "output_file", required = true) + var output: File = _ + + @Argument(doc="Reference file", shortName = "reference", fullName = "reference", required = true) + var reference: File = _ + + override def inputBams = input + override def outputBam = output + override def commandLine = super.commandLine + + required("CHART_OUTPUT=" + output+".pdf") + + required("REFERENCE_SEQUENCE=" + reference) + + required("ASSUME_SORTED=true") +} diff --git a/public/scala/src/org/broadinstitute/sting/queue/extensions/picard/CollectMultipleMetrics.scala b/public/scala/src/org/broadinstitute/sting/queue/extensions/picard/CollectMultipleMetrics.scala new file mode 100644 index 000000000..a9af4e858 --- /dev/null +++ b/public/scala/src/org/broadinstitute/sting/queue/extensions/picard/CollectMultipleMetrics.scala @@ -0,0 +1,36 @@ +package org.broadinstitute.sting.queue.extensions.picard + +import org.broadinstitute.sting.commandline.{Argument, Output, Input} +import java.io.File + +/** + * Created with IntelliJ IDEA. + * User: delangel + * Date: 10/10/12 + * Time: 10:37 AM + * To change this template use File | Settings | File Templates. + */ +class CollectMultipleMetrics extends org.broadinstitute.sting.queue.function.JavaCommandLineFunction with PicardBamFunction{ + analysisName = "CalculateMultipleMetrics" + javaMainClass = "net.sf.picard.sam.CalculateMultipleMetrics" + + @Input(doc="The input SAM or BAM files to analyze. 
Must be coordinate sorted.", shortName = "input", fullName = "input_bam_files", required = true) + var input: Seq[File] = Nil + + @Output(doc="The output file to write statistics to", shortName = "output", fullName = "output_file", required = true) + var output: File = _ + + @Argument(doc="Reference file", shortName = "reference", fullName = "reference", required = true) + var reference: File = _ + + override def inputBams = input + override def outputBam = output + override def commandLine = super.commandLine + + required("REFERENCE_SEQUENCE=" + reference) + + required("ASSUME_SORTED=true") + + required("PROGRAM=QualityScoreDistribution") + + required("PROGRAM=MeanQualityByCycle") + + required("PROGRAM=CollectAlignmentSummaryMetrics" ) + + +} From dad7ca281eaae6ba1318e295d1ad9cac8ef732ae Mon Sep 17 00:00:00 2001 From: Kristian Cibulskis Date: Fri, 12 Oct 2012 14:18:12 -0400 Subject: [PATCH 61/90] upgraded mutation caller with VCF output raw indel calls (non filtered,non vcf) --- .../queue/extensions/cancer/MuTect.scala | 33 +++++++++++++++++-- 1 file changed, 31 insertions(+), 2 deletions(-) diff --git a/public/scala/src/org/broadinstitute/sting/queue/extensions/cancer/MuTect.scala b/public/scala/src/org/broadinstitute/sting/queue/extensions/cancer/MuTect.scala index 623d397d4..1193e7dec 100644 --- a/public/scala/src/org/broadinstitute/sting/queue/extensions/cancer/MuTect.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/extensions/cancer/MuTect.scala @@ -6,7 +6,7 @@ import org.broadinstitute.sting.commandline.Gather import org.broadinstitute.sting.commandline.Input import org.broadinstitute.sting.commandline.Output import org.broadinstitute.sting.queue.function.scattergather.ScatterGatherableFunction -import org.broadinstitute.sting.queue.extensions.gatk.{LocusScatterFunction, TaggedFile} +import org.broadinstitute.sting.queue.extensions.gatk.{TaggedFile, VcfGatherFunction, LocusScatterFunction} class MuTect extends 
org.broadinstitute.sting.queue.extensions.gatk.CommandLineGATK with ScatterGatherableFunction { analysisName = "MuTect" @@ -45,6 +45,10 @@ class MuTect extends org.broadinstitute.sting.queue.extensions.gatk.CommandLineG @Argument(fullName="force_alleles", shortName="", doc="force output for all alleles at each site", required=false, exclusiveOf="", validation="") var force_alleles: Boolean = _ + /** only emit passing calls */ + @Argument(fullName="only_passing_calls", shortName="", doc="only emit passing calls", required=false, exclusiveOf="", validation="") + var only_passing_calls: Boolean = _ + /** Initial LOD threshold for calling tumor variant */ @Argument(fullName="initial_tumor_lod", shortName="", doc="Initial LOD threshold for calling tumor variant", required=false, exclusiveOf="", validation="") var initial_tumor_lod: Option[Float] = None @@ -242,6 +246,28 @@ class MuTect extends org.broadinstitute.sting.queue.extensions.gatk.CommandLineG */ def o_=(value: File) { this.out = value } + /** VCF output of mutation candidates */ + @Output(fullName="vcf", shortName="vcf", doc="VCF output of mutation candidates", required=false, exclusiveOf="", validation="") + @Gather(classOf[VcfGatherFunction]) + var vcf: File = _ + + /** Automatically generated index for vcf */ + @Output(fullName="vcfIndex", shortName="", doc="Automatically generated index for vcf", required=false, exclusiveOf="", validation="") + @Gather(enabled=false) + private var vcfIndex: File = _ + + /** Don't output the usual VCF header tag with the command line. FOR DEBUGGING PURPOSES ONLY. This option is required in order to pass integration tests. */ + @Argument(fullName="no_cmdline_in_header", shortName="no_cmdline_in_header", doc="Don't output the usual VCF header tag with the command line. FOR DEBUGGING PURPOSES ONLY. 
This option is required in order to pass integration tests.", required=false, exclusiveOf="", validation="") + var no_cmdline_in_header: Boolean = _ + + /** Just output sites without genotypes (i.e. only the first 8 columns of the VCF) */ + @Argument(fullName="sites_only", shortName="sites_only", doc="Just output sites without genotypes (i.e. only the first 8 columns of the VCF)", required=false, exclusiveOf="", validation="") + var sites_only: Boolean = _ + + /** force BCF output, regardless of the file's extension */ + @Argument(fullName="bcf", shortName="bcf", doc="force BCF output, regardless of the file's extension", required=false, exclusiveOf="", validation="") + var bcf: Boolean = _ + /** VCF file of DBSNP information */ @Input(fullName="dbsnp", shortName="dbsnp", doc="VCF file of DBSNP information", required=false, exclusiveOf="", validation="") var dbsnp: Seq[File] = Nil @@ -369,10 +395,13 @@ class MuTect extends org.broadinstitute.sting.queue.extensions.gatk.CommandLineG override def freezeFieldValues() { super.freezeFieldValues() + if (vcf != null && !org.broadinstitute.sting.utils.io.IOUtils.isSpecialFile(vcf)) + if (!org.broadinstitute.sting.gatk.io.stubs.VCFWriterArgumentTypeDescriptor.isCompressed(vcf.getPath)) + vcfIndex = new File(vcf.getPath + ".idx") dbsnpIndexes ++= dbsnp.filter(orig => orig != null).map(orig => new File(orig.getPath + ".idx")) cosmicIndexes ++= cosmic.filter(orig => orig != null).map(orig => new File(orig.getPath + ".idx")) normal_panelIndexes ++= normal_panel.filter(orig => orig != null).map(orig => new File(orig.getPath + ".idx")) } - override def commandLine = super.commandLine + conditional(noop, "--noop", escape=true, format="%s") + conditional(enable_extended_output, "--enable_extended_output", escape=true, format="%s") + conditional(artifact_detection_mode, "--artifact_detection_mode", escape=true, format="%s") + optional("--tumor_sample_name", tumor_sample_name, spaceSeparated=true, escape=true, format="%s") + 
optional("--bam_tumor_sample_name", bam_tumor_sample_name, spaceSeparated=true, escape=true, format="%s") + optional("--normal_sample_name", normal_sample_name, spaceSeparated=true, escape=true, format="%s") + conditional(force_output, "--force_output", escape=true, format="%s") + conditional(force_alleles, "--force_alleles", escape=true, format="%s") + optional("--initial_tumor_lod", initial_tumor_lod, spaceSeparated=true, escape=true, format=initial_tumor_lodFormat) + optional("--tumor_lod", tumor_lod, spaceSeparated=true, escape=true, format=tumor_lodFormat) + optional("--fraction_contamination", fraction_contamination, spaceSeparated=true, escape=true, format=fraction_contaminationFormat) + optional("--minimum_mutation_cell_fraction", minimum_mutation_cell_fraction, spaceSeparated=true, escape=true, format=minimum_mutation_cell_fractionFormat) + optional("--normal_lod", normal_lod, spaceSeparated=true, escape=true, format=normal_lodFormat) + optional("--normal_artifact_lod", normal_artifact_lod, spaceSeparated=true, escape=true, format=normal_artifact_lodFormat) + optional("--strand_artifact_lod", strand_artifact_lod, spaceSeparated=true, escape=true, format=strand_artifact_lodFormat) + optional("--strand_artifact_power_threshold", strand_artifact_power_threshold, spaceSeparated=true, escape=true, format=strand_artifact_power_thresholdFormat) + optional("--dbsnp_normal_lod", dbsnp_normal_lod, spaceSeparated=true, escape=true, format=dbsnp_normal_lodFormat) + optional("--somatic_classification_normal_power_threshold", somatic_classification_normal_power_threshold, spaceSeparated=true, escape=true, format=somatic_classification_normal_power_thresholdFormat) + optional("--minimum_normal_allele_fraction", minimum_normal_allele_fraction, spaceSeparated=true, escape=true, format=minimum_normal_allele_fractionFormat) + optional("--tumor_f_pretest", tumor_f_pretest, spaceSeparated=true, escape=true, format=tumor_f_pretestFormat) + optional("--min_qscore", min_qscore, 
spaceSeparated=true, escape=true, format="%s") + optional("--gap_events_threshold", gap_events_threshold, spaceSeparated=true, escape=true, format="%s") + optional("--heavily_clipped_read_fraction", heavily_clipped_read_fraction, spaceSeparated=true, escape=true, format=heavily_clipped_read_fractionFormat) + optional("--clipping_bias_pvalue_threshold", clipping_bias_pvalue_threshold, spaceSeparated=true, escape=true, format=clipping_bias_pvalue_thresholdFormat) + optional("--fraction_mapq0_threshold", fraction_mapq0_threshold, spaceSeparated=true, escape=true, format=fraction_mapq0_thresholdFormat) + optional("--pir_median_threshold", pir_median_threshold, spaceSeparated=true, escape=true, format=pir_median_thresholdFormat) + optional("--pir_mad_threshold", pir_mad_threshold, spaceSeparated=true, escape=true, format=pir_mad_thresholdFormat) + optional("--required_maximum_alt_allele_mapping_quality_score", required_maximum_alt_allele_mapping_quality_score, spaceSeparated=true, escape=true, format="%s") + optional("--max_alt_alleles_in_normal_count", max_alt_alleles_in_normal_count, spaceSeparated=true, escape=true, format="%s") + optional("--max_alt_alleles_in_normal_qscore_sum", max_alt_alleles_in_normal_qscore_sum, spaceSeparated=true, escape=true, format="%s") + optional("--max_alt_allele_in_normal_fraction", max_alt_allele_in_normal_fraction, spaceSeparated=true, escape=true, format=max_alt_allele_in_normal_fractionFormat) + optional("--power_constant_qscore", power_constant_qscore, spaceSeparated=true, escape=true, format="%s") + optional("--absolute_copy_number_data", absolute_copy_number_data, spaceSeparated=true, escape=true, format="%s") + optional("--power_constant_af", power_constant_af, spaceSeparated=true, escape=true, format=power_constant_afFormat) + optional("-o", out, spaceSeparated=true, escape=true, format="%s") + repeat("-dbsnp", dbsnp, formatPrefix=TaggedFile.formatCommandLineParameter, spaceSeparated=true, escape=true, format="%s") + 
repeat("-cosmic", cosmic, formatPrefix=TaggedFile.formatCommandLineParameter, spaceSeparated=true, escape=true, format="%s") + repeat("-normal_panel", normal_panel, formatPrefix=TaggedFile.formatCommandLineParameter, spaceSeparated=true, escape=true, format="%s") + optional("-cov", coverage_file, spaceSeparated=true, escape=true, format="%s") + optional("-cov_q20", coverage_20_q20_file, spaceSeparated=true, escape=true, format="%s") + optional("-pow", power_file, spaceSeparated=true, escape=true, format="%s") + optional("-tdf", tumor_depth_file, spaceSeparated=true, escape=true, format="%s") + optional("-ndf", normal_depth_file, spaceSeparated=true, escape=true, format="%s") + conditional(filter_mismatching_base_and_quals, "-filterMBQ", escape=true, format="%s") + override def commandLine = super.commandLine + conditional(noop, "--noop", escape=true, format="%s") + conditional(enable_extended_output, "--enable_extended_output", escape=true, format="%s") + conditional(artifact_detection_mode, "--artifact_detection_mode", escape=true, format="%s") + optional("--tumor_sample_name", tumor_sample_name, spaceSeparated=true, escape=true, format="%s") + optional("--bam_tumor_sample_name", bam_tumor_sample_name, spaceSeparated=true, escape=true, format="%s") + optional("--normal_sample_name", normal_sample_name, spaceSeparated=true, escape=true, format="%s") + conditional(force_output, "--force_output", escape=true, format="%s") + conditional(force_alleles, "--force_alleles", escape=true, format="%s") + conditional(only_passing_calls, "--only_passing_calls", escape=true, format="%s") + optional("--initial_tumor_lod", initial_tumor_lod, spaceSeparated=true, escape=true, format=initial_tumor_lodFormat) + optional("--tumor_lod", tumor_lod, spaceSeparated=true, escape=true, format=tumor_lodFormat) + optional("--fraction_contamination", fraction_contamination, spaceSeparated=true, escape=true, format=fraction_contaminationFormat) + optional("--minimum_mutation_cell_fraction", 
minimum_mutation_cell_fraction, spaceSeparated=true, escape=true, format=minimum_mutation_cell_fractionFormat) + optional("--normal_lod", normal_lod, spaceSeparated=true, escape=true, format=normal_lodFormat) + optional("--normal_artifact_lod", normal_artifact_lod, spaceSeparated=true, escape=true, format=normal_artifact_lodFormat) + optional("--strand_artifact_lod", strand_artifact_lod, spaceSeparated=true, escape=true, format=strand_artifact_lodFormat) + optional("--strand_artifact_power_threshold", strand_artifact_power_threshold, spaceSeparated=true, escape=true, format=strand_artifact_power_thresholdFormat) + optional("--dbsnp_normal_lod", dbsnp_normal_lod, spaceSeparated=true, escape=true, format=dbsnp_normal_lodFormat) + optional("--somatic_classification_normal_power_threshold", somatic_classification_normal_power_threshold, spaceSeparated=true, escape=true, format=somatic_classification_normal_power_thresholdFormat) + optional("--minimum_normal_allele_fraction", minimum_normal_allele_fraction, spaceSeparated=true, escape=true, format=minimum_normal_allele_fractionFormat) + optional("--tumor_f_pretest", tumor_f_pretest, spaceSeparated=true, escape=true, format=tumor_f_pretestFormat) + optional("--min_qscore", min_qscore, spaceSeparated=true, escape=true, format="%s") + optional("--gap_events_threshold", gap_events_threshold, spaceSeparated=true, escape=true, format="%s") + optional("--heavily_clipped_read_fraction", heavily_clipped_read_fraction, spaceSeparated=true, escape=true, format=heavily_clipped_read_fractionFormat) + optional("--clipping_bias_pvalue_threshold", clipping_bias_pvalue_threshold, spaceSeparated=true, escape=true, format=clipping_bias_pvalue_thresholdFormat) + optional("--fraction_mapq0_threshold", fraction_mapq0_threshold, spaceSeparated=true, escape=true, format=fraction_mapq0_thresholdFormat) + optional("--pir_median_threshold", pir_median_threshold, spaceSeparated=true, escape=true, format=pir_median_thresholdFormat) + 
optional("--pir_mad_threshold", pir_mad_threshold, spaceSeparated=true, escape=true, format=pir_mad_thresholdFormat) + optional("--required_maximum_alt_allele_mapping_quality_score", required_maximum_alt_allele_mapping_quality_score, spaceSeparated=true, escape=true, format="%s") + optional("--max_alt_alleles_in_normal_count", max_alt_alleles_in_normal_count, spaceSeparated=true, escape=true, format="%s") + optional("--max_alt_alleles_in_normal_qscore_sum", max_alt_alleles_in_normal_qscore_sum, spaceSeparated=true, escape=true, format="%s") + optional("--max_alt_allele_in_normal_fraction", max_alt_allele_in_normal_fraction, spaceSeparated=true, escape=true, format=max_alt_allele_in_normal_fractionFormat) + optional("--power_constant_qscore", power_constant_qscore, spaceSeparated=true, escape=true, format="%s") + optional("--absolute_copy_number_data", absolute_copy_number_data, spaceSeparated=true, escape=true, format="%s") + optional("--power_constant_af", power_constant_af, spaceSeparated=true, escape=true, format=power_constant_afFormat) + optional("-o", out, spaceSeparated=true, escape=true, format="%s") + optional("-vcf", vcf, spaceSeparated=true, escape=true, format="%s") + conditional(no_cmdline_in_header, "-no_cmdline_in_header", escape=true, format="%s") + conditional(sites_only, "-sites_only", escape=true, format="%s") + conditional(bcf, "-bcf", escape=true, format="%s") + repeat("-dbsnp", dbsnp, formatPrefix=TaggedFile.formatCommandLineParameter, spaceSeparated=true, escape=true, format="%s") + repeat("-cosmic", cosmic, formatPrefix=TaggedFile.formatCommandLineParameter, spaceSeparated=true, escape=true, format="%s") + repeat("-normal_panel", normal_panel, formatPrefix=TaggedFile.formatCommandLineParameter, spaceSeparated=true, escape=true, format="%s") + optional("-cov", coverage_file, spaceSeparated=true, escape=true, format="%s") + optional("-cov_q20", coverage_20_q20_file, spaceSeparated=true, escape=true, format="%s") + optional("-pow", power_file, 
spaceSeparated=true, escape=true, format="%s") + optional("-tdf", tumor_depth_file, spaceSeparated=true, escape=true, format="%s") + optional("-ndf", normal_depth_file, spaceSeparated=true, escape=true, format="%s") + conditional(filter_mismatching_base_and_quals, "-filterMBQ", escape=true, format="%s") } From a234bacb02ee401efb493403e8afcd6b789fec4a Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Wed, 10 Oct 2012 15:00:17 -0400 Subject: [PATCH 63/90] Making nContigs parameter hidden in ReduceReads For now, the het reduction should only be performed for diploids (n=2). We haven't really tested it for other ploidy so it should remain hidden until someone braves it out. --- .../compression/reducereads/ReduceReads.java | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReads.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReads.java index 1b3e68647..5810bc94f 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReads.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReads.java @@ -181,15 +181,6 @@ public class ReduceReads extends ReadWalker, ReduceRea @Argument(fullName = "minimum_del_proportion_to_trigger_variant", shortName = "mindel", doc = "", required = false) private double minIndelProportionToTriggerVariant = 0.05; - /** - * Minimum proportion of indels in a site to trigger a variant region. Anything below this will be - * considered consensus. - */ - @Argument(fullName = "contigs", shortName = "ctg", doc = "", required = false) - private int nContigs = 2; - - - /** * Downsamples the coverage of a variable region approximately (guarantees the minimum to be equal to this). * A value of 0 turns downsampling off. 
@@ -197,6 +188,14 @@ public class ReduceReads extends ReadWalker, ReduceRea @Argument(fullName = "downsample_coverage", shortName = "ds", doc = "", required = false) private int downsampleCoverage = 250; + /** + * Number of chromossomes in the sample (this is used for the polyploid consensus compression). Only + * tested for humans (or organisms with n=2). Use at your own risk! + */ + @Hidden + @Argument(fullName = "contigs", shortName = "ctg", doc = "", required = false) + private int nContigs = 2; + @Hidden @Argument(fullName = "", shortName = "dl", doc = "", required = false) private int debugLevel = 0; From 80d92e0c636a58b9cfc948ab42f650a54955ba6d Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Fri, 12 Oct 2012 13:50:10 -0400 Subject: [PATCH 64/90] Allowing the GATK to have non-required outputs Modified the SAMFileWriterArgumentTypeDescriptor to accept output bam files that are null if they're not required (in the @Output annotation). This change enables the nWayOut parameter for the IndeRealigner and ReduceReads to operate optionally while maintaining the original single way out. 
[#DEV-10 transition:31 resolution:1] --- .../SAMFileWriterArgumentTypeDescriptor.java | 36 +++++++++---------- .../gatk/walkers/indels/IndelRealigner.java | 4 +-- .../indels/IndelRealignerIntegrationTest.java | 10 ++++++ 3 files changed, 27 insertions(+), 23 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileWriterArgumentTypeDescriptor.java b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileWriterArgumentTypeDescriptor.java index 8566f6c63..dcf2704f5 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileWriterArgumentTypeDescriptor.java +++ b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileWriterArgumentTypeDescriptor.java @@ -124,32 +124,28 @@ public class SAMFileWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor // This parser has been passed a null filename and the GATK is not responsible for creating a type default for the object; // therefore, the user must have failed to specify a type default - if(writerFileName == null) { - if(!source.isRequired()) - throw new MissingArgumentValueException(bamArgumentDefinition); - if(generateMD5) + if(writerFileName == null && generateMD5) throw new ArgumentException("MD5 generation specified, but no output file specified. If md5 generation is desired, please specify a BAM output file and an md5 file will be written alongside."); - } // Create the stub and set parameters. 
- SAMFileWriterStub stub; - if ( writerFileName != null ) + SAMFileWriterStub stub = null; // stub = new SAMFileWriterStub(engine, defaultOutputStream); + + if ( writerFileName != null ) { stub = new SAMFileWriterStub(engine, new File(writerFileName)); - else - stub = new SAMFileWriterStub(engine, defaultOutputStream); - if ( compressionLevel != null ) - stub.setCompressionLevel(compressionLevel); - if ( indexOnTheFly ) - stub.setIndexOnTheFly(indexOnTheFly); - if ( generateMD5 ) - stub.setGenerateMD5(generateMD5); - if ( simplifyBAM ) - stub.setSimplifyBAM(simplifyBAM); + if ( compressionLevel != null ) + stub.setCompressionLevel(compressionLevel); + if ( indexOnTheFly ) + stub.setIndexOnTheFly(indexOnTheFly); + if ( generateMD5 ) + stub.setGenerateMD5(generateMD5); + if ( simplifyBAM ) + stub.setSimplifyBAM(simplifyBAM); - // WARNING: Side effects required by engine! - parsingEngine.addTags(stub,getArgumentTags(matches)); - engine.addOutput(stub); + // WARNING: Side effects required by engine! + parsingEngine.addTags(stub,getArgumentTags(matches)); + engine.addOutput(stub); + } return stub; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java index 76d8d85c2..998894fbf 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java @@ -370,8 +370,6 @@ public class IndelRealigner extends ReadWalker { currentInterval = intervals.hasNext() ? 
intervals.next() : null; - writerToUse = writer; - if ( N_WAY_OUT != null ) { boolean createIndex = true; @@ -383,9 +381,9 @@ public class IndelRealigner extends ReadWalker { createIndex, generateMD5s,createProgramRecord(),KEEP_ALL_PG_RECORDS); } } else { - // set up the output writer setupWriter(getToolkit().getSAMFileHeader()); + writerToUse = writer; } manager = new ConstrainedMateFixingManager(writerToUse, getToolkit().getGenomeLocParser(), MAX_ISIZE_FOR_MOVEMENT, MAX_POS_MOVE_ALLOWED, MAX_RECORDS_IN_MEMORY); diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/IndelRealignerIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/IndelRealignerIntegrationTest.java index 040845828..9b464cfec 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/IndelRealignerIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/IndelRealignerIntegrationTest.java @@ -113,4 +113,14 @@ public class IndelRealignerIntegrationTest extends WalkerTest { executeTest(String.format("realigner [%s]", entry.getKey()), spec); } } + + @Test + public void testNWayOut() { + WalkerTestSpec spec1 = new WalkerTestSpec( + baseCommandPrefix + " -nWayOut .clean.bam ", + 1, + Arrays.asList("d41d8cd98f00b204e9800998ecf8427e")); + executeTest("test realigner nWayOut", spec1); + } + } From 69194e50322ac5f889ab6621aa29f7f24f42f24f Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Mon, 15 Oct 2012 13:24:58 -0400 Subject: [PATCH 69/90] Adding intellij example files to the repo --- intellij_example.tar.bz2 | Bin 0 -> 7520 bytes 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 intellij_example.tar.bz2 diff --git a/intellij_example.tar.bz2 b/intellij_example.tar.bz2 new file mode 100644 index 0000000000000000000000000000000000000000..bce16045cd1cc476305c5e59d07ff9b94b8e5d73 GIT binary patch literal 7520 zcmV-m9iQStT4*^jL0KkKS)7!cJOD4F|M~yi5CDHu|NsC0|M36+|L{No06+`?06+jh 
z1{h!`o8{-7h4qB5wR>^y>D#MWyS_S4UPrYW)`~XXKBY)=w?n(T?w=>u4%}F@T^prq zM&}*98+P}1CC>JGY3}jFJo}ytO>BKWz24nKq=%-|-tIdTv1Z%Ou|py0B#;mw z0$?VFLqIe|HdE0LQ`8zCr9V{lAE^M-CYlc@B!L1ZL{xsM>S#SrDYXw!Hq`?_27mwn z0006)At_HHdU~eyLFyU;NYu?W zO%G7?YBUW!LqkS@9-*KBcwhGq&L2>c7{2Bv%KPh~XqovPD`P$wqr0wA{OX>F7ZKmy zDsL?$v;dI_RDWe46~zQU)lvw>NPvKQ{r?Z1(}ww2_)M*fQwCl2HJP1e>Ri?dCZpMN zP=M?SA|wk1NPtvh=J3} zDw+~D0aFk#a8yBk1VvNe{^Q2@ATLqre5hlqO+ zJH6U_g@n*il^HZ-l$Xlw^mBWD_ji=uWfv%71y1==(-ozyjYp|LKHs?f9BLgry=7~; zQzc6vctrO5Ohh{&v$XFjif1`0L{s*Olu^?q8x}#rqzyaoVB#B4g(81Jx};qlYMjj? z2dYy`yjW&t6?wYx0N9Uh!RRNLa}rLdMi`DPNP-P&$TYbGk_IqjZvxK0qQ|cbe@$>KA+XQo15<95gH&cGF>(8JUi?q#;jjQhX7>?$D_)jV*fSGa99hISqAgCirHOBRlz z5-9I|sR$Ai`Yfa3X$-2y?KsEHOGX z@HXt%cKK^%iU{@ItVyi`Mvy~M#VuYFejK)2%tD1XPotWR1C6g7^>_3=M{GBTR=x6} zL$ie9VdKIYD#p_7X2Ij5c!ACsYkPJWj&|WWp}5G;)cO~hDhd@kB;Qp{=JQuL+IA<+ z9G6K-0B_4%_wIgYeR?@~V~!zKJsytphBh&$h7$B()y02uDWc?iHKJ3Oou2*+MfMx^=gIf{X3|lWppAz z-I7p3R)QX!Idb;|10w?J`_#t4+9oPM5Ep6hKa7!-%b*Z%Ne=e{WH2)WCKzJBSF^U& zK`^`eM3i5TFVExdQsTLk;G@qq9{>}@N65UeiIk4 z@~k{rAI=Uv%!sgaYY$=o+l0!JKSDr9R$?^eCXzi64~|&0Ce%+awpg_DqW1T;JnT{d zru{VON@j0;c#A;BkSsr#?|iam4$XGUx$RTb)H_pMY<00{Yt{q!{n-}(G#x&{RZq!9 z?#T+_N>%`kP|_a@fmo2-OJrXX!d!y~G&Dn3dYM^vWSZJ!#jw;(nuR5VhC?^k`y8?h4xtX)+j?PU}@iN>ljD+(Qmb z8dniBZ(_g=D1mLVH)}(M5H0vcNC?Cv1(;-)C)j(xXY2nS+gLe%zZ*W^p1!|_zv=O} z_Q!+DzkQ#@*8b@JY?61J{_8LQ7Wa7yo_=DdhrF->(?ZCJE#uL2ybpcSMKqaVBwSv) zNU6k^_VtahV8JC~!U6{d!bq&!4p|_$D-~)<6%s3$x=9Tg-hN~ZTTL>hPndt*gp7cg z+wR+roDg$1Hu?=(-&o%#Ayf15RN!Z%05j!NTZ&r35#?&$A)o_Kq_+aG2Tmv-^Ti}kzPUMz}gW$fs9 zJC4FgjTw|ACp)ypVq7XolvK9GOTU706q(I(#Eu(UtE5jUBYT&==_1{Y?P%VlGrzMb zBE|_o9do|DW~w@PU%$xNzAis=G|wxF)AvA^EC+4@aDj~7z-#+Yu_mAdbYOG_n;S_f z2MF%vrdGcJHml~P2xvLQOIQJlnTe)5Z8JtNk{R2)nVMB@y3m5+0?^4LZPW(|mtd*? 
zDv;N1W_Rb+ea4+?lrAx`155T=*|7;B>>wiy;Ml)7#VrgvNDTg;;hzJ_HNI$RTTqf4 zfyMeU1R+7uY;dsXgq*(jmUH&D>*C?4NtOzLb-!$s%bXb_Sue0mN5$wkpEM3kv8IvGA=_?Zzv1%_C1tsyl^1@RKH{a zciZw1@6q0f3Zxbb;ED{RY}JFbgv`jo;vnjTP++40u_0!9X$&Jll(K*-u?XPE#-34@ z(2*iq168t)2LIl%)odnH8EI_Yl*{iaXWjeIC?DX z@F63H#+Z$15ur99k-Q~?3g|8Zc=x8AR4D_lS&{5(Wb(9OWn~6P1uWA@lOmW&8qFIU z7SBN;v4-l*AW>A(2|!UvMJG5MwJuL6M+i`eC_Z*#ky3?khBR#txTdfRi~`O|X#<>q zay;fL4M@CAa10!#6DOq2oZBZ*`QRkS_ry;K+IvB5vHgUgDqb| z3@cbx6TJE0H(;83Aleb%_>debXaSWl^D1it&Z*F@y=N{NvpYj~%SKvwWbjz}(2QH5 zzP2fmjwex!Q0DDKn@@wk;d+8XInO!CB-!he7tkDYlzvB5io3y3~eaI5w=k}~_n&-pi{RD^=|p|INIK%M7^Ju>U^q=8=L%d;bA7r& zav%4cJ$b-?7%3m*P6<7LL;|e%;!{W=n0z(DI%^o;Bei5F?M_70?60j?x99A# z3qM)=cXkVG2kmpPJ8oP#mdXvhiT3f-MRyJ&15Gf(d7`7!iiTN|-|lq+clXLmg&CS~L$9jF&hP0Bm8xWJJK=$OMFB>@>4P6z#Xn3dcffp+LAC z!`V2YWzCFixsCZ_7L;F_QL9T?&p+$c)*hPKS8gSO~VTFj|ro3G{Iuof@swuwn#16JUnNC5#N!PbpVLE zvff%F#RxrAU($v^EeV!2DVBm~xc6xi-$OudTP+k6K2v02fE5E45$L!=gi;#?e7UiT zMhuXoWDk1Yu>gn#4)=DU3)lONdeFJ7$d8PZ2g9kss?ltvQp=qrImnG5r+V3zF4fd( z=B;z~@$<2{hrSQ|A=lv6Q712!p!qTbzdD`k(bs>{2Pp3?yqCWFJqT zzsdD8a3Kh21F_vQhkQ6nV4J6$yI@VnB-^q@1Poy1V4pzHT>!|B!-$_&1xU~WX+}BS| zI+H9JIpulj0|c`K4h+Jv+i8{#sDpzsY3kK&nf6^uxxRKVpRY}nq>h`5p#2eSW1JCB zoQDl9g0bddzNVPmI`lIIC+n3Z-%)6 z)m1D@3`N7B@Sgj)AR36~a4}*AFBJoK>y5WL$p%MGX_9PAb!p#rxvRNmYjZFf_p^MM zS*F2;S$4gmw~WV5MV$7|80zk2IN^|p!(1CUj&QiGuRFLn?P!*xF?*d9(+V$}IXT1} zTha_h(muFR1txBW5H`^S1RVSVru%s#2xv)Akeo8AEDyF+y-8{s&ujwd5cgA z{vZA1IN}ss`1mWy37bexU#GAM?R5hGixGW*YN1F6 zNe^y$@Ot{b0s=j%3iJAec(;rNi2(`#p%tYeftrrHgl%C`y`dwy83`F7-YoY_ox@e$ zL&B&_2$DC2^*`wAL)aSaJF>S~WkZ7Y-Du;z{ zz=U=xW(E3CCn_nOid4kc4!;tEos&Z9t35>kz>s9yLG>gOM({gKT^#n%!cAcT@WPry3G z2V$fF(Bh#$fe>JI^|(v(OqCu~Gq|s3b?7=j5C#Z=5wJo%cpU|9AqFF9LZ;v@MFUXd zs2N){bdrWWIl=TxEeH;2AWyYeEYN~qcFD}6*o@`j5Unt1JJgj_YN^CRhB!bw5Oy*P z1DcHm8GutysIu}&!*1M~s!zPA&Oi}v=#?bEF!B<|)OQu(7P)!*MrromGcLP%0kRC2-f z-Ma&_=fdV8BrOEsXnl*Tw^mqWLHgJM!Kwz2*{PSy3 zu1y0}WCf==J8l%kErz3IvwaFAt0TVM@-$^1MCp&5TKidOUd1p zYf^#|AzIhOeK`tn48Ym)u!~-}_Q+l&S8a}MP 
zqt<;`1h6{%dl7fl4{{^4=5Z9~GklcT&KW~EA3Cu3&;?mZn=%p-SGX&lf>n(5j&iOY zYQ5cz$?+OcwiV^|gbZ zDl{t&8X}C%;tMPtc~k`|q$FrGfm%YMX(X@VI=uFI0&+ldLE%k6`SmVPkC_HWF>Jjk zUDTu0qvTmpsIKA3Q+lPhpdN}HV1^2u*J2-*FWMiW;5$l`j~ao*gwCP$RM5i#_C_Ex zH)zO<;CO6Q=Wwgery}Pl$N)(r*fN-s03O~z4|h@!W6)yYC(`*Uf$0mj(1PL>cZ3#Z z4TbDihB0%-2YpbP;Si{_nnei&VL3_(rX}Qwue;<9@ZzbMbgFgWaWzMX7*HysutU{s z6$yK$fP%s@0>K!oAjF959~anz@iKxw9tVi2%a|O!4?k>B;`D(%h5u(vcUQi+mcKd#qyCaVfaiWCOtfxqGP5gZ9C@~hnd`7h<>b=e_+%r-y-BvC8wl_K{AeW^0CX@V5&D!>>QsSU^i-+=i>^k-=!j-OGaIbfVpkOo zov0DlGYvzkWdki>W|%{31zZ$_CjdJbXwoesiiTpR&LP}<$G(Ags#wwbgBY)fP=_5T zQ4Xh%0B4=sfb<{^vtZvf!~pRs?Q2_tCyTUQVcvR>^r#ARkO%Guv=4>qj_B@o4jqbM zt7aXvQ8X%2q25s!VhE5U6mV4RggF8x`vC+b>2V@PWF#OH>&BrAM934Jfz;}KW^E!s zf_sHg^L}8s>!NOo>yjx%-@oa32x!;2(Ek5!w#C`ezx9s#*X(t`x1;-X6+ z7Tr_H3_VZlT{4sbK3ug=pGr!XjFT71U-2y+_{;q+T4bJi(b= zap}Jvh}lP|*qy&ou8QcP>pMaQngHZp=&Vk$3eem_f*S=$R8;1V4~ZCnf}#?r8iXVQ z0j{X_oxDt(h>0TcAk-a!;E`5M2WU_pz^PFx0iuHO=~{qY8>t!!C^m$obS{f0N*n5P z90a#3kBG3kQg+Y~XP`YbFs-J>`*$M_gdyA-C_|>(hkfw|+l<+w zf}KDpwJ=nwn0Cr`@CL~cY#kjD!*FzDfO#Xz58x^D4{2FCkP+ji120 z`AK?g`I(q&&XIvsdP8c|-6RwyD9PaUmZHX{h)-f%GL}>`AdodPFC>U>q}W4M$7Yrv zXn_GT_PIBf+CkyE8OEjAE9XU38OMGBuvkEl2#MCGQP6b&cqsd};rC<;+96%VZpibB za5i6A1WKt|DOYLN0p46>D$r5;!~>XE;UL^8L$+U03KV+NtWE_Trs&B5hj{M;05y_z( zIB@{aYt}fs#2_Ui8xH+<1|!JJl0XMS2m}_23jO|IA2?LhL%8il_4O)!cmxPzpqfGY zc__a@$5HV6afV(GYNQppH5ZI_4QL$Cz*M(x41|T~>WW%5dc!&O=BaR(D8S87Uc(Wb z4GAcqP$MD;MhMW2k||&T_-I1Sc7quRcUP6)AC5{qas)meG;bs!BEU|AAWLmX=fy+6 z$p=H}qdcCy^{S?BA;kyA^aH=II~+xSl}Oz`ntcF$U;6-Xk#{x0N-aG@bNDK>cMb(tan literal 0 HcmV?d00001 From 213cc00abe2207398d9b5cca168b78bf0edf6434 Mon Sep 17 00:00:00 2001 From: kshakir Date: Mon, 15 Oct 2012 15:03:33 -0400 Subject: [PATCH 70/90] Refactored argument matching to support other plugins in addition to file lists. Added plugin support for sending Queue status messages. Argument parsing can store subclasses of java.io.File, for example RemoteFile. 
--- ivy.xml | 4 +- .../sting/commandline/ArgumentMatch.java | 28 ++--- .../commandline/ArgumentMatchFileValue.java | 27 +++++ .../commandline/ArgumentMatchSource.java | 42 ++++---- .../commandline/ArgumentMatchSourceType.java | 4 +- .../commandline/ArgumentMatchStringValue.java | 24 +++++ .../sting/commandline/ArgumentMatchValue.java | 18 ++++ .../commandline/ArgumentTypeDescriptor.java | 34 +++--- .../sting/commandline/CommandLineProgram.java | 2 +- .../sting/commandline/ParsedArgs.java | 13 +++ .../sting/commandline/ParsedListArgs.java | 30 ++++++ .../sting/commandline/ParsingEngine.java | 102 +++++++++++------- .../ParsingEngineArgumentFiles.java | 30 ++++++ .../ParsingEngineArgumentProvider.java | 12 +++ .../OutputStreamArgumentTypeDescriptor.java | 2 +- .../SAMFileReaderArgumentTypeDescriptor.java | 12 +-- .../SAMFileWriterArgumentTypeDescriptor.java | 13 ++- .../VCFWriterArgumentTypeDescriptor.java | 6 +- .../sting/utils/help/HelpFormatter.java | 18 ++-- .../ArgumentMatchSiteUnitTest.java | 2 +- .../ArgumentMatchSourceUnitTest.java | 16 +-- .../sting/queue/QCommandLine.scala | 64 ++++++++--- .../broadinstitute/sting/queue/QScript.scala | 31 +++++- .../sting/queue/engine/QStatusMessenger.scala | 10 ++ .../queue/extensions/gatk/GATKIntervals.scala | 2 +- .../sting/queue/util/RemoteFile.scala | 13 +++ 26 files changed, 409 insertions(+), 150 deletions(-) create mode 100644 public/java/src/org/broadinstitute/sting/commandline/ArgumentMatchFileValue.java create mode 100644 public/java/src/org/broadinstitute/sting/commandline/ArgumentMatchStringValue.java create mode 100644 public/java/src/org/broadinstitute/sting/commandline/ArgumentMatchValue.java create mode 100644 public/java/src/org/broadinstitute/sting/commandline/ParsedArgs.java create mode 100644 public/java/src/org/broadinstitute/sting/commandline/ParsedListArgs.java create mode 100644 public/java/src/org/broadinstitute/sting/commandline/ParsingEngineArgumentFiles.java create mode 100644 
public/java/src/org/broadinstitute/sting/commandline/ParsingEngineArgumentProvider.java create mode 100644 public/scala/src/org/broadinstitute/sting/queue/engine/QStatusMessenger.scala create mode 100644 public/scala/src/org/broadinstitute/sting/queue/util/RemoteFile.scala diff --git a/ivy.xml b/ivy.xml index 0761cb411..1e3346ff5 100644 --- a/ivy.xml +++ b/ivy.xml @@ -78,8 +78,8 @@ - - + + diff --git a/public/java/src/org/broadinstitute/sting/commandline/ArgumentMatch.java b/public/java/src/org/broadinstitute/sting/commandline/ArgumentMatch.java index c0823e5c5..6c8fb1f4d 100755 --- a/public/java/src/org/broadinstitute/sting/commandline/ArgumentMatch.java +++ b/public/java/src/org/broadinstitute/sting/commandline/ArgumentMatch.java @@ -46,7 +46,7 @@ public class ArgumentMatch implements Iterable { /** * Maps indices of command line arguments to values paired with that argument. */ - public final SortedMap> sites = new TreeMap>(); + public final SortedMap> sites = new TreeMap>(); /** * An ordered, freeform collection of tags. @@ -90,11 +90,11 @@ public class ArgumentMatch implements Iterable { * @param value Value for the argument at this position. * @param tags ordered freeform text tags associated with this argument. 
*/ - private ArgumentMatch(final String label, final ArgumentDefinition definition, final ArgumentMatchSite site, final String value, final Tags tags) { + private ArgumentMatch(final String label, final ArgumentDefinition definition, final ArgumentMatchSite site, final ArgumentMatchValue value, final Tags tags) { this.label = label; this.definition = definition; - ArrayList values = new ArrayList(); + ArrayList values = new ArrayList(); if( value != null ) values.add(value); sites.put(site,values ); @@ -131,11 +131,11 @@ public class ArgumentMatch implements Iterable { */ @SuppressWarnings("unchecked") ArgumentMatch transform(Multiplexer multiplexer, Object key) { - SortedMap> newIndices = new TreeMap>(); - for(Map.Entry> site: sites.entrySet()) { - List newEntries = new ArrayList(); - for(String entry: site.getValue()) - newEntries.add(multiplexer.transformArgument(key,entry)); + SortedMap> newIndices = new TreeMap>(); + for(Map.Entry> site: sites.entrySet()) { + List newEntries = new ArrayList(); + for(ArgumentMatchValue entry: site.getValue()) + newEntries.add(new ArgumentMatchStringValue(multiplexer.transformArgument(key,entry.asString()))); newIndices.put(site.getKey(),newEntries); } ArgumentMatch newArgumentMatch = new ArgumentMatch(label,definition); @@ -165,7 +165,7 @@ public class ArgumentMatch implements Iterable { /** * Iterate over each available token. */ - private Iterator tokenIterator = null; + private Iterator tokenIterator = null; /** * The next site to return. Null if none remain. @@ -175,7 +175,7 @@ public class ArgumentMatch implements Iterable { /** * The next token to return. Null if none remain. */ - String nextToken = null; + ArgumentMatchValue nextToken = null; { siteIterator = sites.keySet().iterator(); @@ -254,9 +254,9 @@ public class ArgumentMatch implements Iterable { * @param site site of the command-line argument to which this value is mated. * @param value Text representation of value to add. 
*/ - public void addValue( ArgumentMatchSite site, String value ) { + public void addValue( ArgumentMatchSite site, ArgumentMatchValue value ) { if( !sites.containsKey(site) || sites.get(site) == null ) - sites.put(site, new ArrayList() ); + sites.put(site, new ArrayList() ); sites.get(site).add(value); } @@ -275,8 +275,8 @@ public class ArgumentMatch implements Iterable { * Return the values associated with this argument match. * @return A collection of the string representation of these value. */ - public List values() { - List values = new ArrayList(); + public List values() { + List values = new ArrayList(); for( ArgumentMatchSite site: sites.keySet() ) { if( sites.get(site) != null ) values.addAll(sites.get(site)); diff --git a/public/java/src/org/broadinstitute/sting/commandline/ArgumentMatchFileValue.java b/public/java/src/org/broadinstitute/sting/commandline/ArgumentMatchFileValue.java new file mode 100644 index 000000000..344b6829a --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/commandline/ArgumentMatchFileValue.java @@ -0,0 +1,27 @@ +package org.broadinstitute.sting.commandline; + +import java.io.File; + +/** + * Holds a reference to a file as an argument match value. + * + * This is useful when the type of the stored file may be a subclass of java.io.File, + * for example a Queue RemoteFile. + */ +public class ArgumentMatchFileValue extends ArgumentMatchValue { + private final File file; + + public ArgumentMatchFileValue(File file) { + this.file = file; + } + + @Override + public String asString() { + return file == null ? 
null : file.getAbsolutePath(); + } + + @Override + public File asFile() { + return file; + } +} diff --git a/public/java/src/org/broadinstitute/sting/commandline/ArgumentMatchSource.java b/public/java/src/org/broadinstitute/sting/commandline/ArgumentMatchSource.java index ed2700006..9dfb3afbe 100644 --- a/public/java/src/org/broadinstitute/sting/commandline/ArgumentMatchSource.java +++ b/public/java/src/org/broadinstitute/sting/commandline/ArgumentMatchSource.java @@ -24,38 +24,36 @@ package org.broadinstitute.sting.commandline; -import java.io.File; - /** - * Where an argument match originated, via the commandline or a file. + * Where an argument match originated, via the commandline or a custom provider. */ public class ArgumentMatchSource implements Comparable { public static final ArgumentMatchSource COMMAND_LINE = new ArgumentMatchSource(ArgumentMatchSourceType.CommandLine, null); private final ArgumentMatchSourceType type; - private final File file; + private final String description; /** * Creates an argument match source from the specified file. - * @param file File specifying the arguments. Must not be null. + * @param description Where the arguments originated. 
*/ - public ArgumentMatchSource(File file) { - this(ArgumentMatchSourceType.File, file); + public ArgumentMatchSource(String description) { + this(ArgumentMatchSourceType.Provider, description); } - private ArgumentMatchSource(ArgumentMatchSourceType type, File file) { - if (type == ArgumentMatchSourceType.File && file == null) - throw new IllegalArgumentException("An argument match source of type File cannot have a null file."); + private ArgumentMatchSource(ArgumentMatchSourceType type, String description) { + if (type == ArgumentMatchSourceType.Provider && description == null) + throw new IllegalArgumentException("An argument match source provider cannot have a null description."); this.type = type; - this.file = file; + this.description = description; } public ArgumentMatchSourceType getType() { return type; } - public File getFile() { - return file; + public String getDescription() { + return description; } @Override @@ -65,13 +63,13 @@ public class ArgumentMatchSource implements Comparable { ArgumentMatchSource that = (ArgumentMatchSource) o; - return (type == that.type) && (file == null ? that.file == null : file.equals(that.file)); + return (type == that.type) && (description == null ? that.description == null : description.equals(that.description)); } @Override public int hashCode() { int result = type != null ? type.hashCode() : 0; - result = 31 * result + (file != null ? file.hashCode() : 0); + result = 31 * result + (description != null ? description.hashCode() : 0); return result; } @@ -84,15 +82,15 @@ public class ArgumentMatchSource implements Comparable { if (comp != 0) return comp; - File f1 = this.file; - File f2 = that.file; + String d1 = this.description; + String d2 = that.description; - if ((f1 == null) ^ (f2 == null)) { - // If one of the files is null and the other is not - // put the null file first - return f1 == null ? 
-1 : 1; + if ((d1 == null) ^ (d2 == null)) { + // If one of the descriptions is null and the other is not + // put the null description first + return d1 == null ? -1 : 1; } - return f1 == null ? 0 : f1.compareTo(f2); + return d1 == null ? 0 : d1.compareTo(d2); } } diff --git a/public/java/src/org/broadinstitute/sting/commandline/ArgumentMatchSourceType.java b/public/java/src/org/broadinstitute/sting/commandline/ArgumentMatchSourceType.java index 3ff6e21d4..118316473 100644 --- a/public/java/src/org/broadinstitute/sting/commandline/ArgumentMatchSourceType.java +++ b/public/java/src/org/broadinstitute/sting/commandline/ArgumentMatchSourceType.java @@ -25,8 +25,8 @@ package org.broadinstitute.sting.commandline; /** - * Type of where an argument match originated, via the commandline or a file. + * Type of where an argument match originated, via the commandline or a some other provider. */ public enum ArgumentMatchSourceType { - CommandLine, File + CommandLine, Provider } diff --git a/public/java/src/org/broadinstitute/sting/commandline/ArgumentMatchStringValue.java b/public/java/src/org/broadinstitute/sting/commandline/ArgumentMatchStringValue.java new file mode 100644 index 000000000..bb2015c3b --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/commandline/ArgumentMatchStringValue.java @@ -0,0 +1,24 @@ +package org.broadinstitute.sting.commandline; + +import java.io.File; + +/** + * Argument values that originated from a string. + */ +public class ArgumentMatchStringValue extends ArgumentMatchValue { + private final String value; + + public ArgumentMatchStringValue(String value) { + this.value = value; + } + + @Override + public String asString() { + return value; + } + + @Override + public File asFile() { + return value == null ? 
null : new File(value); + } +} diff --git a/public/java/src/org/broadinstitute/sting/commandline/ArgumentMatchValue.java b/public/java/src/org/broadinstitute/sting/commandline/ArgumentMatchValue.java new file mode 100644 index 000000000..bed4edfa6 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/commandline/ArgumentMatchValue.java @@ -0,0 +1,18 @@ +package org.broadinstitute.sting.commandline; + +import java.io.File; + +/** + * Returns argument values as either strings or values. + */ +public abstract class ArgumentMatchValue { + /** + * @return the value of this argument as a String object. + */ + public abstract String asString(); + + /** + * @return the value of this argument as a File object. + */ + public abstract File asFile(); +} diff --git a/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java b/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java index dd4a151bf..4b9774806 100644 --- a/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java +++ b/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java @@ -215,8 +215,8 @@ public abstract class ArgumentTypeDescriptor { * @param matches The matches for the given argument. * @return The value of the argument if available, or null if not present. */ - protected String getArgumentValue( ArgumentDefinition definition, ArgumentMatches matches ) { - Collection argumentValues = getArgumentValues( definition, matches ); + protected ArgumentMatchValue getArgumentValue( ArgumentDefinition definition, ArgumentMatches matches ) { + Collection argumentValues = getArgumentValues( definition, matches ); if( argumentValues.size() > 1 ) throw new UserException.CommandLineException("Multiple values associated with given definition, but this argument expects only one: " + definition.fullName); return argumentValues.size() > 0 ? 
argumentValues.iterator().next() : null; @@ -244,8 +244,8 @@ public abstract class ArgumentTypeDescriptor { * @param matches The matches for the given argument. * @return The value of the argument if available, or an empty collection if not present. */ - protected Collection getArgumentValues( ArgumentDefinition definition, ArgumentMatches matches ) { - Collection values = new ArrayList(); + protected Collection getArgumentValues( ArgumentDefinition definition, ArgumentMatches matches ) { + Collection values = new ArrayList(); for( ArgumentMatch match: matches ) { if( match.definition.equals(definition) ) values.addAll(match.values()); @@ -310,7 +310,7 @@ public abstract class ArgumentTypeDescriptor { */ protected Object parseBinding(ArgumentSource source, Type type, ArgumentMatches matches, Tags tags) { ArgumentDefinition defaultDefinition = createDefaultArgumentDefinition(source); - String value = getArgumentValue(defaultDefinition, matches); + ArgumentMatchValue value = getArgumentValue(defaultDefinition, matches); @SuppressWarnings("unchecked") Class parameterType = JVMUtils.getParameterizedTypeClass(type); String name = defaultDefinition.fullName; @@ -328,7 +328,7 @@ public abstract class ArgumentTypeDescriptor { * @param fieldName The name of the field that was parsed. Used for error reporting. * @return The newly created binding object of type bindingClass. */ - public static Object parseBinding(String value, Class parameterType, Type bindingClass, + public static Object parseBinding(ArgumentMatchValue value, Class parameterType, Type bindingClass, String bindingName, Tags tags, String fieldName) { try { String tribbleType = null; @@ -337,7 +337,7 @@ public abstract class ArgumentTypeDescriptor { throw new UserException.CommandLineException( String.format("Unexpected number of positional tags for argument %s : %s. 
" + "Rod bindings only support -X:type and -X:name,type argument styles", - value, fieldName)); + value.asString(), fieldName)); } else if ( tags.getPositionalTags().size() == 2 ) { // -X:name,type style bindingName = tags.getPositionalTags().get(0); @@ -366,7 +366,7 @@ public abstract class ArgumentTypeDescriptor { if ( tribbleType == null ) { // try to determine the file type dynamically - File file = new File(value); + File file = value.asFile(); if ( file.canRead() && file.isFile() ) { FeatureManager.FeatureDescriptor featureDescriptor = manager.getByFiletype(file); if ( featureDescriptor != null ) { @@ -379,7 +379,7 @@ public abstract class ArgumentTypeDescriptor { // IntervalBinding can be created from a normal String Class rawType = (makeRawTypeIfNecessary(bindingClass)); try { - return rawType.getConstructor(String.class).newInstance(value); + return rawType.getConstructor(String.class).newInstance(value.asString()); } catch (NoSuchMethodException e) { /* ignore */ } @@ -399,14 +399,14 @@ public abstract class ArgumentTypeDescriptor { } Constructor ctor = (makeRawTypeIfNecessary(bindingClass)).getConstructor(Class.class, String.class, String.class, String.class, Tags.class); - return ctor.newInstance(parameterType, bindingName, value, tribbleType, tags); + return ctor.newInstance(parameterType, bindingName, value.asString(), tribbleType, tags); } catch (Exception e) { if ( e instanceof UserException ) throw ((UserException)e); else throw new UserException.CommandLineException( String.format("Failed to parse value %s for argument %s. 
Message: %s", - value, fieldName, e.getMessage())); + value.asString(), fieldName, e.getMessage())); } } } @@ -517,7 +517,7 @@ class SimpleArgumentTypeDescriptor extends ArgumentTypeDescriptor { return true; ArgumentDefinition defaultDefinition = createDefaultArgumentDefinition(source); - String value = getArgumentValue( defaultDefinition, matches ); + ArgumentMatchValue value = getArgumentValue(defaultDefinition, matches); Object result; Tags tags = getArgumentTags(matches); @@ -527,12 +527,12 @@ class SimpleArgumentTypeDescriptor extends ArgumentTypeDescriptor { Method valueOf = primitiveToWrapperMap.get(type).getMethod("valueOf",String.class); if(value == null) throw new MissingArgumentValueException(createDefaultArgumentDefinition(source)); - result = valueOf.invoke(null,value.trim()); + result = valueOf.invoke(null,value.asString().trim()); } else if (type.isEnum()) { Object[] vals = type.getEnumConstants(); Object defaultEnumeration = null; // as we look at options, record the default option if it exists for (Object val : vals) { - if (String.valueOf(val).equalsIgnoreCase(value)) return val; + if (String.valueOf(val).equalsIgnoreCase(value.asString())) return val; try { if (type.getField(val.toString()).isAnnotationPresent(EnumerationArgumentDefault.class)) defaultEnumeration = val; } catch (NoSuchFieldException e) { throw new ReviewedStingException("parsing " + type.toString() + "doesn't contain the field " + val.toString()); } } @@ -544,10 +544,12 @@ class SimpleArgumentTypeDescriptor extends ArgumentTypeDescriptor { else if (value == null) throw new MissingArgumentValueException(createDefaultArgumentDefinition(source)); else - throw new UnknownEnumeratedValueException(createDefaultArgumentDefinition(source),value); + throw new UnknownEnumeratedValueException(createDefaultArgumentDefinition(source),value.asString()); + } else if (type.equals(File.class)) { + result = value.asFile(); } else { Constructor ctor = type.getConstructor(String.class); - result = 
ctor.newInstance(value); + result = ctor.newInstance(value.asString()); } } catch (UserException e) { throw e; diff --git a/public/java/src/org/broadinstitute/sting/commandline/CommandLineProgram.java b/public/java/src/org/broadinstitute/sting/commandline/CommandLineProgram.java index 15ec9dfe5..d77ae67cf 100644 --- a/public/java/src/org/broadinstitute/sting/commandline/CommandLineProgram.java +++ b/public/java/src/org/broadinstitute/sting/commandline/CommandLineProgram.java @@ -174,7 +174,7 @@ public abstract class CommandLineProgram { ParsingEngine parser = clp.parser = new ParsingEngine(clp); parser.addArgumentSource(clp.getClass()); - Map> parsedArgs; + Map parsedArgs; // process the args if (clp.canAddArgumentsDynamically()) { diff --git a/public/java/src/org/broadinstitute/sting/commandline/ParsedArgs.java b/public/java/src/org/broadinstitute/sting/commandline/ParsedArgs.java new file mode 100644 index 000000000..9ab315175 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/commandline/ParsedArgs.java @@ -0,0 +1,13 @@ +package org.broadinstitute.sting.commandline; + +/** + * Represents a collection of parsed arguments for an argument source. + * + * Useful for printing out help documents. + */ +public abstract class ParsedArgs { + /** + * @return A compact description of the arguments from an provider/source. + */ + public abstract String getDescription(); +} diff --git a/public/java/src/org/broadinstitute/sting/commandline/ParsedListArgs.java b/public/java/src/org/broadinstitute/sting/commandline/ParsedListArgs.java new file mode 100644 index 000000000..a77e73bcf --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/commandline/ParsedListArgs.java @@ -0,0 +1,30 @@ +package org.broadinstitute.sting.commandline; + +import org.apache.commons.lang.StringUtils; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +/** + * A list of string arguments, usually from the command line or an args list file. 
+ */ +public class ParsedListArgs extends ParsedArgs { + private final List args = new ArrayList(); + + public ParsedListArgs() { + } + + public ParsedListArgs(List args) { + this.args.addAll(args); + } + + public void add(String... args) { + this.args.addAll(Arrays.asList(args)); + } + + @Override + public String getDescription() { + return StringUtils.join(this.args, " "); + } +} diff --git a/public/java/src/org/broadinstitute/sting/commandline/ParsingEngine.java b/public/java/src/org/broadinstitute/sting/commandline/ParsingEngine.java index 0fac195e1..a8b729be4 100755 --- a/public/java/src/org/broadinstitute/sting/commandline/ParsingEngine.java +++ b/public/java/src/org/broadinstitute/sting/commandline/ParsingEngine.java @@ -30,6 +30,7 @@ import org.apache.commons.io.FileUtils; import org.apache.log4j.Logger; import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.classloader.JVMUtils; +import org.broadinstitute.sting.utils.classloader.PluginManager; import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; @@ -61,7 +62,7 @@ public class ParsingEngine { * Indicates as best as possible where command-line text remains unmatched * to existing arguments. */ - ArgumentMatches argumentMatches = null; + private ArgumentMatches argumentMatches = null; /** * Techniques for parsing and for argument lookup. @@ -88,7 +89,10 @@ public class ParsingEngine { /** * List of tags associated with the given instantiation of the command-line argument. 
*/ - private final Map tags = new IdentityHashMap(); + private final Map tags = new IdentityHashMap(); + + private PluginManager argumentProviderPluginManager = + new PluginManager(ParsingEngineArgumentProvider.class); /** * our log, which we want to capture anything from org.broadinstitute.sting @@ -105,7 +109,10 @@ public class ParsingEngine { argumentTypeDescriptors.addAll(clp.getArgumentTypeDescriptors()); argumentTypeDescriptors.addAll(STANDARD_ARGUMENT_TYPE_DESCRIPTORS); - addArgumentSource(ParsingEngineArgumentFiles.class); + List> providers = argumentProviderPluginManager.getPlugins(); + for (Class provider: providers) { + addArgumentSource(provider); + } } /** @@ -117,6 +124,10 @@ public class ParsingEngine { addArgumentSource(null, source); } + public ArgumentMatches getArgumentMatches() { + return argumentMatches; + } + /** * Add an argument source. Argument sources are expected to have * any number of fields with an @Argument annotation attached. @@ -156,29 +167,30 @@ public class ParsingEngine { * @param tokens Tokens passed on the command line. * @return The parsed arguments by file. */ - public SortedMap> parse( String[] tokens ) { + public SortedMap parse( String[] tokens ) { argumentMatches = new ArgumentMatches(); - SortedMap> parsedArgs = new TreeMap>(); + SortedMap parsedArgs = new TreeMap(); List cmdLineTokens = Arrays.asList(tokens); parse(ArgumentMatchSource.COMMAND_LINE, cmdLineTokens, argumentMatches, parsedArgs); - ParsingEngineArgumentFiles argumentFiles = new ParsingEngineArgumentFiles(); + List providers = argumentProviderPluginManager.createAllTypes(); - // Load the arguments ONLY into the argument files. - // Validation may optionally run on the rest of the arguments. - loadArgumentsIntoObject(argumentFiles); + for (ParsingEngineArgumentProvider provider: providers) { + // Load the arguments ONLY into the provider. + // Validation may optionally run on the rest of the arguments. 
+ loadArgumentsIntoObject(provider); + } - for (File file: argumentFiles.files) { - List fileTokens = getArguments(file); - parse(new ArgumentMatchSource(file), fileTokens, argumentMatches, parsedArgs); + for (ParsingEngineArgumentProvider provider: providers) { + provider.parse(this, parsedArgs); } return parsedArgs; } - private void parse(ArgumentMatchSource matchSource, List tokens, - ArgumentMatches argumentMatches, SortedMap> parsedArgs) { + public void parse(ArgumentMatchSource matchSource, List tokens, + ArgumentMatches argumentMatches, SortedMap parsedArgs) { ArgumentMatchSite lastArgumentMatchSite = new ArgumentMatchSite(matchSource, -1); int i = 0; @@ -195,19 +207,44 @@ public class ParsingEngine { } else { if( argumentMatches.hasMatch(lastArgumentMatchSite) && - !argumentMatches.getMatch(lastArgumentMatchSite).hasValueAtSite(lastArgumentMatchSite)) - argumentMatches.getMatch(lastArgumentMatchSite).addValue( lastArgumentMatchSite, token ); + !argumentMatches.getMatch(lastArgumentMatchSite).hasValueAtSite(lastArgumentMatchSite)) + argumentMatches.getMatch(lastArgumentMatchSite).addValue( lastArgumentMatchSite, new ArgumentMatchStringValue(token) ); else - argumentMatches.MissingArgument.addValue( site, token ); + argumentMatches.MissingArgument.addValue( site, new ArgumentMatchStringValue(token) ); } i++; } - parsedArgs.put(matchSource, tokens); + parsedArgs.put(matchSource, new ParsedListArgs(tokens)); } - private List getArguments(File file) { + public void parsePairs(ArgumentMatchSource matchSource, List> tokens, + ArgumentMatches argumentMatches, ParsedArgs matchSourceArgs, + SortedMap parsedArgs) { + int i = 0; + for (Pair pair: tokens) { + + ArgumentMatchSite site = new ArgumentMatchSite(matchSource, i); + List matchers = Arrays.asList(ArgumentDefinitions.FullNameDefinitionMatcher, ArgumentDefinitions.ShortNameDefinitionMatcher); + ArgumentDefinition definition = null; + for (DefinitionMatcher matcher: matchers) { + definition = 
argumentDefinitions.findArgumentDefinition( pair.getFirst(), matcher ); + if (definition != null) + break; + } + if (definition == null) + continue; + ArgumentMatch argumentMatch = new ArgumentMatch(pair.getFirst(), definition, site, new Tags()); + argumentMatches.mergeInto(argumentMatch); + argumentMatch.addValue(site, pair.getSecond()); + i++; + } + + parsedArgs.put(matchSource, matchSourceArgs); + } + + protected List getArguments(File file) { try { if (file.getAbsolutePath().endsWith(".list")) { return getListArguments(file); @@ -283,9 +320,9 @@ public class ParsingEngine { // Ensure that the field contents meet the validation criteria specified by the regular expression. for( ArgumentMatch verifiableMatch: verifiableMatches ) { - for( String value: verifiableMatch.values() ) { - if( verifiableArgument.validation != null && !value.matches(verifiableArgument.validation) ) - invalidValues.add( new Pair(verifiableArgument, value) ); + for( ArgumentMatchValue value: verifiableMatch.values() ) { + if( verifiableArgument.validation != null && !value.asString().matches(verifiableArgument.validation) ) + invalidValues.add( new Pair(verifiableArgument, value.asString()) ); } } } @@ -629,21 +666,21 @@ class UnmatchedArgumentException extends ArgumentException { private static String formatArguments( ArgumentMatch invalidValues ) { StringBuilder sb = new StringBuilder(); for( ArgumentMatchSite site: invalidValues.sites.keySet() ) - for( String value: invalidValues.sites.get(site) ) { + for( ArgumentMatchValue value: invalidValues.sites.get(site) ) { switch (site.getSource().getType()) { case CommandLine: sb.append( String.format("%nInvalid argument value '%s' at position %d.", - value, site.getIndex()) ); + value.asString(), site.getIndex()) ); break; - case File: - sb.append( String.format("%nInvalid argument value '%s' in file %s at position %d.", - value, site.getSource().getFile().getAbsolutePath(), site.getIndex()) ); + case Provider: + sb.append( 
String.format("%nInvalid argument value '%s' in %s at position %d.", + value.asString(), site.getSource().getDescription(), site.getIndex()) ); break; default: throw new RuntimeException( String.format("Unexpected argument match source type: %s", site.getSource().getType())); } - if(value != null && Utils.dupString(' ',value.length()).equals(value)) + if(value.asString() != null && Utils.dupString(' ',value.asString().length()).equals(value.asString())) sb.append(" Please make sure any line continuation backslashes on your command line are not followed by whitespace."); } return sb.toString(); @@ -696,12 +733,3 @@ class UnknownEnumeratedValueException extends ArgumentException { return String.format("Invalid value %s specified for argument %s; valid options are (%s).", argumentPassed, definition.fullName, Utils.join(",",definition.validOptions)); } } - -/** - * Container class to store the list of argument files. - * The files will be parsed after the command line arguments. - */ -class ParsingEngineArgumentFiles { - @Argument(fullName = "arg_file", shortName = "args", doc = "Reads arguments from the specified file", required = false) - public List files = new ArrayList(); -} diff --git a/public/java/src/org/broadinstitute/sting/commandline/ParsingEngineArgumentFiles.java b/public/java/src/org/broadinstitute/sting/commandline/ParsingEngineArgumentFiles.java new file mode 100644 index 000000000..3f3921937 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/commandline/ParsingEngineArgumentFiles.java @@ -0,0 +1,30 @@ +package org.broadinstitute.sting.commandline; + +import java.io.File; +import java.util.ArrayList; +import java.util.List; +import java.util.SortedMap; + +/** + * Container class to store the list of argument files. + * The files will be parsed after the command line arguments. 
+ */ +public class ParsingEngineArgumentFiles extends ParsingEngineArgumentProvider { + @Argument(fullName = "arg_file", shortName = "args", doc = "Reads arguments from the specified file", required = false) + public List files = new ArrayList(); + + @Override + public void parse(ParsingEngine parsingEngine, SortedMap parsedArgs) { + ArgumentMatches argumentMatches = parsingEngine.getArgumentMatches(); + for (File file: this.files) { + List fileTokens = parsingEngine.getArguments(file); + parsingEngine.parse(new ArgumentMatchFileSource(file), fileTokens, argumentMatches, parsedArgs); + } + } +} + +class ArgumentMatchFileSource extends ArgumentMatchSource { + ArgumentMatchFileSource(File file) { + super("file " + file.getAbsolutePath()); + } +} diff --git a/public/java/src/org/broadinstitute/sting/commandline/ParsingEngineArgumentProvider.java b/public/java/src/org/broadinstitute/sting/commandline/ParsingEngineArgumentProvider.java new file mode 100644 index 000000000..a57f8b08a --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/commandline/ParsingEngineArgumentProvider.java @@ -0,0 +1,12 @@ +package org.broadinstitute.sting.commandline; + +import java.util.List; +import java.util.SortedMap; + +/** + * A class that can parse arguments for the engine + */ +public abstract class ParsingEngineArgumentProvider { + public abstract void parse(ParsingEngine parsingEngine, SortedMap parsedArgs); +} + diff --git a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/OutputStreamArgumentTypeDescriptor.java b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/OutputStreamArgumentTypeDescriptor.java index da4eb3955..ac01468eb 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/OutputStreamArgumentTypeDescriptor.java +++ b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/OutputStreamArgumentTypeDescriptor.java @@ -86,7 +86,7 @@ public class OutputStreamArgumentTypeDescriptor extends ArgumentTypeDescriptor { @Override public Object parse( 
ParsingEngine parsingEngine, ArgumentSource source, Type type, ArgumentMatches matches ) { ArgumentDefinition definition = createDefaultArgumentDefinition(source); - String fileName = getArgumentValue( definition, matches ); + String fileName = getArgumentValue( definition, matches ).asString(); // This parser has been passed a null filename and the GATK is not responsible for creating a type default for the object; // therefore, the user must have failed to specify a type default diff --git a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileReaderArgumentTypeDescriptor.java b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileReaderArgumentTypeDescriptor.java index 83d1b7eb2..f13cb8fa8 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileReaderArgumentTypeDescriptor.java +++ b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileReaderArgumentTypeDescriptor.java @@ -25,15 +25,11 @@ package org.broadinstitute.sting.gatk.io.stubs; import net.sf.samtools.SAMFileReader; -import org.broadinstitute.sting.commandline.ArgumentMatches; -import org.broadinstitute.sting.commandline.ArgumentSource; -import org.broadinstitute.sting.commandline.ArgumentTypeDescriptor; -import org.broadinstitute.sting.commandline.ParsingEngine; +import org.broadinstitute.sting.commandline.*; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.sam.SAMFileReaderBuilder; -import java.io.File; import java.lang.reflect.Type; /** @@ -47,7 +43,7 @@ public class SAMFileReaderArgumentTypeDescriptor extends ArgumentTypeDescriptor /** * Create a new SAMFileReader argument, notifying the given engine when that argument has been created. 
- * @param engine + * @param engine engine */ public SAMFileReaderArgumentTypeDescriptor( GenomeAnalysisEngine engine ) { this.engine = engine; @@ -62,12 +58,12 @@ public class SAMFileReaderArgumentTypeDescriptor extends ArgumentTypeDescriptor public Object parse( ParsingEngine parsingEngine, ArgumentSource source, Type type, ArgumentMatches matches ) { SAMFileReaderBuilder builder = new SAMFileReaderBuilder(); - String readerFileName = getArgumentValue( createDefaultArgumentDefinition(source), matches ); + ArgumentMatchValue readerFileName = getArgumentValue( createDefaultArgumentDefinition(source), matches ); if( readerFileName == null ) throw new UserException.CommandLineException("SAM file compression was supplied, but no associated writer was supplied with it."); - builder.setSAMFile(new File(readerFileName)); + builder.setSAMFile(readerFileName.asFile()); // WARNING: Skipping required side-effect because stub is impossible to generate. engine.addInput(source, builder); diff --git a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileWriterArgumentTypeDescriptor.java b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileWriterArgumentTypeDescriptor.java index dcf2704f5..2ea4bdfb0 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileWriterArgumentTypeDescriptor.java +++ b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileWriterArgumentTypeDescriptor.java @@ -31,7 +31,6 @@ import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.io.StingSAMFileWriter; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import java.io.File; import java.io.OutputStream; import java.lang.annotation.Annotation; import java.lang.reflect.Type; @@ -111,10 +110,10 @@ public class SAMFileWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor public Object parse( ParsingEngine parsingEngine, ArgumentSource source, Type type, ArgumentMatches matches ) { // Extract all 
possible parameters that could be passed to a BAM file writer? ArgumentDefinition bamArgumentDefinition = createBAMArgumentDefinition(source); - String writerFileName = getArgumentValue( bamArgumentDefinition, matches ); + ArgumentMatchValue writerFileName = getArgumentValue( bamArgumentDefinition, matches ); - String compressionLevelText = getArgumentValue( createBAMCompressionArgumentDefinition(source), matches ); - Integer compressionLevel = compressionLevelText != null ? Integer.valueOf(compressionLevelText) : null; + ArgumentMatchValue compressionLevelText = getArgumentValue( createBAMCompressionArgumentDefinition(source), matches ); + Integer compressionLevel = compressionLevelText != null ? Integer.valueOf(compressionLevelText.asString()) : null; boolean indexOnTheFly = !argumentIsPresent(disableWriteIndexArgumentDefinition(source),matches); boolean generateMD5 = argumentIsPresent(this.enableMD5GenerationArgumentDefinition(source),matches); @@ -124,14 +123,14 @@ public class SAMFileWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor // This parser has been passed a null filename and the GATK is not responsible for creating a type default for the object; // therefore, the user must have failed to specify a type default - if(writerFileName == null && generateMD5) + if(writerFileName.asFile() == null && generateMD5) throw new ArgumentException("MD5 generation specified, but no output file specified. If md5 generation is desired, please specify a BAM output file and an md5 file will be written alongside."); // Create the stub and set parameters. 
SAMFileWriterStub stub = null; // stub = new SAMFileWriterStub(engine, defaultOutputStream); - if ( writerFileName != null ) { - stub = new SAMFileWriterStub(engine, new File(writerFileName)); + if ( writerFileName.asFile() != null ) { + stub = new SAMFileWriterStub(engine, writerFileName.asFile()); if ( compressionLevel != null ) stub.setCompressionLevel(compressionLevel); diff --git a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VCFWriterArgumentTypeDescriptor.java b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VCFWriterArgumentTypeDescriptor.java index 5e1132d45..43350ccc1 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VCFWriterArgumentTypeDescriptor.java +++ b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VCFWriterArgumentTypeDescriptor.java @@ -138,8 +138,8 @@ public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor { public Object parse( ParsingEngine parsingEngine, ArgumentSource source, Type type, ArgumentMatches matches ) { ArgumentDefinition defaultArgumentDefinition = createDefaultArgumentDefinition(source); // Get the filename for the genotype file, if it exists. If not, we'll need to send output to out. - String writerFileName = getArgumentValue(defaultArgumentDefinition,matches); - File writerFile = writerFileName != null ? new File(writerFileName) : null; + ArgumentMatchValue writerFileName = getArgumentValue(defaultArgumentDefinition,matches); + File writerFile = writerFileName != null ? writerFileName.asFile() : null; // This parser has been passed a null filename and the GATK is not responsible for creating a type default for the object; // therefore, the user must have failed to specify a type default @@ -151,7 +151,7 @@ public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor { ? 
new VariantContextWriterStub(engine, writerFile, argumentSources) : new VariantContextWriterStub(engine, defaultOutputStream, argumentSources); - stub.setCompressed(isCompressed(writerFileName)); + stub.setCompressed(isCompressed(writerFileName.asString())); stub.setDoNotWriteGenotypes(argumentIsPresent(createSitesOnlyArgumentDefinition(),matches)); stub.setSkipWritingCommandLineHeader(argumentIsPresent(createNoCommandLineHeaderArgumentDefinition(),matches)); stub.setForceBCF(argumentIsPresent(createBCFArgumentDefinition(),matches)); diff --git a/public/java/src/org/broadinstitute/sting/utils/help/HelpFormatter.java b/public/java/src/org/broadinstitute/sting/utils/help/HelpFormatter.java index 25ef8ccd2..0f6808718 100755 --- a/public/java/src/org/broadinstitute/sting/utils/help/HelpFormatter.java +++ b/public/java/src/org/broadinstitute/sting/utils/help/HelpFormatter.java @@ -26,10 +26,7 @@ package org.broadinstitute.sting.utils.help; import org.apache.log4j.Logger; -import org.broadinstitute.sting.commandline.ArgumentDefinition; -import org.broadinstitute.sting.commandline.ArgumentDefinitionGroup; -import org.broadinstitute.sting.commandline.ArgumentDefinitions; -import org.broadinstitute.sting.commandline.ArgumentMatchSource; +import org.broadinstitute.sting.commandline.*; import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.text.TextFormattingUtils; @@ -273,9 +270,9 @@ public class HelpFormatter { * Generate a standard header for the logger * * @param applicationDetails details of the application to run. 
- * @param parsedArgs the command line arguments passed in + * @param parsedArgs the arguments passed in */ - public static void generateHeaderInformation(ApplicationDetails applicationDetails, Map> parsedArgs) { + public static void generateHeaderInformation(ApplicationDetails applicationDetails, Map parsedArgs) { DateFormat dateFormat = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss"); java.util.Date date = new java.util.Date(); @@ -286,19 +283,16 @@ public class HelpFormatter { for (String headerLine : applicationDetails.applicationHeader) logger.info(headerLine); logger.debug("Current directory: " + System.getProperty("user.dir")); - for (Map.Entry> entry: parsedArgs.entrySet()) { + for (Map.Entry entry: parsedArgs.entrySet()) { ArgumentMatchSource matchSource = entry.getKey(); final String sourceName; switch (matchSource.getType()) { case CommandLine: sourceName = "Program"; break; - case File: sourceName = matchSource.getFile().getPath(); break; + case Provider: sourceName = matchSource.getDescription(); break; default: throw new RuntimeException("Unexpected argument match source type: " + matchSource.getType()); } - String output = sourceName + " Args:"; - for (String str : entry.getValue()) { - output = output + " " + str; - } + String output = sourceName + " Args: " + entry.getValue().getDescription(); logger.info(output); } logger.info("Date/Time: " + dateFormat.format(date)); diff --git a/public/java/test/org/broadinstitute/sting/commandline/ArgumentMatchSiteUnitTest.java b/public/java/test/org/broadinstitute/sting/commandline/ArgumentMatchSiteUnitTest.java index 99d6b88f3..b1e788dc5 100644 --- a/public/java/test/org/broadinstitute/sting/commandline/ArgumentMatchSiteUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/commandline/ArgumentMatchSiteUnitTest.java @@ -39,7 +39,7 @@ public class ArgumentMatchSiteUnitTest { @Test public void testFile() { - ArgumentMatchSource source = new ArgumentMatchSource(new File("test")); + ArgumentMatchSource source 
= new ArgumentMatchFileSource(new File("test")); ArgumentMatchSite site = new ArgumentMatchSite(source, 1); Assert.assertEquals(site.getSource(), source); Assert.assertEquals(site.getIndex(), 1); diff --git a/public/java/test/org/broadinstitute/sting/commandline/ArgumentMatchSourceUnitTest.java b/public/java/test/org/broadinstitute/sting/commandline/ArgumentMatchSourceUnitTest.java index 4bc7eb822..a183b2001 100644 --- a/public/java/test/org/broadinstitute/sting/commandline/ArgumentMatchSourceUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/commandline/ArgumentMatchSourceUnitTest.java @@ -35,15 +35,15 @@ public class ArgumentMatchSourceUnitTest extends BaseTest { public void testCommandLine() { ArgumentMatchSource source = ArgumentMatchSource.COMMAND_LINE; Assert.assertEquals(source.getType(), ArgumentMatchSourceType.CommandLine); - Assert.assertNull(source.getFile()); + Assert.assertNull(source.getDescription()); } @Test public void testFile() { File f = new File("test"); - ArgumentMatchSource source = new ArgumentMatchSource(f); - Assert.assertEquals(source.getType(), ArgumentMatchSourceType.File); - Assert.assertEquals(source.getFile(), f); + ArgumentMatchSource source = new ArgumentMatchFileSource(f); + Assert.assertEquals(source.getType(), ArgumentMatchSourceType.Provider); + Assert.assertEquals(source.getDescription(), "file " + f.getAbsolutePath()); } @Test(expectedExceptions = IllegalArgumentException.class) @@ -54,8 +54,8 @@ public class ArgumentMatchSourceUnitTest extends BaseTest { @Test public void testEquals() { ArgumentMatchSource cmdLine = ArgumentMatchSource.COMMAND_LINE; - ArgumentMatchSource fileA = new ArgumentMatchSource(new File("a")); - ArgumentMatchSource fileB = new ArgumentMatchSource(new File("b")); + ArgumentMatchSource fileA = new ArgumentMatchFileSource(new File("a")); + ArgumentMatchSource fileB = new ArgumentMatchFileSource(new File("b")); Assert.assertFalse(cmdLine.equals(null)); @@ -75,8 +75,8 @@ public class 
ArgumentMatchSourceUnitTest extends BaseTest { @Test public void testCompareTo() { ArgumentMatchSource cmdLine = ArgumentMatchSource.COMMAND_LINE; - ArgumentMatchSource fileA = new ArgumentMatchSource(new File("a")); - ArgumentMatchSource fileB = new ArgumentMatchSource(new File("b")); + ArgumentMatchSource fileA = new ArgumentMatchFileSource(new File("a")); + ArgumentMatchSource fileB = new ArgumentMatchFileSource(new File("b")); Assert.assertTrue(cmdLine.compareTo(cmdLine) == 0); Assert.assertTrue(cmdLine.compareTo(fileA) < 0); diff --git a/public/scala/src/org/broadinstitute/sting/queue/QCommandLine.scala b/public/scala/src/org/broadinstitute/sting/queue/QCommandLine.scala index d0379d022..f4c4b613f 100644 --- a/public/scala/src/org/broadinstitute/sting/queue/QCommandLine.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/QCommandLine.scala @@ -28,7 +28,7 @@ import function.QFunction import java.io.File import org.broadinstitute.sting.commandline._ import org.broadinstitute.sting.queue.util._ -import org.broadinstitute.sting.queue.engine.{QGraphSettings, QGraph} +import org.broadinstitute.sting.queue.engine.{QStatusMessenger, QGraphSettings, QGraph} import collection.JavaConversions._ import org.broadinstitute.sting.utils.classloader.PluginManager import org.broadinstitute.sting.utils.exceptions.UserException @@ -90,12 +90,16 @@ class QCommandLine extends CommandLineProgram with Logging { private var qScriptClasses: File = _ private var shuttingDown = false - private lazy val pluginManager = { + private lazy val qScriptPluginManager = { qScriptClasses = IOUtils.tempDir("Q-Classes-", "", settings.qSettings.tempDirectory) qScriptManager.loadScripts(scripts, qScriptClasses) new PluginManager[QScript](classOf[QScript], Seq(qScriptClasses.toURI.toURL)) } + private lazy val qStatusMessengerPluginManager = { + new PluginManager[QStatusMessenger](classOf[QStatusMessenger]) + } + QFunction.parsingEngine = new ParsingEngine(this) /** @@ -103,15 +107,25 @@ class 
QCommandLine extends CommandLineProgram with Logging { * functions, and then builds and runs a QGraph based on the dependencies. */ def execute = { + val allStatusMessengers = qStatusMessengerPluginManager.createAllTypes() + if (settings.qSettings.runName == null) settings.qSettings.runName = FilenameUtils.removeExtension(scripts.head.getName) if (IOUtils.isDefaultTempDir(settings.qSettings.tempDirectory)) settings.qSettings.tempDirectory = IOUtils.absolute(settings.qSettings.runDirectory, ".queue/tmp") qGraph.initializeWithSettings(settings) - val allQScripts = pluginManager.createAllTypes() + for (statusMessenger <- allStatusMessengers) { + loadArgumentsIntoObject(statusMessenger) + } + + for (statusMessenger <- allStatusMessengers) { + statusMessenger.started() + } + + val allQScripts = qScriptPluginManager.createAllTypes() for (script <- allQScripts) { - logger.info("Scripting " + pluginManager.getName(script.getClass.asSubclass(classOf[QScript]))) + logger.info("Scripting " + qScriptPluginManager.getName(script.getClass.asSubclass(classOf[QScript]))) loadArgumentsIntoObject(script) script.qSettings = settings.qSettings try { @@ -124,6 +138,10 @@ class QCommandLine extends CommandLineProgram with Logging { logger.info("Added " + script.functions.size + " functions") } + if (settings.run) { + allQScripts.foreach(_.pullInputs()) + } + // Execute the job graph qGraph.run() @@ -142,11 +160,18 @@ class QCommandLine extends CommandLineProgram with Logging { logger.info("Writing final jobs report...") qGraph.writeJobsReport() - if (!qGraph.success) { + if (!success) { logger.info("Done with errors") qGraph.logFailed() + for (statusMessenger <- allStatusMessengers) + statusMessenger.exit("Done with errors") 1 } else { + if (settings.run) { + allQScripts.foreach(_.pushOutputs()) + for (statusMessenger <- allStatusMessengers) + statusMessenger.done() + } 0 } } @@ -158,19 +183,30 @@ class QCommandLine extends CommandLineProgram with Logging { override def 
canAddArgumentsDynamically = true /** - * Returns the list of QScripts passed in via -S so that their - * arguments can be inspected before QScript.script is called. - * @return Array of QScripts passed in. + * Returns the list of QScripts passed in via -S and other plugins + * so that their arguments can be inspected before QScript.script is called. + * @return Array of dynamic sources */ - override def getArgumentSources = - pluginManager.getPlugins.toIterable.toArray.asInstanceOf[Array[Class[_]]] + override def getArgumentSources = { + var plugins = Seq.empty[Class[_]] + plugins ++= qScriptPluginManager.getPlugins + plugins ++= qStatusMessengerPluginManager.getPlugins + plugins.toArray + } /** - * Returns the name of a QScript - * @return The name of a QScript + * Returns the name of a script/plugin + * @return The name of a script/plugin */ - override def getArgumentSourceName(source: Class[_]) = - pluginManager.getName(source.asSubclass(classOf[QScript])) + override def getArgumentSourceName(source: Class[_]) = { + if (classOf[QScript].isAssignableFrom(source)) + qScriptPluginManager.getName(source.asSubclass(classOf[QScript])) + else if (classOf[QStatusMessenger].isAssignableFrom(source)) + qStatusMessengerPluginManager.getName(source.asSubclass(classOf[QStatusMessenger])) + else + null + + } /** * Returns a ScalaCompoundArgumentTypeDescriptor that can parse argument sources into scala collections. 
diff --git a/public/scala/src/org/broadinstitute/sting/queue/QScript.scala b/public/scala/src/org/broadinstitute/sting/queue/QScript.scala index 6f887ea00..c59220d4b 100755 --- a/public/scala/src/org/broadinstitute/sting/queue/QScript.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/QScript.scala @@ -27,7 +27,9 @@ package org.broadinstitute.sting.queue import engine.JobRunInfo import org.broadinstitute.sting.queue.function.QFunction import annotation.target.field -import util.{StringFileConversions, PrimitiveOptionConversions, Logging} +import util.{ReflectionUtils, StringFileConversions, PrimitiveOptionConversions, Logging} +import org.broadinstitute.sting.utils.classloader.JVMUtils +import java.lang.reflect.Field /** * Defines a Queue pipeline as a collection of CommandLineFunctions. @@ -106,6 +108,33 @@ trait QScript extends Logging with PrimitiveOptionConversions with StringFileCon def addAll(functions: Seq[QFunction]) { functions.foreach( f => add(f) ) } + + def pullInputs() { + val inputs = getInputs + inputs.filter(_.isInstanceOf[RemoteFile]).map(_.asInstanceOf[RemoteFile]).foreach(_.pullToLocal()) + } + + def pushOutputs() { + val outputs = getOutputs + outputs.filter(_.isInstanceOf[RemoteFile]).map(_.asInstanceOf[RemoteFile]).foreach(_.pushToRemote()) + } + + private def getInputs: Seq[File] = { + getFieldValues(classOf[Input]) + } + + private def getOutputs: Seq[File] = { + getFieldValues(classOf[Output]) + } + + private def getFieldValues(annotation: Class[_ <: java.lang.annotation.Annotation]): Seq[File] = { + val filtered: Seq[Field] = fields.filter(field => ReflectionUtils.hasAnnotation(field, annotation)) + val files = filtered.filter(field => classOf[File].isAssignableFrom(field.getType)).map(field => ReflectionUtils.getValue(this, field).asInstanceOf[File]) + val seqFiles = filtered.filter(field => classOf[Seq[File]].isAssignableFrom(field.getType)).map(field => ReflectionUtils.getValue(this, field).asInstanceOf[Seq[File]]) + 
seqFiles.foldLeft(files)(_ ++ _).filter(_ != null) + } + + private lazy val fields = collection.JavaConversions.asScalaBuffer(JVMUtils.getAllFields(this.getClass)).toSeq } object QScript { diff --git a/public/scala/src/org/broadinstitute/sting/queue/engine/QStatusMessenger.scala b/public/scala/src/org/broadinstitute/sting/queue/engine/QStatusMessenger.scala new file mode 100644 index 000000000..c61f2ef1f --- /dev/null +++ b/public/scala/src/org/broadinstitute/sting/queue/engine/QStatusMessenger.scala @@ -0,0 +1,10 @@ +package org.broadinstitute.sting.queue.engine + +/** + * Plugin to sends QStatus messages + */ +trait QStatusMessenger { + def started() + def done() + def exit(message: String) +} diff --git a/public/scala/src/org/broadinstitute/sting/queue/extensions/gatk/GATKIntervals.scala b/public/scala/src/org/broadinstitute/sting/queue/extensions/gatk/GATKIntervals.scala index e619c0a02..395a34c60 100755 --- a/public/scala/src/org/broadinstitute/sting/queue/extensions/gatk/GATKIntervals.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/extensions/gatk/GATKIntervals.scala @@ -92,6 +92,6 @@ object GATKIntervals { } private def createBinding(interval: String, argumentName: String, tags: Tags): IntervalBinding[Feature] = { - ArgumentTypeDescriptor.parseBinding(interval, classOf[Feature], classOf[IntervalBinding[Feature]], argumentName, tags, argumentName).asInstanceOf[IntervalBinding[Feature]] + ArgumentTypeDescriptor.parseBinding(new ArgumentMatchStringValue(interval), classOf[Feature], classOf[IntervalBinding[Feature]], argumentName, tags, argumentName).asInstanceOf[IntervalBinding[Feature]] } } diff --git a/public/scala/src/org/broadinstitute/sting/queue/util/RemoteFile.scala b/public/scala/src/org/broadinstitute/sting/queue/util/RemoteFile.scala new file mode 100644 index 000000000..cfe848ba8 --- /dev/null +++ b/public/scala/src/org/broadinstitute/sting/queue/util/RemoteFile.scala @@ -0,0 +1,13 @@ +package org.broadinstitute.sting.queue.util + +import 
java.io.File +import org.broadinstitute.sting.utils.io.FileExtension + +/** + * An extension of java.io.File that can be pulled from or pushed to a remote location. + */ +trait RemoteFile extends File with FileExtension { + def pullToLocal() + def pushToRemote() + def deleteRemote() +} From c4ee31075c1268eb990c36744e14139a5b1a8d80 Mon Sep 17 00:00:00 2001 From: kshakir Date: Mon, 15 Oct 2012 15:29:40 -0400 Subject: [PATCH 71/90] Fixed package error and a few deprecated scala warnings. --- .../scala/src/org/broadinstitute/sting/queue/QScript.scala | 2 +- .../org/broadinstitute/sting/queue/QScriptManager.scala | 7 ++++--- .../broadinstitute/sting/queue/engine/FunctionEdge.scala | 2 +- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/public/scala/src/org/broadinstitute/sting/queue/QScript.scala b/public/scala/src/org/broadinstitute/sting/queue/QScript.scala index c59220d4b..da24b854e 100755 --- a/public/scala/src/org/broadinstitute/sting/queue/QScript.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/QScript.scala @@ -27,7 +27,7 @@ package org.broadinstitute.sting.queue import engine.JobRunInfo import org.broadinstitute.sting.queue.function.QFunction import annotation.target.field -import util.{ReflectionUtils, StringFileConversions, PrimitiveOptionConversions, Logging} +import util._ import org.broadinstitute.sting.utils.classloader.JVMUtils import java.lang.reflect.Field diff --git a/public/scala/src/org/broadinstitute/sting/queue/QScriptManager.scala b/public/scala/src/org/broadinstitute/sting/queue/QScriptManager.scala index 74487917f..2528c0572 100644 --- a/public/scala/src/org/broadinstitute/sting/queue/QScriptManager.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/QScriptManager.scala @@ -11,6 +11,7 @@ import org.apache.log4j.Level import scala.tools.nsc.util.{FakePos, NoPosition, Position} import org.broadinstitute.sting.queue.util.TextFormatUtils._ import org.broadinstitute.sting.utils.classloader.JVMUtils +import 
tools.util.StringOps /** * Plugin manager for QScripts which loads QScripts into the current class loader. @@ -63,7 +64,7 @@ object QScriptManager extends Logging { * Heavily based on scala/src/compiler/scala/tools/nsc/reporters/ConsoleReporter.scala */ private class Log4JReporter(val settings: Settings) extends AbstractReporter { - def displayPrompt { throw new UnsupportedOperationException("Unable to prompt the user. Prompting should be off.") } + def displayPrompt() { throw new UnsupportedOperationException("Unable to prompt the user. Prompting should be off.") } /** * Displays the message at position with severity. @@ -98,9 +99,9 @@ object QScriptManager extends Logging { */ def printSummary() { if (WARNING.count > 0) - printMessage(Level.WARN, countElementsAsString(WARNING.count, "warning") + " found") + printMessage(Level.WARN, StringOps.countElementsAsString(WARNING.count, "warning") + " found") if (ERROR.count > 0) - printMessage(Level.ERROR, countElementsAsString(ERROR.count, "error") + " found") + printMessage(Level.ERROR, StringOps.countElementsAsString(ERROR.count, "error") + " found") } /** diff --git a/public/scala/src/org/broadinstitute/sting/queue/engine/FunctionEdge.scala b/public/scala/src/org/broadinstitute/sting/queue/engine/FunctionEdge.scala index 2d4ff60f5..62c016812 100644 --- a/public/scala/src/org/broadinstitute/sting/queue/engine/FunctionEdge.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/engine/FunctionEdge.scala @@ -185,7 +185,7 @@ class FunctionEdge(val function: QFunction, val inputs: QNode, val outputs: QNod val tailLines = IOUtils.tail(errorFile, maxLines) val nl = "%n".format() val summary = if (tailLines.size > maxLines) "Last %d lines".format(maxLines) else "Contents" - this.function.jobErrorLines = collection.JavaConversions.asScalaIterable(tailLines).toSeq + this.function.jobErrorLines = collection.JavaConversions.collectionAsScalaIterable(tailLines).toSeq logger.error("%s of %s:%n%s".format(summary, errorFile, 
StringUtils.join(tailLines, nl))) } else { logger.error("Unable to access log file: %s".format(errorFile)) From d27ae67bb65e9fe4edd9cecbe6961405d55bb5d4 Mon Sep 17 00:00:00 2001 From: Ryan Poplin Date: Mon, 15 Oct 2012 22:30:01 -0400 Subject: [PATCH 74/90] Updating the multi-step UG integration test. --- .../walkers/haplotypecaller/LikelihoodCalculationEngine.java | 4 +++- .../walkers/genotyper/UnifiedGenotyperIntegrationTest.java | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java index 072f81db9..1eba893fc 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java @@ -348,12 +348,14 @@ public class LikelihoodCalculationEngine { } } } + // add all filtered reads to the NO_CALL list because they weren't given any likelihoods for( final GATKSAMRecord read : perSampleFilteredReadList.get(sample.getKey()) ) { // only count the read if it overlaps the event, otherwise it is not added to the output read list at all if( callLoc.overlapsP(parser.createGenomeLoc(read)) ) { - for( final Allele a : call.getFirst().getAlleles() ) + for( final Allele a : call.getFirst().getAlleles() ) { likelihoodMap.add(read, a, 0.0); + } } } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java index e2ea47d9c..df088a4ad 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java +++ 
b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java @@ -349,7 +349,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec( baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + result.get(0).getAbsolutePath() + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10450700-10551000", 1, - Arrays.asList("beee9457d7cea42006ac45400db5e873")); + Arrays.asList("f3ff7fe0f15f31eadd726c711d6bf3de")); executeTest("test MultiSample Pilot1 CEU indels using GENOTYPE_GIVEN_ALLELES", spec2); } From 31be8076649ae0d2682c9f8fb0d899c59ae6bb00 Mon Sep 17 00:00:00 2001 From: Ryan Poplin Date: Mon, 15 Oct 2012 22:31:52 -0400 Subject: [PATCH 75/90] Updating missed integration test. --- .../walkers/haplotypecaller/LikelihoodCalculationEngine.java | 2 +- .../sting/utils/nanoScheduler/NanoSchedulerIntegrationTest.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java index 1eba893fc..14c1cd59d 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java @@ -348,7 +348,7 @@ public class LikelihoodCalculationEngine { } } } - + // add all filtered reads to the NO_CALL list because they weren't given any likelihoods for( final GATKSAMRecord read : perSampleFilteredReadList.get(sample.getKey()) ) { // only count the read if it overlaps the event, otherwise it is not added to the output read list at all diff --git a/public/java/test/org/broadinstitute/sting/utils/nanoScheduler/NanoSchedulerIntegrationTest.java 
b/public/java/test/org/broadinstitute/sting/utils/nanoScheduler/NanoSchedulerIntegrationTest.java index 24ffde9c3..93099f82a 100755 --- a/public/java/test/org/broadinstitute/sting/utils/nanoScheduler/NanoSchedulerIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/nanoScheduler/NanoSchedulerIntegrationTest.java @@ -21,7 +21,7 @@ public class NanoSchedulerIntegrationTest extends WalkerTest { for ( final int nct : Arrays.asList(1, 2) ) { // tests.add(new Object[]{ "SNP", "a1c7546f32a8919a3f3a70a04b2e8322", nt, nct }); //// tests.add(new Object[]{ "INDEL", "0a6d2be79f4f8a4b0eb788cc4751b31b", nt, nct }); - tests.add(new Object[]{ "BOTH", "78ce72d8f9d029313f5f2ceb02bb9822", nt, nct }); + tests.add(new Object[]{ "BOTH", "f8184e336dec7632408aa9afa98e6914", nt, nct }); } return tests.toArray(new Object[][]{}); From 9fcf71c031d382b9eaa4bb26e59bfa588abd3909 Mon Sep 17 00:00:00 2001 From: kshakir Date: Tue, 16 Oct 2012 02:21:38 -0400 Subject: [PATCH 76/90] Updated google reflections due to stale slf4j version conflicting with other projects also trying to use Queue as a component. Added targets to build.xml to effectively 'mvn install' packaged GATK/Queue from ant. TODO: Versions during 'mvn install' are hardcoded at 0.0.1 until a better versioning scheme that works with maven dependencies has been identified. --- build.xml | 42 +++++++++++++++++-- ivy.xml | 3 +- .../sting/utils/classloader/JVMUtils.java | 7 ++-- .../utils/classloader/PluginManager.java | 14 +++---- settings/ivysettings.xml | 1 - 5 files changed, 49 insertions(+), 18 deletions(-) diff --git a/build.xml b/build.xml index 7e7415f08..c6b1afc56 100644 --- a/build.xml +++ b/build.xml @@ -22,7 +22,9 @@ ~ OTHER DEALINGS IN THE SOFTWARE. 
--> - + Compile and distribute the Sting toolkit @@ -250,11 +252,14 @@ + + - + + @@ -262,6 +267,15 @@ uri="antlib:org.apache.ivy.ant" classpath="${ivy.jar.dir}/${ivy.jar.file}"/> + + + + + @@ -295,7 +309,7 @@ - + @@ -942,6 +956,28 @@ + + + + + + + + + + + + + + + + + + + diff --git a/ivy.xml b/ivy.xml index 1e3346ff5..1d2f95dc1 100644 --- a/ivy.xml +++ b/ivy.xml @@ -46,7 +46,8 @@ - + + diff --git a/public/java/src/org/broadinstitute/sting/utils/classloader/JVMUtils.java b/public/java/src/org/broadinstitute/sting/utils/classloader/JVMUtils.java index dd12ce761..49851249c 100755 --- a/public/java/src/org/broadinstitute/sting/utils/classloader/JVMUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/classloader/JVMUtils.java @@ -32,7 +32,6 @@ import org.reflections.util.ClasspathHelper; import java.io.File; import java.io.IOException; -import java.lang.annotation.Annotation; import java.lang.reflect.*; import java.net.URL; import java.util.*; @@ -198,7 +197,7 @@ public class JVMUtils { * @return the list of class path urls. 
*/ public static Set getClasspathURLs() { - return ClasspathHelper.getUrlsForManifestsCurrentClasspath(); + return ClasspathHelper.forManifest(); } /** @@ -240,8 +239,8 @@ public class JVMUtils { /** * Returns a comma-separated list of the names of the interfaces implemented by this class * - * @param covClass - * @return + * @param covClass class + * @return names of interfaces */ public static String classInterfaces(final Class covClass) { final List interfaces = new ArrayList(); diff --git a/public/java/src/org/broadinstitute/sting/utils/classloader/PluginManager.java b/public/java/src/org/broadinstitute/sting/utils/classloader/PluginManager.java index 82fb6b8d6..43cc800d8 100644 --- a/public/java/src/org/broadinstitute/sting/utils/classloader/PluginManager.java +++ b/public/java/src/org/broadinstitute/sting/utils/classloader/PluginManager.java @@ -25,8 +25,6 @@ package org.broadinstitute.sting.utils.classloader; -import ch.qos.logback.classic.Level; -import ch.qos.logback.classic.Logger; import org.broadinstitute.sting.gatk.WalkerManager; import org.broadinstitute.sting.gatk.filters.FilterManager; import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException; @@ -35,7 +33,6 @@ import org.broadinstitute.sting.utils.exceptions.UserException; import org.reflections.Reflections; import org.reflections.scanners.SubTypesScanner; import org.reflections.util.ConfigurationBuilder; -import org.slf4j.LoggerFactory; import java.io.File; import java.lang.reflect.Constructor; @@ -57,9 +54,8 @@ public class PluginManager { private static final Reflections defaultReflections; static { - // turn off logging in the reflections library - they talk too much (to the wrong logger factory as well, logback) - Logger logger = (ch.qos.logback.classic.Logger) LoggerFactory.getLogger(Reflections.class); - logger.setLevel(Level.OFF); + // turn off logging in the reflections library - they talk too much + Reflections.log = null; Set classPathUrls = new LinkedHashSet(); @@ 
-179,9 +175,9 @@ public class PluginManager { /** * Sorts, in place, the list of plugins according to getName() on each element * - * @param unsortedPlugins + * @param unsortedPlugins unsorted plugins */ - private final void sortPlugins(final List> unsortedPlugins) { + private void sortPlugins(final List> unsortedPlugins) { Collections.sort(unsortedPlugins, new ComparePluginsByName()); } @@ -235,7 +231,7 @@ public class PluginManager { * @param plugin Name of the plugin for which to search. * @return True if the plugin exists, false otherwise. */ - public boolean exists(Class plugin) { + public boolean exists(Class plugin) { return pluginsByName.containsValue(plugin); } diff --git a/settings/ivysettings.xml b/settings/ivysettings.xml index e17342442..ce7667140 100644 --- a/settings/ivysettings.xml +++ b/settings/ivysettings.xml @@ -7,7 +7,6 @@ - From d1511e38ad0fdb10ab0b9f2146331e7a43099b6b Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Mon, 15 Oct 2012 16:06:11 -0400 Subject: [PATCH 77/90] Removing ConstrainedAFCalculationModel; AFCalcPerformanceTest -- Superceded by IndependentAFCalc -- Added support to read in an ExactModelLog in AFCalcPerformanceTest and run the independent alleles model on it. -- A few misc. 
bug fixes discovered during running the performance test --- .../afcalc/AFCalcPerformanceTest.java | 36 ++++- .../genotyper/afcalc/AFCalcUnitTest.java | 8 +- ...ConstrainedAFCalculationModelUnitTest.java | 124 ------------------ .../gatk/walkers/genotyper/afcalc/AFCalc.java | 92 +++++++++++-- .../genotyper/afcalc/AFCalcFactory.java | 4 - .../genotyper/afcalc/AFCalcResult.java | 13 +- .../afcalc/ConstrainedDiploidExactAFCalc.java | 107 --------------- .../sting/utils/variantcontext/Genotype.java | 14 +- 8 files changed, 135 insertions(+), 263 deletions(-) delete mode 100644 protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/ConstrainedAFCalculationModelUnitTest.java delete mode 100644 public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/ConstrainedDiploidExactAFCalc.java diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcPerformanceTest.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcPerformanceTest.java index 68b068509..f019d8f8e 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcPerformanceTest.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcPerformanceTest.java @@ -5,15 +5,16 @@ import org.apache.log4j.Logger; import org.apache.log4j.TTCCLayout; import org.broadinstitute.sting.gatk.report.GATKReport; import org.broadinstitute.sting.gatk.report.GATKReportTable; +import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.SimpleTimer; import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile; import org.broadinstitute.sting.utils.variantcontext.Genotype; import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder; -import 
java.io.FileOutputStream; -import java.io.PrintStream; +import java.io.*; import java.util.*; /** @@ -190,7 +191,8 @@ public class AFCalcPerformanceTest { public enum Operation { ANALYZE, - SINGLE + SINGLE, + EXACT_LOG } public static void main(final String[] args) throws Exception { final TTCCLayout layout = new TTCCLayout(); @@ -204,10 +206,37 @@ public class AFCalcPerformanceTest { switch ( op ) { case ANALYZE: analyze(args); break; case SINGLE: profileBig(args); break; + case EXACT_LOG: exactLog(args); break; default: throw new IllegalAccessException("unknown operation " + op); } } + private static void exactLog(final String[] args) throws Exception { + final File ref = new File(args[1]); + final File exactLogFile = new File(args[2]); + final List startsToUse = new LinkedList(); + + for ( int i = 3; i < args.length; i++ ) + startsToUse.add(Integer.valueOf(args[i])); + + final CachingIndexedFastaSequenceFile seq = new CachingIndexedFastaSequenceFile(ref); + final GenomeLocParser parser = new GenomeLocParser(seq); + final BufferedReader reader = new BufferedReader(new FileReader(exactLogFile)); + final List loggedCalls = AFCalc.readExactLog(reader, startsToUse, parser); + + for ( final AFCalc.ExactCall call : loggedCalls ) { + final AFCalcTestBuilder testBuilder = new AFCalcTestBuilder(call.vc.getNSamples(), 1, + AFCalcFactory.Calculation.EXACT_INDEPENDENT, + AFCalcTestBuilder.PriorType.human); + final SimpleTimer timer = new SimpleTimer().start(); + final AFCalcResult result = testBuilder.makeModel().getLog10PNonRef(call.vc, testBuilder.makePriors()); + call.newNanoTime = timer.getElapsedTimeNano(); + call.newPNonRef = result.getLog10PosteriorOfAFGT0(); + logger.info(call); + logger.info("\t\t" + result); + } + } + private static void profileBig(final String[] args) throws Exception { final int nSamples = Integer.valueOf(args[1]); final int ac = Integer.valueOf(args[2]); @@ -234,7 +263,6 @@ public class AFCalcPerformanceTest { final List modelParams = 
Arrays.asList( new ModelParams(AFCalcFactory.Calculation.EXACT_REFERENCE, 10000, 10), // new ModelParams(AFCalcTestBuilder.ModelType.GeneralExact, 100, 10), - new ModelParams(AFCalcFactory.Calculation.EXACT_CONSTRAINED, 10000, 100), new ModelParams(AFCalcFactory.Calculation.EXACT_INDEPENDENT, 10000, 1000)); final boolean ONLY_HUMAN_PRIORS = false; diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcUnitTest.java index 4ac4692d7..e2407989b 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcUnitTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcUnitTest.java @@ -372,16 +372,14 @@ public class AFCalcUnitTest extends BaseTest { final VariantContext vc3 = new VariantContextBuilder("x","1", 1, 1, Arrays.asList(A, C, G)).make(); final AFCalcTestBuilder.PriorType priorType = AFCalcTestBuilder.PriorType.flat; - final List constrainedModel = Arrays.asList(AFCalcFactory.Calculation.EXACT_CONSTRAINED); - final double TOLERANCE = 0.5; final List initialPNonRefData = Arrays.asList( // bi-allelic sites new PNonRefData(vc2, makePL(AA, 0, 10, 10), 0.1666667, TOLERANCE, true), - new PNonRefData(vc2, makePL(AA, 0, 1, 10), 0.4721084, TOLERANCE, false, constrainedModel), - new PNonRefData(vc2, makePL(AA, 0, 1, 1), 0.6136992, TOLERANCE, false, constrainedModel), - new PNonRefData(vc2, makePL(AA, 0, 5, 5), 0.3874259, TOLERANCE, false, constrainedModel), + new PNonRefData(vc2, makePL(AA, 0, 1, 10), 0.4721084, TOLERANCE, false), + new PNonRefData(vc2, makePL(AA, 0, 1, 1), 0.6136992, TOLERANCE, false), + new PNonRefData(vc2, makePL(AA, 0, 5, 5), 0.3874259, TOLERANCE, false), new PNonRefData(vc2, makePL(AC, 10, 0, 10), 0.9166667, TOLERANCE, true), new PNonRefData(vc2, makePL(CC, 10, 10, 0), 0.9166667, TOLERANCE, true), diff --git 
a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/ConstrainedAFCalculationModelUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/ConstrainedAFCalculationModelUnitTest.java deleted file mode 100644 index 31ec28af4..000000000 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/ConstrainedAFCalculationModelUnitTest.java +++ /dev/null @@ -1,124 +0,0 @@ -package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc; - -import org.broadinstitute.sting.BaseTest; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder; -import org.testng.Assert; -import org.testng.annotations.DataProvider; -import org.testng.annotations.Test; - -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; - - -public class ConstrainedAFCalculationModelUnitTest extends BaseTest { - static Allele A = Allele.create("A", true); - static Allele C = Allele.create("C"); - static Allele G = Allele.create("G"); - - protected static Genotype makePL(final List expectedGT, int ... 
pls) { - return AFCalcUnitTest.makePL(expectedGT, pls); - } - - @DataProvider(name = "MaxACsToVisit") - public Object[][] makeMaxACsToVisit() { - List tests = new ArrayList(); - - final int nSamples = 10; - - for (int nNonInformative = 0; nNonInformative < nSamples - 1; nNonInformative++ ) { - final int nChrom = (nSamples - nNonInformative) * 2; - for ( int i = 0; i < nChrom; i++ ) { - // bi-allelic - tests.add(new Object[]{nSamples, Arrays.asList(i), nNonInformative, AFCalcFactory.Calculation.EXACT_CONSTRAINED}); - - // tri-allelic - for ( int j = 0; j < (nChrom - i); j++) - tests.add(new Object[]{nSamples, Arrays.asList(i, j), nNonInformative, AFCalcFactory.Calculation.EXACT_CONSTRAINED}); - } - } - - return tests.toArray(new Object[][]{}); - } - - @Test(enabled = true, dataProvider = "MaxACsToVisit") - public void testMaxACsToVisit(final int nSamples, final List requestedACs, final int nNonInformative, final AFCalcFactory.Calculation modelType) { - final int nAlts = requestedACs.size(); - final AFCalcTestBuilder testBuilder - = new AFCalcTestBuilder(nSamples, nAlts, modelType, - AFCalcTestBuilder.PriorType.human); - - final VariantContext vc = testBuilder.makeACTest(requestedACs, nNonInformative, 100); - final int[] maxACsToVisit = ((ConstrainedDiploidExactAFCalc)testBuilder.makeModel()).computeMaxACs(vc); - - testExpectedACs(vc, maxACsToVisit); - } - - private void testExpectedACs(final VariantContext vc, final int[] maxACsToVisit) { - // this is necessary because cannot ensure that the tester gives us back the - // requested ACs due to rounding errors - final List ACs = new ArrayList(); - for ( final Allele a : vc.getAlternateAlleles() ) - ACs.add(vc.getCalledChrCount(a)); - - for ( int i = 0; i < maxACsToVisit.length; i++ ) { - Assert.assertEquals(maxACsToVisit[i], (int)ACs.get(i), "Maximum AC computed wasn't equal to the max possible in the construction for alt allele " + i); - } - } - - @DataProvider(name = "MaxACsGenotypes") - public Object[][] 
makeMaxACsForGenotype() { - List tests = new ArrayList(); - - final List AA = Arrays.asList(A, A); - final List AC = Arrays.asList(A, C); - final List CC = Arrays.asList(C, C); - final List AG = Arrays.asList(A, G); - final List GG = Arrays.asList(G, G); - final List CG = Arrays.asList(C, G); - - final VariantContext vc2 = new VariantContextBuilder("x","1", 1, 1, Arrays.asList(A, C)).make(); - final VariantContext vc3 = new VariantContextBuilder("x","1", 1, 1, Arrays.asList(A, C, G)).make(); - - tests.add(new Object[]{vc2, makePL(AA, 0, 10, 10)}); - tests.add(new Object[]{vc2, makePL(AC, 10, 0, 10)}); - tests.add(new Object[]{vc2, makePL(CC, 10, 10, 0)}); - - // make sure non-informative => 0 - tests.add(new Object[]{vc2, makePL(AA, 0, 0, 0)}); - tests.add(new Object[]{vc3, makePL(AA, 0, 0, 0, 0, 0, 0)}); - - // multi-allelics - tests.add(new Object[]{vc3, makePL(AG, 10, 10, 10, 0, 10, 10)}); - tests.add(new Object[]{vc3, makePL(CG, 10, 10, 10, 10, 0, 10)}); - tests.add(new Object[]{vc3, makePL(GG, 10, 10, 10, 10, 10, 0)}); - - // deal with non-informatives third alleles - tests.add(new Object[]{vc3, makePL(AC, 10, 0, 10, 0, 0, 10)}); - tests.add(new Object[]{vc3, makePL(AC, 10, 0, 10, 10, 0, 10)}); - tests.add(new Object[]{vc3, makePL(AC, 10, 0, 10, 10, 0, 0)}); - tests.add(new Object[]{vc3, makePL(AC, 10, 0, 10, 0, 0, 0)}); - tests.add(new Object[]{vc3, makePL(CC, 10, 10, 0, 0, 0, 10)}); - tests.add(new Object[]{vc3, makePL(CC, 10, 10, 0, 10, 0, 10)}); - tests.add(new Object[]{vc3, makePL(CC, 10, 10, 0, 10, 0, 0)}); - tests.add(new Object[]{vc3, makePL(CC, 10, 10, 0, 0, 0, 0)}); - - return tests.toArray(new Object[][]{}); - } - - @Test(enabled = true, dataProvider = "MaxACsGenotypes") - private void testMakeACByGenotype(final VariantContext vcRoot, final Genotype g) { - final VariantContext vc = new VariantContextBuilder(vcRoot).genotypes(g).make(); - - final AFCalcTestBuilder testBuilder - = new AFCalcTestBuilder(1, vc.getNAlleles()-1, 
AFCalcFactory.Calculation.EXACT_CONSTRAINED, - AFCalcTestBuilder.PriorType.human); - - final int[] maxACsToVisit = ((ConstrainedDiploidExactAFCalc)testBuilder.makeModel()).computeMaxACs(vc); - - testExpectedACs(vc, maxACsToVisit); - } -} \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalc.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalc.java index f87084a9c..8cb6bcabc 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalc.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalc.java @@ -28,19 +28,14 @@ package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc; import com.google.java.contract.Ensures; import com.google.java.contract.Requires; import org.apache.log4j.Logger; -import org.broadinstitute.sting.utils.SimpleTimer; -import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.GenotypesContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.sting.utils.variantcontext.*; -import java.io.File; -import java.io.FileNotFoundException; -import java.io.FileOutputStream; -import java.io.PrintStream; +import java.io.*; +import java.util.ArrayList; import java.util.Arrays; +import java.util.LinkedList; import java.util.List; @@ -218,7 +213,84 @@ public abstract class AFCalc implements Cloneable { callReport.println(Utils.join("\t", Arrays.asList(loc, variable, key, value))); } + public static class ExactCall { + final VariantContext vc; + final long origNanoTime; + long newNanoTime = -1; + final double origPNonRef; + double newPNonRef = -1; + + public ExactCall(VariantContext vc, 
long origNanoTime, double origPNonRef) { + this.vc = vc; + this.origNanoTime = origNanoTime; + this.origPNonRef = origPNonRef; + } + + @Override + public String toString() { + return String.format("ExactCall %s:%d alleles=%s nSamples=%s orig.pNonRef=%.2f orig.runtime=%s new.pNonRef=%.2f new.runtime=%s", + vc.getChr(), vc.getStart(), vc.getAlleles(), vc.getNSamples(), + origPNonRef, + new AutoFormattingTime(origNanoTime / 1e9).toString(), + newPNonRef, + newNanoTime == -1 ? "not.run" : new AutoFormattingTime(newNanoTime / 1e9).toString()); + } + } + + public static List readExactLog(final BufferedReader reader, final List startsToKeep, GenomeLocParser parser) throws IOException { + List calls = new LinkedList(); + + // skip the header line + reader.readLine(); + + while ( true ) { + final VariantContextBuilder builder = new VariantContextBuilder(); + final List alleles = new ArrayList(); + final List genotypes = new ArrayList(); + long runtimeNano = -1; + + GenomeLoc currentLoc = null; + while ( true ) { + final String line = reader.readLine(); + if ( line == null ) + return calls; + + final String[] parts = line.split("\t"); + final GenomeLoc lineLoc = parser.parseGenomeLoc(parts[0]); + final String variable = parts[1]; + final String key = parts[2]; + final String value = parts[3]; + + if ( currentLoc == null ) + currentLoc = lineLoc; + + if ( variable.equals("log10PosteriorOfAFzero") ) { + if ( startsToKeep.isEmpty() || startsToKeep.contains(currentLoc.getStart()) ) { + builder.alleles(alleles); + final int stop = currentLoc.getStart() + alleles.get(0).length() - 1; + builder.chr(currentLoc.getContig()).start(currentLoc.getStart()).stop(stop); + builder.genotypes(genotypes); + calls.add(new ExactCall(builder.make(), runtimeNano, Double.valueOf(value))); + } + break; + } else if ( variable.equals("allele") ) { + final boolean isRef = key.equals("0"); + alleles.add(Allele.create(value, isRef)); + } else if ( variable.equals("PL") ) { + final GenotypeBuilder gb = 
new GenotypeBuilder(key); + gb.PL(GenotypeLikelihoods.fromPLField(value).getAsPLs()); + genotypes.add(gb.make()); + } else if ( variable.equals("runtime.nano") ) { + runtimeNano = Long.valueOf(value); + } else { + // nothing to do + } + } + } + } + public AFCalcResultTracker getResultTracker() { return resultTracker; } + } \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcFactory.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcFactory.java index 981100eaa..7d67815cf 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcFactory.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcFactory.java @@ -30,10 +30,6 @@ public class AFCalcFactory { /** reference implementation of multi-allelic EXACT model */ EXACT_REFERENCE(ReferenceDiploidExactAFCalc.class, 2, -1), - /** expt. implementation */ - @Deprecated - EXACT_CONSTRAINED(ConstrainedDiploidExactAFCalc.class, 2, -1), - /** expt. 
implementation -- for testing only */ EXACT_INDEPENDENT(IndependentAllelesDiploidExactAFCalc.class, 2, -1), diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcResult.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcResult.java index da7fd08ce..a42795593 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcResult.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcResult.java @@ -31,10 +31,7 @@ import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.variantcontext.Allele; -import java.util.Arrays; -import java.util.HashMap; -import java.util.List; -import java.util.Map; +import java.util.*; /** * Describes the results of the AFCalc @@ -217,6 +214,14 @@ public class AFCalcResult { return log10PriorsOfAC[AF1p]; } + @Override + public String toString() { + final List byAllele = new LinkedList(); + for ( final Allele a : getAllelesUsedInGenotyping() ) + if ( a.isNonReference() ) byAllele.add(String.format("%s => MLE %d / posterior %.2f", a, getAlleleCountAtMLE(a), getLog10PosteriorOfAFGt0ForAllele(a))); + return String.format("AFCalc%n\t\tlog10PosteriorOfAFGT0=%.2f%n\t\t%s", getLog10LikelihoodOfAFGT0(), Utils.join("\n\t\t", byAllele)); + } + /** * Are we sufficiently confidence in being non-ref that the site is considered polymorphic? 
* diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/ConstrainedDiploidExactAFCalc.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/ConstrainedDiploidExactAFCalc.java deleted file mode 100644 index 36d53ceaa..000000000 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/ConstrainedDiploidExactAFCalc.java +++ /dev/null @@ -1,107 +0,0 @@ -package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc; - -import com.google.java.contract.Ensures; -import com.google.java.contract.Requires; -import org.broadinstitute.sting.utils.MathUtils; -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.GenotypeLikelihoods; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; - -@Deprecated -public class ConstrainedDiploidExactAFCalc extends DiploidExactAFCalc { - protected ConstrainedDiploidExactAFCalc(int nSamples, int maxAltAlleles, int maxAltAllelesForIndels, final int ploidy) { - super(nSamples, maxAltAlleles, maxAltAllelesForIndels, ploidy); - } - - protected StateTracker makeMaxLikelihood(final VariantContext vc, final AFCalcResultTracker resultTracker) { - final int[] maxACsToConsider = computeMaxACs(vc); - resultTracker.setAClimits(maxACsToConsider); - return new StateTracker(maxACsToConsider); - } - - /** - * Computes the maximum ACs we need to consider for each alt allele - * - * Walks over the genotypes in VC, and computes for each alt allele the maximum - * AC we need to consider in that alt allele dimension. Does the calculation - * based on the PLs in each genotype g, choosing to update the max AC for the - * alt alleles corresponding to that PL. Only takes the first lowest PL, - * if there are multiple genotype configurations with the same PL value. It - * takes values in the order of the alt alleles. 
- * - * @param vc the variant context we will compute max alt alleles for - * @return a vector of max alt alleles, indexed by alt allele, so result[0] is the AC of the - * first alt allele. - */ - @Ensures("result != null") - protected final int[] computeMaxACs(final VariantContext vc) { - final int[] maxACs = new int[vc.getNAlleles()-1]; - - for ( final Genotype g : vc.getGenotypes() ) - updateMaxACs(g, maxACs); - - return maxACs; - } - - /** - * Update the maximum achievable allele counts in maxAC according to the PLs in g - * - * Selects the maximum genotype configuration from the PLs in g, and updates - * the maxAC for this configure. For example, if the lowest PL is for 0/1, updates - * the maxAC for the alt allele 1 by 1. If it's 1/1, update is 2. Works for - * many number of alt alleles (determined by length of maxACs). - * - * If the max PL occurs at 0/0, updates nothing - * Note that this function greedily takes the first min PL, so that if 0/1 and 1/1 have - * the same PL value, then updates the first one. - * - * Also, only will update 1 alt allele, so if 0/1 and 0/2 both have the same PL, - * then only first one (1) will be updated - * - * @param g the genotype to update - * @param maxACs the max allele count vector for alt alleles (starting at 0 => first alt allele) - */ - @Requires({ - "g != null", - "maxACs != null", - "goodMaxACs(maxACs)"}) - private void updateMaxACs(final Genotype g, final int[] maxACs) { - final int[] PLs = g.getLikelihoods().getAsPLs(); - - int minPLi = 0; - int minPL = PLs[0]; - - for ( int i = 0; i < PLs.length; i++ ) { - if ( PLs[i] < minPL ) { - minPL = PLs[i]; - minPLi = i; - } - } - - final GenotypeLikelihoods.GenotypeLikelihoodsAllelePair pair = GenotypeLikelihoods.getAllelePair(minPLi); - updateMaxACs(maxACs, pair.alleleIndex1); - updateMaxACs(maxACs, pair.alleleIndex2); - } - - /** - * Simple helper. 
Update max alt alleles maxACs according to the allele index (where 0 == ref) - * - * If alleleI == 0 => doesn't update anything - * else maxACs[alleleI - 1]++ - * - * @param maxACs array of max alt allele ACs - * @param alleleI the index (relative to 0) to update a count of 1 in max alt alleles. - */ - @Requires({ - "alleleI >= 0", - "(alleleI - 1) < maxACs.length", - "goodMaxACs(maxACs)"}) - private void updateMaxACs(final int[] maxACs, final int alleleI) { - if ( alleleI > 0 ) - maxACs[alleleI-1]++; - } - - private static boolean goodMaxACs(final int[] maxACs) { - return MathUtils.sum(maxACs) >= 0; - } -} diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/Genotype.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/Genotype.java index aa801c2b9..67e80cf3c 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/Genotype.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/Genotype.java @@ -298,12 +298,16 @@ public abstract class Genotype implements Comparable { * @return true if all samples PLs are equal and == 0 */ public boolean isNonInformative() { - for ( final int PL : getPL() ) { - if ( PL != 0 ) - return false; - } + if ( getPL() == null ) + return true; + else { + for ( final int PL : getPL() ) { + if ( PL != 0 ) + return false; + } - return true; + return true; + } } /** From 6bd0ec8de40f74548b2c39a8767ceaf100d30a0f Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Mon, 15 Oct 2012 20:23:07 -0400 Subject: [PATCH 78/90] Proper likelihoods and posterior probability of the joint allele frequency in IndependentAllelesDiploidExactAFCalc -- Fixed minor numerical stability issue in AFCalcResult -- posterior of joint A/B/C is 1 - (1 - P(D | AF_b == 0)) x (1 - P(D | AF_c == 0)), for any number of alleles, obviously. Now computes the joint posterior like this, and then back-calculates likelihoods that generate these posteriors given the priors. 
It's not pretty but it's the best thing to do --- .../genotyper/afcalc/AFCalcResult.java | 4 +- .../IndependentAllelesDiploidExactAFCalc.java | 155 +++++++++--------- 2 files changed, 81 insertions(+), 78 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcResult.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcResult.java index a42795593..209a21d82 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcResult.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcResult.java @@ -275,11 +275,11 @@ public class AFCalcResult { // necessary because the posteriors may be so skewed that the log-space normalized value isn't // good, so we have to try both log-space normalization as well as the real-space normalization if the // result isn't good - final double[] logNormalized = MathUtils.normalizeFromLog10(log10UnnormalizedPosteriors, true, true); + final double[] logNormalized = MathUtils.normalizeFromLog10(log10UnnormalizedPosteriors, true, false); if ( goodLog10ProbVector(logNormalized, logNormalized.length, true) ) return logNormalized; else - return MathUtils.normalizeFromLog10(log10UnnormalizedPosteriors, true, false); + return MathUtils.normalizeFromLog10(log10UnnormalizedPosteriors, true, true); } /** diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/IndependentAllelesDiploidExactAFCalc.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/IndependentAllelesDiploidExactAFCalc.java index 0ac964c9c..ba1e5bbb8 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/IndependentAllelesDiploidExactAFCalc.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/IndependentAllelesDiploidExactAFCalc.java @@ -32,13 +32,74 @@ import org.broadinstitute.sting.utils.variantcontext.*; import java.util.*; -public class 
IndependentAllelesDiploidExactAFCalc extends DiploidExactAFCalc { +/** + * Computes the conditional bi-allelic exact results + * + * Suppose vc contains 2 alt allele: A* with C and T. This function first computes: + * + * (1) P(D | AF_c > 0 && AF_t == *) [i.e., T can be anything] + * + * it then computes the conditional probability on AF_c == 0: + * + * (2) P(D | AF_t > 0 && AF_c == 0) + * + * Thinking about this visually, we have the following likelihood matrix where each cell is + * the P(D | AF_c == i && AF_t == j): + * + * 0 AF_c > 0 + * ----------------- + * 0 | | + * |--|------------- + * a | | + * f | | + * _ | | + * t | | + * > | | + * 0 | | + * + * What we really want to know how + * + * (3) P(D | AF_c == 0 & AF_t == 0) + * + * compares with + * + * (4) P(D | AF_c > 0 || AF_t > 0) + * + * This is effectively asking for the value in the upper left vs. the sum of all cells. + * + * This class implements the conditional likelihoods summation for any number of alt + * alleles, where each alt allele has its EXACT probability of segregating calculated by + * reducing each alt B into the case XB and computing P(D | AF_b > 0 ) as follows: + * + * Suppose we have for a A/B/C site the following GLs: + * + * AA AB BB AC BC CC + * + * and we want to get the bi-allelic GLs for X/B, where X is everything not B + * + * XX = AA + AC + CC (since X = A or C) + * XB = AB + BC + * BB = BB + * + * After each allele has its probability calculated we compute the joint posterior + * as P(D | AF_* == 0) = prod_i P (D | AF_i == 0), after applying the theta^i + * prior for the ith least likely allele. + */ + public class IndependentAllelesDiploidExactAFCalc extends DiploidExactAFCalc { + /** + * The min. confidence of an allele to be included in the joint posterior. 
+ */ + private final static double MIN_LOG10_CONFIDENCE_TO_INCLUDE_ALLELE_IN_POSTERIOR = Math.log10(1e-20); + private final static List BIALLELIC_NOCALL = Arrays.asList(Allele.NO_CALL, Allele.NO_CALL); + /** + * Sorts AFCalcResults by their posteriors of AF > 0, so the + */ private final static class CompareAFCalcResultsByPNonRef implements Comparator { @Override public int compare(AFCalcResult o1, AFCalcResult o2) { - return Double.compare(o1.getLog10LikelihoodOfAFGT0(), o2.getLog10LikelihoodOfAFGT0()); + return Double.compare(o1.getLog10PosteriorOfAFGT0(), o2.getLog10PosteriorOfAFGT0()); } } @@ -68,76 +129,13 @@ public class IndependentAllelesDiploidExactAFCalc extends DiploidExactAFCalc { @Override public AFCalcResult computeLog10PNonRef(final VariantContext vc, final double[] log10AlleleFrequencyPriors) { - final double log10LikelihoodOfRef = computelog10LikelihoodOfRef(vc); final List independentResultTrackers = computeAlleleConditionalExact(vc, log10AlleleFrequencyPriors); final List withMultiAllelicPriors = applyMultiAllelicPriors(independentResultTrackers); - return combineIndependentPNonRefs(vc, log10LikelihoodOfRef, withMultiAllelicPriors); + return combineIndependentPNonRefs(vc, withMultiAllelicPriors); } - protected final double computelog10LikelihoodOfRef(final VariantContext vc) { - // this value just the likelihood of AF == 0 in the special constrained multi-allelic calculation - final List allGLs = getGLs(vc.getGenotypes(), false); - double log10LikelihoodOfHomRef = 0.0; - - // TODO -- can be easily optimized (currently looks at all GLs via getGLs) - for ( int i = 0; i < allGLs.size(); i++ ) { - final double[] GLs = allGLs.get(i); - log10LikelihoodOfHomRef += GLs[0]; - //log10LikelihoodOfHomRef += MathUtils.normalizeFromLog10(GLs, true)[0]; - } - - return log10LikelihoodOfHomRef; - } /** - * Computes the conditional bi-allelic exact results - * - * Suppose vc contains 2 alt allele: A* with C and T. 
This function first computes: - * - * (1) P(D | AF_c > 0 && AF_t == *) [i.e., T can be anything] - * - * it then computes the conditional probability on AF_c == 0: - * - * (2) P(D | AF_t > 0 && AF_c == 0) - * - * Thinking about this visually, we have the following likelihood matrix where each cell is - * the P(D | AF_c == i && AF_t == j): - * - * 0 AF_c > 0 - * ----------------- - * 0 | | - * |--|------------- - * a | | - * f | | - * _ | | - * t | | - * > | | - * 0 | | - * - * What we really want to know how - * - * (3) P(D | AF_c == 0 & AF_t == 0) - * - * compares with - * - * (4) P(D | AF_c > 0 || AF_t > 0) - * - * This is effectively asking for the value in the upper left vs. the sum of all cells. - * - * The quantity (1) is the same of all cells except those with AF_c == 0, while (2) is the - * band at the top where AF_t > 0 and AF_c == 0 - * - * So (4) is actually (1) + (2). - * - * (3) is the direct inverse of the (1) and (2), as we are simultaneously calculating - * - * (1*) P(D | AF_c == 0 && AF_t == *) [i.e., T can be anything] - * (2*) P(D | AF_t == 0 && AF_c == 0) [TODO -- note this value looks like the thing we are supposed to use] - * - * This function implements the conditional likelihoods summation for any number of alt - * alleles (not just the tri-allelic case), where each subsequent variant context is - * further constrained such that each already considered allele x has AF_x == 0 in the - * compute. 
* * @param vc * @param log10AlleleFrequencyPriors @@ -294,7 +292,6 @@ public class IndependentAllelesDiploidExactAFCalc extends DiploidExactAFCalc { * @param sortedResultsWithThetaNPriors the pNonRef result for each allele independently */ protected AFCalcResult combineIndependentPNonRefs(final VariantContext vc, - final double log10LikelihoodsOfACEq0, final List sortedResultsWithThetaNPriors) { int nEvaluations = 0; final int nAltAlleles = sortedResultsWithThetaNPriors.size(); @@ -302,8 +299,8 @@ public class IndependentAllelesDiploidExactAFCalc extends DiploidExactAFCalc { final double[] log10PriorsOfAC = new double[2]; final Map log10pNonRefByAllele = new HashMap(nAltAlleles); - // this value is a sum in real space so we need to store values to sum up later - final double[] log10LikelihoodsOfACGt0 = new double[nAltAlleles]; + // this value is a sum in log space + double log10PosteriorOfACEq0Sum = 0.0; for ( final AFCalcResult sortedResultWithThetaNPriors : sortedResultsWithThetaNPriors ) { final Allele altAllele = sortedResultWithThetaNPriors.getAllelesUsedInGenotyping().get(1); @@ -316,7 +313,8 @@ public class IndependentAllelesDiploidExactAFCalc extends DiploidExactAFCalc { log10PriorsOfAC[1] += sortedResultWithThetaNPriors.getLog10PriorOfAFGT0(); // the AF > 0 case requires us to store the normalized likelihood for later summation - log10LikelihoodsOfACGt0[altI] = sortedResultWithThetaNPriors.getLog10LikelihoodOfAFGT0(); + if ( sortedResultWithThetaNPriors.getLog10PosteriorOfAFGT0() > MIN_LOG10_CONFIDENCE_TO_INCLUDE_ALLELE_IN_POSTERIOR ) + log10PosteriorOfACEq0Sum += sortedResultWithThetaNPriors.getLog10PosteriorOfAFEq0(); // bind pNonRef for allele to the posterior value of the AF > 0 with the new adjusted prior log10pNonRefByAllele.put(altAllele, sortedResultWithThetaNPriors.getLog10PosteriorOfAFGT0()); @@ -325,14 +323,19 @@ public class IndependentAllelesDiploidExactAFCalc extends DiploidExactAFCalc { nEvaluations += 
sortedResultWithThetaNPriors.nEvaluations; } - // the log10 likelihoods are the sum of the log10 likelihoods across all alt alleles - final double[] log10LikelihoodsOfAC = new double[]{ - log10LikelihoodsOfACEq0, - MathUtils.log10sumLog10(log10LikelihoodsOfACGt0)}; + // In principle, if B_p = x and C_p = y are the probabilities of being poly for alleles B and C, + // the probability of being poly is (1 - B_p) * (1 - C_p) = (1 - x) * (1 - y). We want to estimate confidently + // log10((1 - x) * (1 - y)) which is log10(1 - x) + log10(1 - y). This sum is log10PosteriorOfACEq0 + final double log10PosteriorOfACGt0 = Math.max(Math.log10(1 - Math.pow(10, log10PosteriorOfACEq0Sum)), MathUtils.LOG10_P_OF_ZERO); + final double[] log10LikelihoodsOfAC = new double[] { + // L + prior = posterior => L = poster - prior + log10PosteriorOfACEq0Sum - log10PriorsOfAC[0], + log10PosteriorOfACGt0 - log10PriorsOfAC[1] + }; return new MyAFCalcResult(alleleCountsOfMLE, nEvaluations, vc.getAlleles(), - MathUtils.normalizeFromLog10(log10LikelihoodsOfAC, true, true), // necessary to ensure all values < 0 - MathUtils.normalizeFromLog10(log10PriorsOfAC, true), // priors incorporate multiple alt alleles, must be normalized + MathUtils.normalizeFromLog10(log10LikelihoodsOfAC, true), // necessary to ensure all values < 0 + MathUtils.normalizeFromLog10(log10PriorsOfAC, true), // priors incorporate multiple alt alleles, must be normalized log10pNonRefByAllele, sortedResultsWithThetaNPriors); } } From 9b0ab4e9417c4cb61d56acac0a0cf0de1add078a Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Mon, 15 Oct 2012 20:58:55 -0400 Subject: [PATCH 79/90] Cleanup IndependentAllelesDiploidExactAFCalc -- Remove capability to truncate genotype likelihoods -- this wasn't used and isn't really useful after all -- Added lots of contracts and docs, still more to come. 
-- Created a default makeMaxLikelihoods function in ReferenceDiploidExactAFCalc and DiploidExactAFCalc so that multiple subclasses don't just do the default thing -- Generalized reference bi-allelic model in IndependentAllelesDiploidExactAFCalc so that in principle any bi-allelic reference model can be used. --- ...dentAllelesDiploidExactAFCalcUnitTest.java | 48 ++----- .../genotyper/afcalc/DiploidExactAFCalc.java | 4 +- .../IndependentAllelesDiploidExactAFCalc.java | 118 ++++++++++-------- .../afcalc/ReferenceDiploidExactAFCalc.java | 6 - 4 files changed, 77 insertions(+), 99 deletions(-) diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/IndependentAllelesDiploidExactAFCalcUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/IndependentAllelesDiploidExactAFCalcUnitTest.java index ed164f245..391c99990 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/IndependentAllelesDiploidExactAFCalcUnitTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/IndependentAllelesDiploidExactAFCalcUnitTest.java @@ -55,48 +55,14 @@ public class IndependentAllelesDiploidExactAFCalcUnitTest extends BaseTest { return tests.toArray(new Object[][]{}); } - @DataProvider(name = "TestCombineGLsWithDrops") - public Object[][] makeTestCombineGLsWithDrops() { - List tests = new ArrayList(); - - final Set noDrops = Collections.emptySet(); - final Set drop1 = Collections.singleton(1); - final Set drop2 = Collections.singleton(2); - - // AA AB BB AC BC CC - // drop1 (B): AA AC CC - // drop2 (C): AA AB BB - tests.add(new Object[]{1, 2, makePL( 0, 1, 2, 3, 4, 5), makePL(0, 2, 5), noDrops}); - tests.add(new Object[]{2, 2, makePL( 0, 1, 2, 3, 4, 5), makePL(0, 4, 9), noDrops}); - tests.add(new Object[]{1, 2, makePL( 0, 1, 2, 3, 4, 5), makePL(0, 1, 2), drop2}); - tests.add(new Object[]{2, 2, makePL( 0, 1, 2, 3, 4, 5), makePL(0, 3, 5), drop1}); - - 
tests.add(new Object[]{1, 2, makePL( 5, 4, 3, 2, 1, 0), makePL(0, 2, 6), noDrops}); - tests.add(new Object[]{2, 2, makePL( 5, 4, 3, 2, 1, 0), makePL(1, 0, 2), noDrops}); - tests.add(new Object[]{1, 2, makePL( 5, 4, 3, 2, 1, 0), makePL(2, 1, 0), drop2}); - tests.add(new Object[]{2, 2, makePL( 5, 4, 3, 2, 1, 0), makePL(5, 2, 0), drop1}); - - tests.add(new Object[]{1, 2, makePL(10,10,10,10,10, 0), makePL( 0, 8,11), noDrops}); - tests.add(new Object[]{2, 2, makePL(10,10,10,10,10, 0), makePL( 5, 7, 0), noDrops}); - tests.add(new Object[]{1, 2, makePL(10,10,10,10,10, 0), makePL( 0, 0, 0), drop2}); - tests.add(new Object[]{2, 2, makePL(10,10,10,10,10, 0), makePL(10,10, 0), drop1}); - - return tests.toArray(new Object[][]{}); - } - private Genotype makePL(final int ... PLs) { return AFCalcUnitTest.makePL(Arrays.asList(Allele.NO_CALL, Allele.NO_CALL), PLs); } @Test(enabled = true, dataProvider = "TestCombineGLs") private void testCombineGLs(final int altIndex, final int nAlts, final Genotype testg, final Genotype expected) { - testCombineGLsWithDrops(altIndex, nAlts, testg, expected, Collections.emptySet()); - } - - @Test(enabled = true, dataProvider = "TestCombineGLsWithDrops") - private void testCombineGLsWithDrops(final int altIndex, final int nAlts, final Genotype testg, final Genotype expected, Set allelesToDrop) { final IndependentAllelesDiploidExactAFCalc calc = (IndependentAllelesDiploidExactAFCalc)AFCalcFactory.createAFCalc(AFCalcFactory.Calculation.EXACT_INDEPENDENT, 1, 4); - final Genotype combined = calc.combineGLs(testg, altIndex, allelesToDrop, nAlts); + final Genotype combined = calc.combineGLs(testg, altIndex, nAlts); Assert.assertEquals(combined.getPL(), expected.getPL(), "Combined PLs " + Utils.join(",", combined.getPL()) + " != expected " + Utils.join(",", expected.getPL())); @@ -120,22 +86,21 @@ public class IndependentAllelesDiploidExactAFCalcUnitTest extends BaseTest { final Genotype gACG = makePL( 0, 1, 2, 3, 4, 5); final Genotype gAGC = makePL( 0, 4, 
5, 1, 3, 2); final Genotype gACcombined = makePL(0, 2, 5); + final Genotype gACcombined2 = makePL(0, 1, 4); final Genotype gAGcombined = makePL(0, 4, 9); - final Genotype gACdropped = makePL(0, 1, 2); - final Genotype gAGdropped = makePL(0, 3, 5); // biallelic tests.add(new Object[]{vcAC.genotypes(gACcombined).make(), Arrays.asList(vcAC.genotypes(gACcombined).make())}); // tri-allelic - tests.add(new Object[]{vcACG.genotypes(gACG).make(), Arrays.asList(vcAC.genotypes(gACcombined).make(), vcAG.genotypes(gAGdropped).make())}); - tests.add(new Object[]{vcAGC.genotypes(gAGC).make(), Arrays.asList(vcAG.genotypes(gAGcombined).make(), vcAC.genotypes(gACdropped).make())}); + tests.add(new Object[]{vcACG.genotypes(gACG).make(), Arrays.asList(vcAC.genotypes(gACcombined).make(), vcAG.genotypes(gAGcombined).make())}); + tests.add(new Object[]{vcAGC.genotypes(gAGC).make(), Arrays.asList(vcAG.genotypes(gAGcombined).make(), vcAC.genotypes(gACcombined2).make())}); return tests.toArray(new Object[][]{}); } - @Test(enabled = false, dataProvider = "TestMakeAlleleConditionalContexts") + @Test(enabled = true, dataProvider = "TestMakeAlleleConditionalContexts") private void testMakeAlleleConditionalContexts(final VariantContext vc, final List expectedVCs) { final IndependentAllelesDiploidExactAFCalc calc = (IndependentAllelesDiploidExactAFCalc)AFCalcFactory.createAFCalc(AFCalcFactory.Calculation.EXACT_INDEPENDENT, 1, 4); final List biAllelicVCs = calc.makeAlleleConditionalContexts(vc); @@ -148,7 +113,8 @@ public class IndependentAllelesDiploidExactAFCalcUnitTest extends BaseTest { Assert.assertEquals(actual.getAlleles(), expected.getAlleles()); for ( int j = 0; j < actual.getNSamples(); j++ ) - Assert.assertEquals(actual.getGenotype(j).getPL(), expected.getGenotype(j).getPL()); + Assert.assertEquals(actual.getGenotype(j).getPL(), expected.getGenotype(j).getPL(), + "expected PLs " + Utils.join(",", expected.getGenotype(j).getPL()) + " not equal to actual " + Utils.join(",", 
actual.getGenotype(j).getPL())); } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/DiploidExactAFCalc.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/DiploidExactAFCalc.java index 8b12dff61..49915c515 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/DiploidExactAFCalc.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/DiploidExactAFCalc.java @@ -36,7 +36,9 @@ public abstract class DiploidExactAFCalc extends ExactAFCalc { if ( ploidy != 2 ) throw new IllegalArgumentException("ploidy must be two for DiploidExactAFCalc and subclasses but saw " + ploidy); } - protected abstract StateTracker makeMaxLikelihood(final VariantContext vc, final AFCalcResultTracker resultTracker); + protected StateTracker makeMaxLikelihood(VariantContext vc, AFCalcResultTracker resultTracker) { + return new StateTracker(); + } @Override protected AFCalcResult computeLog10PNonRef(final VariantContext vc, diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/IndependentAllelesDiploidExactAFCalc.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/IndependentAllelesDiploidExactAFCalc.java index ba1e5bbb8..c0edee291 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/IndependentAllelesDiploidExactAFCalc.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/IndependentAllelesDiploidExactAFCalc.java @@ -91,6 +91,7 @@ import java.util.*; */ private final static double MIN_LOG10_CONFIDENCE_TO_INCLUDE_ALLELE_IN_POSTERIOR = Math.log10(1e-20); + private final static int[] BIALLELIC_NON_INFORMATIVE_PLS = new int[]{0,0,0}; private final static List BIALLELIC_NOCALL = Arrays.asList(Allele.NO_CALL, Allele.NO_CALL); /** @@ -105,19 +106,23 @@ import java.util.*; private final static CompareAFCalcResultsByPNonRef compareAFCalcResultsByPNonRef = new 
CompareAFCalcResultsByPNonRef(); - final ReferenceDiploidExactAFCalc refModel; + /** + * The AFCalc model we are using to do the bi-allelic computation + */ + final AFCalc biAlleleExactModel; protected IndependentAllelesDiploidExactAFCalc(int nSamples, int maxAltAlleles, int maxAltAllelesForIndels, final int ploidy) { super(nSamples, maxAltAlleles, maxAltAllelesForIndels, ploidy); - refModel = new ReferenceDiploidExactAFCalc(nSamples, 1, 1, ploidy); - } - - @Override - protected StateTracker makeMaxLikelihood(VariantContext vc, AFCalcResultTracker resultTracker) { - return refModel.makeMaxLikelihood(vc, resultTracker); + biAlleleExactModel = new ReferenceDiploidExactAFCalc(nSamples, 1, 1, ploidy); } + /** + * Trivial subclass that helps with debugging by keeping track of the supporting information for this joint call + */ private static class MyAFCalcResult extends AFCalcResult { + /** + * List of the supporting bi-allelic AFCalcResults that went into making this multi-allelic joint call + */ final List supporting; private MyAFCalcResult(int[] alleleCountsOfMLE, int nEvaluations, List allelesUsedInGenotyping, double[] log10LikelihoodsOfAC, double[] log10PriorsOfAC, Map log10pNonRefByAllele, List supporting) { @@ -129,58 +134,89 @@ import java.util.*; @Override public AFCalcResult computeLog10PNonRef(final VariantContext vc, final double[] log10AlleleFrequencyPriors) { - final List independentResultTrackers = computeAlleleConditionalExact(vc, log10AlleleFrequencyPriors); + final List independentResultTrackers = computeAlleleIndependentExact(vc, log10AlleleFrequencyPriors); final List withMultiAllelicPriors = applyMultiAllelicPriors(independentResultTrackers); return combineIndependentPNonRefs(vc, withMultiAllelicPriors); } /** + * Compute the conditional exact AFCalcResult for each allele in vc independently, returning + * the result of each, in order of the alt alleles in VC * - * @param vc - * @param log10AlleleFrequencyPriors - * @return + * @param vc the 
VariantContext we want to analyze + * @param log10AlleleFrequencyPriors the priors + * @return a list of the AFCalcResults for each bi-allelic sub context of vc */ - protected List computeAlleleConditionalExact(final VariantContext vc, - final double[] log10AlleleFrequencyPriors) { + @Requires({"vc != null", "log10AlleleFrequencyPriors != null"}) + @Ensures("goodIndependentResult(vc, result)") + protected final List computeAlleleIndependentExact(final VariantContext vc, + final double[] log10AlleleFrequencyPriors) { final List results = new LinkedList(); for ( final VariantContext subvc : makeAlleleConditionalContexts(vc) ) { - final AFCalcResult resultTracker = refModel.getLog10PNonRef(subvc, log10AlleleFrequencyPriors); + final AFCalcResult resultTracker = biAlleleExactModel.getLog10PNonRef(subvc, log10AlleleFrequencyPriors); results.add(resultTracker); } return results; } - protected List makeAlleleConditionalContexts(final VariantContext vc) { + /** + * Helper function to ensure that the computeAlleleIndependentExact is returning reasonable results + */ + private static boolean goodIndependentResult(final VariantContext vc, final List results) { + if ( results.size() != vc.getNAlleles() - 1) return false; + for ( int i = 0; i < results.size(); i++ ) { + if ( results.get(i).getAllelesUsedInGenotyping().size() != 2 ) + return false; + if ( ! results.get(i).getAllelesUsedInGenotyping().contains(vc.getAlternateAllele(i)) ) + return false; + } + + return true; + } + + /** + * Returns the bi-allelic variant context for each alt allele in vc with bi-allelic likelihoods, in order + * + * @param vc the variant context to split. 
Must have n.alt.alleles > 1 + * @return a bi-allelic variant context for each alt allele in vc + */ + @Requires({"vc != null", "vc.getNAlleles() > 1"}) + @Ensures("result.size() == vc.getNAlleles() - 1") + protected final List makeAlleleConditionalContexts(final VariantContext vc) { final int nAltAlleles = vc.getNAlleles() - 1; final List vcs = new LinkedList(); - final List afZeroAlleles = new LinkedList(); for ( int altI = 0; altI < nAltAlleles; altI++ ) { - final Allele altAllele = vc.getAlternateAllele(altI); - final List biallelic = Arrays.asList(vc.getReference(), altAllele); - vcs.add(biallelicCombinedGLs(vc, biallelic, afZeroAlleles, altI + 1)); - //afZeroAlleles.add(altAllele); + vcs.add(biallelicCombinedGLs(vc, altI + 1)); } return vcs; } - protected VariantContext biallelicCombinedGLs(final VariantContext rootVC, final List biallelic, final List afZeroAlleles, final int allele2) { + /** + * Create a single bi-allelic variant context from rootVC with alt allele with index altAlleleIndex + * + * @param rootVC the root (potentially multi-allelic) variant context + * @param altAlleleIndex index of the alt allele, from 0 == first alt allele + * @return a bi-allelic variant context based on rootVC + */ + @Requires({"rootVC.getNAlleles() > 1", "altAlleleIndex < rootVC.getNAlleles()"}) + @Ensures({"result.isBiallelic()"}) + protected final VariantContext biallelicCombinedGLs(final VariantContext rootVC, final int altAlleleIndex) { if ( rootVC.isBiallelic() ) { - if ( ! 
afZeroAlleles.isEmpty() ) throw new IllegalArgumentException("Root VariantContext is biallelic but afZeroAlleles wasn't empty: " + afZeroAlleles); return rootVC; } else { - final Set allelesToDiscard = new HashSet(rootVC.getAlleleIndices(afZeroAlleles)); final int nAlts = rootVC.getNAlleles() - 1; final List biallelicGenotypes = new ArrayList(rootVC.getNSamples()); for ( final Genotype g : rootVC.getGenotypes() ) - biallelicGenotypes.add(combineGLs(g, allele2, allelesToDiscard, nAlts)); + biallelicGenotypes.add(combineGLs(g, altAlleleIndex, nAlts)); final VariantContextBuilder vcb = new VariantContextBuilder(rootVC); - vcb.alleles(biallelic); + final Allele altAllele = rootVC.getAlternateAllele(altAlleleIndex - 1); + vcb.alleles(Arrays.asList(rootVC.getReference(), altAllele)); vcb.genotypes(biallelicGenotypes); return vcb.make(); } @@ -201,30 +237,16 @@ import java.util.*; * XB = AB + BC * BB = BB * - * Supports the additional mode of simply dropping GLs whose allele index occurs in allelesToDiscard. This is - * useful in the case where you want to drop alleles (not combine them), such as above: - * - * AA AB BB AC BC CC - * - * and we want to get the bi-allelic GLs for X/B, where X is everything not B, but dropping C (index 2) - * - * XX = AA (since X = A and C is dropped) - * XB = AB - * BB = BB - * - * This allows us to recover partial GLs the correspond to any allele in allelesToDiscard having strictly - * AF == 0. - * * @param original the original multi-allelic genotype * @param altIndex the index of the alt allele we wish to keep in the bialleic case -- with ref == 0 * @param nAlts the total number of alt alleles * @return a new biallelic genotype with appropriate PLs */ - @Requires({"original.hasLikelihoods()", "! 
allelesToDiscard.contains(altIndex)"}) + @Requires({"original.hasLikelihoods()"}) // TODO -- add ploidy == 2 test "original.getPLs() == null || original.getPLs().length == 3"}) @Ensures({"result.hasLikelihoods()", "result.getPL().length == 3"}) - protected Genotype combineGLs(final Genotype original, final int altIndex, final Set allelesToDiscard, final int nAlts ) { + protected Genotype combineGLs(final Genotype original, final int altIndex, final int nAlts ) { if ( original.isNonInformative() ) - return new GenotypeBuilder(original).PL(new int[]{0,0,0}).alleles(BIALLELIC_NOCALL).make(); + return new GenotypeBuilder(original).PL(BIALLELIC_NON_INFORMATIVE_PLS).alleles(BIALLELIC_NOCALL).make(); if ( altIndex < 1 || altIndex > nAlts ) throw new IllegalStateException("altIndex must be between 1 and nAlts " + nAlts); @@ -234,10 +256,6 @@ import java.util.*; for ( int index = 0; index < normalizedPr.length; index++ ) { final GenotypeLikelihoods.GenotypeLikelihoodsAllelePair pair = GenotypeLikelihoods.getAllelePair(index); - // just continue if we shouldn't include the pair because it's in the discard set - if ( discardAllelePair(pair, allelesToDiscard) ) - continue; - if ( pair.alleleIndex1 == altIndex ) { if ( pair.alleleIndex2 == altIndex ) // hom-alt case @@ -261,11 +279,7 @@ import java.util.*; return new GenotypeBuilder(original).PL(GLs).alleles(BIALLELIC_NOCALL).make(); } - protected boolean discardAllelePair(final GenotypeLikelihoods.GenotypeLikelihoodsAllelePair pair, Set allelesToDiscard) { - return allelesToDiscard.contains(pair.alleleIndex1) || allelesToDiscard.contains(pair.alleleIndex2); - } - - protected List applyMultiAllelicPriors(final List conditionalPNonRefResults) { + protected final List applyMultiAllelicPriors(final List conditionalPNonRefResults) { final ArrayList sorted = new ArrayList(conditionalPNonRefResults); // sort the results, so the most likely allele is first @@ -289,6 +303,8 @@ import java.util.*; /** * Take the independent estimates of 
pNonRef for each alt allele and combine them into a single result * + * TODO -- add more docs + * * @param sortedResultsWithThetaNPriors the pNonRef result for each allele independently */ protected AFCalcResult combineIndependentPNonRefs(final VariantContext vc, diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/ReferenceDiploidExactAFCalc.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/ReferenceDiploidExactAFCalc.java index 4de983508..b4e7b2ab1 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/ReferenceDiploidExactAFCalc.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/ReferenceDiploidExactAFCalc.java @@ -1,13 +1,7 @@ package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; - public class ReferenceDiploidExactAFCalc extends DiploidExactAFCalc { protected ReferenceDiploidExactAFCalc(int nSamples, int maxAltAlleles, int maxAltAllelesForIndels, final int ploidy) { super(nSamples, maxAltAlleles, maxAltAllelesForIndels, ploidy); } - - protected StateTracker makeMaxLikelihood(final VariantContext vc, final AFCalcResultTracker resultTracker) { - return new StateTracker(); - } } From c74d7061fe389510187cf4aa362a0c6028d7591d Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Tue, 16 Oct 2012 08:10:22 -0400 Subject: [PATCH 80/90] Added AFCalcResultUnitTest -- Ensures that the posteriors remain within reasonable ranges. Fixed bug where normalization of posteriors = {-1e30, 0.0} => {-100000, 0.0} which isn't good. 
Now tests ensure that the normalization process preserves log10 precision where possible -- Updated MathUtils to make this possible --- .../afcalc/AFCalcResultUnitTest.java | 77 +++++++++++++++++++ .../genotyper/afcalc/AFCalcUnitTest.java | 37 +++++---- .../genotyper/afcalc/AFCalcResult.java | 10 +-- .../broadinstitute/sting/utils/MathUtils.java | 8 +- 4 files changed, 103 insertions(+), 29 deletions(-) create mode 100644 protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcResultUnitTest.java diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcResultUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcResultUnitTest.java new file mode 100644 index 000000000..1070642e9 --- /dev/null +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcResultUnitTest.java @@ -0,0 +1,77 @@ +package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc; + +import org.broadinstitute.sting.BaseTest; +import org.broadinstitute.sting.utils.MathUtils; +import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.testng.Assert; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; + +public class AFCalcResultUnitTest extends BaseTest { + private static class MyTest { + final double[] Ls, expectedPosteriors; + + private MyTest(double[] ls, double[] expectedPosteriors) { + Ls = ls; + this.expectedPosteriors = expectedPosteriors; + } + + @Override + public String toString() { + return "Ls [" + Utils.join(",", Ls) + "] expectedPosteriors [" + Utils.join(",", expectedPosteriors) + "]"; + } + } + + @DataProvider(name = "TestComputePosteriors") + public Object[][] makeTestCombineGLs() { + List tests = new ArrayList(); + + tests.add(new Object[]{new 
MyTest(log10Even, log10Even)}); + + for ( double L0 = -1e9; L0 < 0.0; L0 /= 10.0 ) { + for ( double L1 = -1e2; L1 < 0.0; L1 /= 100.0 ) { + final double[] input = new double[]{L0, L1}; + final double[] expected = MathUtils.normalizeFromLog10(input, true); + tests.add(new Object[]{new MyTest(input, expected)}); + } + } + + for ( double bigBadL = -1e50; bigBadL < -1e200; bigBadL *= 10 ) { + // test that a huge bad likelihood remains, even with a massive better result + for ( final double betterL : Arrays.asList(-1000.0, -100.0, -10.0, -1.0, -0.1, -0.01, -0.001, 0.0)) { + tests.add(new Object[]{new MyTest(new double[]{bigBadL, betterL}, new double[]{bigBadL, 0.0})}); + tests.add(new Object[]{new MyTest(new double[]{betterL, bigBadL}, new double[]{0.0, bigBadL})}); + } + } + + // test that a modest bad likelihood with an ~0.0 value doesn't get lost + for ( final double badL : Arrays.asList(-10000.0, -1000.0, -100.0, -10.0)) { + tests.add(new Object[]{new MyTest(new double[]{badL, -1e-9}, new double[]{badL, 0.0})}); + tests.add(new Object[]{new MyTest(new double[]{-1e-9, badL}, new double[]{0.0, badL})}); + } + + return tests.toArray(new Object[][]{}); + } + + + final static double[] log10Even = MathUtils.normalizeFromLog10(new double[]{0.5, 0.5}, true); + final static Allele C = Allele.create("C"); + final static List alleles = Arrays.asList(Allele.create("A", true), C); + + @Test(enabled = true, dataProvider = "TestComputePosteriors") + private void testComputingPosteriors(final MyTest data) { + final AFCalcResult result = new AFCalcResult(new int[]{0}, 1, alleles, data.Ls, log10Even, Collections.singletonMap(C, -1.0)); + + Assert.assertEquals(result.getLog10PosteriorOfAFEq0(), data.expectedPosteriors[0], 1e-3, "AF = 0 not expected"); + Assert.assertEquals(result.getLog10PosteriorOfAFGT0(), data.expectedPosteriors[1], 1e-3, "AF > 0 not expected"); + + final double[] actualPosteriors = new double[]{result.getLog10PosteriorOfAFEq0(), result.getLog10PosteriorOfAFGT0()}; + 
Assert.assertEquals(MathUtils.sumLog10(actualPosteriors), 1.0, 1e-3, "Posteriors don't sum to 1 with 1e-3 precision"); + } +} diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcUnitTest.java index e2407989b..25df0f6d2 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcUnitTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcUnitTest.java @@ -446,7 +446,7 @@ public class AFCalcUnitTest extends BaseTest { return tests.toArray(new Object[][]{}); } - @Test(enabled = true & ! DEBUG_ONLY, dataProvider = "Models") + @Test(enabled = true && !DEBUG_ONLY, dataProvider = "Models") public void testBiallelicPriors(final AFCalc model) { for ( int REF_PL = 10; REF_PL <= 20; REF_PL += 10 ) { @@ -454,26 +454,29 @@ public class AFCalcUnitTest extends BaseTest { for ( int log10NonRefPrior = 1; log10NonRefPrior < 10*REF_PL; log10NonRefPrior += 1 ) { final double refPrior = 1 - QualityUtils.qualToErrorProb(log10NonRefPrior); - final double[] priors = MathUtils.normalizeFromLog10(MathUtils.toLog10(new double[]{refPrior, (1-refPrior) / 2, (1-refPrior) / 2}), true); - GetGLsTest cfg = new GetGLsTest(model, 1, Arrays.asList(AB), priors, "pNonRef" + log10NonRefPrior); - final AFCalcResult resultTracker = cfg.execute(); - final int actualAC = resultTracker.getAlleleCountsOfMLE()[0]; + final double nonRefPrior = (1-refPrior) / 2; + final double[] priors = MathUtils.normalizeFromLog10(MathUtils.toLog10(new double[]{refPrior, nonRefPrior, nonRefPrior}), true); + if ( ! 
Double.isInfinite(priors[1]) ) { + GetGLsTest cfg = new GetGLsTest(model, 1, Arrays.asList(AB), priors, "pNonRef" + log10NonRefPrior); + final AFCalcResult resultTracker = cfg.execute(); + final int actualAC = resultTracker.getAlleleCountsOfMLE()[0]; - final double pRefWithPrior = AB.getLikelihoods().getAsVector()[0] + priors[0]; - final double pHetWithPrior = AB.getLikelihoods().getAsVector()[1] + priors[1] - Math.log10(0.5); - final double nonRefPost = Math.pow(10, pHetWithPrior) / (Math.pow(10, pRefWithPrior) + Math.pow(10, pHetWithPrior)); - final double log10NonRefPost = Math.log10(nonRefPost); + final double pRefWithPrior = AB.getLikelihoods().getAsVector()[0] + priors[0]; + final double pHetWithPrior = AB.getLikelihoods().getAsVector()[1] + priors[1] - Math.log10(0.5); + final double nonRefPost = Math.pow(10, pHetWithPrior) / (Math.pow(10, pRefWithPrior) + Math.pow(10, pHetWithPrior)); + final double log10NonRefPost = Math.log10(nonRefPost); - if ( ! Double.isInfinite(log10NonRefPost) ) - Assert.assertEquals(resultTracker.getLog10PosteriorOfAFGT0(), log10NonRefPost, 1e-2); + if ( ! 
Double.isInfinite(log10NonRefPost) ) + Assert.assertEquals(resultTracker.getLog10PosteriorOfAFGT0(), log10NonRefPost, 1e-2); - if ( nonRefPost >= 0.9 ) - Assert.assertTrue(resultTracker.isPolymorphic(C, -1)); + if ( nonRefPost >= 0.9 ) + Assert.assertTrue(resultTracker.isPolymorphic(C, -1)); - final int expectedMLEAC = 1; // the MLE is independent of the prior - Assert.assertEquals(actualAC, expectedMLEAC, - "actual AC with priors " + log10NonRefPrior + " not expected " - + expectedMLEAC + " priors " + Utils.join(",", priors)); + final int expectedMLEAC = 1; // the MLE is independent of the prior + Assert.assertEquals(actualAC, expectedMLEAC, + "actual AC with priors " + log10NonRefPrior + " not expected " + + expectedMLEAC + " priors " + Utils.join(",", priors)); + } } } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcResult.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcResult.java index 209a21d82..c737416c5 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcResult.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcResult.java @@ -271,15 +271,7 @@ public class AFCalcResult { final double[] log10UnnormalizedPosteriors = new double[log10LikelihoodsOfAC.length]; for ( int i = 0; i < log10LikelihoodsOfAC.length; i++ ) log10UnnormalizedPosteriors[i] = log10LikelihoodsOfAC[i] + log10PriorsOfAC[i]; - - // necessary because the posteriors may be so skewed that the log-space normalized value isn't - // good, so we have to try both log-space normalization as well as the real-space normalization if the - // result isn't good - final double[] logNormalized = MathUtils.normalizeFromLog10(log10UnnormalizedPosteriors, true, false); - if ( goodLog10ProbVector(logNormalized, logNormalized.length, true) ) - return logNormalized; - else - return MathUtils.normalizeFromLog10(log10UnnormalizedPosteriors, true, true); + return 
MathUtils.normalizeFromLog10(log10UnnormalizedPosteriors, true, false); } /** diff --git a/public/java/src/org/broadinstitute/sting/utils/MathUtils.java b/public/java/src/org/broadinstitute/sting/utils/MathUtils.java index a1d6907a2..3740d5d7c 100644 --- a/public/java/src/org/broadinstitute/sting/utils/MathUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/MathUtils.java @@ -596,7 +596,6 @@ public class MathUtils { if (keepInLogSpace) { for (int i = 0; i < array.length; i++) { array[i] -= maxValue; - array[i] = Math.max(array[i], LOG10_P_OF_ZERO); } return array; } @@ -613,8 +612,11 @@ public class MathUtils { sum += normalized[i]; for (int i = 0; i < array.length; i++) { double x = normalized[i] / sum; - if (takeLog10OfOutput) - x = Math.max(Math.log10(x), LOG10_P_OF_ZERO); + if (takeLog10OfOutput) { + x = Math.log10(x); + if ( x < LOG10_P_OF_ZERO || Double.isInfinite(x) ) + x = array[i] - maxValue; + } normalized[i] = x; } From 9bcefadd4efaf6094f51268d2113c3328d27a585 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Tue, 16 Oct 2012 13:30:09 -0400 Subject: [PATCH 82/90] Refactor ExactCallLogger into a separate class -- Update minor integration tests with NanoSchedule due to qual accuracy update --- .../afcalc/AFCalcPerformanceTest.java | 11 +- .../gatk/walkers/genotyper/afcalc/AFCalc.java | 145 +-------------- .../genotyper/afcalc/ExactCallLogger.java | 166 ++++++++++++++++++ .../NanoSchedulerIntegrationTest.java | 2 +- 4 files changed, 180 insertions(+), 144 deletions(-) create mode 100644 public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/ExactCallLogger.java diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcPerformanceTest.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcPerformanceTest.java index f019d8f8e..fab26c9d2 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcPerformanceTest.java +++ 
b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcPerformanceTest.java @@ -18,11 +18,8 @@ import java.io.*; import java.util.*; /** - * Created with IntelliJ IDEA. - * User: depristo - * Date: 10/2/12 - * Time: 10:25 AM - * To change this template use File | Settings | File Templates. + * A simple GATK utility (i.e, runs from command-line) for assessing the performance of + * the exact model */ public class AFCalcPerformanceTest { final static Logger logger = Logger.getLogger(AFCalcPerformanceTest.class); @@ -222,9 +219,9 @@ public class AFCalcPerformanceTest { final CachingIndexedFastaSequenceFile seq = new CachingIndexedFastaSequenceFile(ref); final GenomeLocParser parser = new GenomeLocParser(seq); final BufferedReader reader = new BufferedReader(new FileReader(exactLogFile)); - final List loggedCalls = AFCalc.readExactLog(reader, startsToUse, parser); + final List loggedCalls = ExactCallLogger.readExactLog(reader, startsToUse, parser); - for ( final AFCalc.ExactCall call : loggedCalls ) { + for ( final ExactCallLogger.ExactCall call : loggedCalls ) { final AFCalcTestBuilder testBuilder = new AFCalcTestBuilder(call.vc.getNSamples(), 1, AFCalcFactory.Calculation.EXACT_INDEPENDENT, AFCalcTestBuilder.PriorType.human); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalc.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalc.java index 8cb6bcabc..07f88c9e3 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalc.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalc.java @@ -28,20 +28,17 @@ package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc; import com.google.java.contract.Ensures; import com.google.java.contract.Requires; import org.apache.log4j.Logger; -import org.broadinstitute.sting.utils.*; -import org.broadinstitute.sting.utils.exceptions.UserException; -import 
org.broadinstitute.sting.utils.variantcontext.*; +import org.broadinstitute.sting.utils.SimpleTimer; +import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.sting.utils.variantcontext.GenotypesContext; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import java.io.*; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.LinkedList; +import java.io.File; import java.util.List; /** * Generic interface for calculating the probability of alleles segregating given priors and genotype likelihoods - * */ public abstract class AFCalc implements Cloneable { private final static Logger defaultLogger = Logger.getLogger(AFCalc.class); @@ -53,8 +50,8 @@ public abstract class AFCalc implements Cloneable { protected Logger logger = defaultLogger; private SimpleTimer callTimer = new SimpleTimer(); - private PrintStream callReport = null; private final AFCalcResultTracker resultTracker; + private ExactCallLogger exactCallLogger = null; protected AFCalc(final int nSamples, final int maxAltAlleles, final int maxAltAllelesForIndels, final int ploidy) { if ( nSamples < 0 ) throw new IllegalArgumentException("nSamples must be greater than zero " + nSamples); @@ -69,7 +66,7 @@ public abstract class AFCalc implements Cloneable { } public void enableProcessLog(final File exactCallsLog) { - initializeOutputFile(exactCallsLog); + exactCallLogger = new ExactCallLogger(exactCallsLog); } public void setLogger(Logger logger) { @@ -97,8 +94,8 @@ public abstract class AFCalc implements Cloneable { final AFCalcResult result = computeLog10PNonRef(vcWorking, log10AlleleFrequencyPriors); final long nanoTime = callTimer.getElapsedTimeNano(); - if ( callReport != null ) - printCallInfo(vcWorking, log10AlleleFrequencyPriors, nanoTime, resultTracker.getLog10PosteriorOfAFzero()); + if ( exactCallLogger != null ) + exactCallLogger.printCallInfo(vcWorking, log10AlleleFrequencyPriors, nanoTime, result); return result; } @@ -165,130 
+162,6 @@ public abstract class AFCalc implements Cloneable { return Math.max(maxAlternateAllelesToGenotype, maxAlternateAllelesForIndels); } - - // --------------------------------------------------------------------------- - // - // Print information about the call to the calls log - // - // --------------------------------------------------------------------------- - - private void initializeOutputFile(final File outputFile) { - try { - if (outputFile != null) { - callReport = new PrintStream( new FileOutputStream(outputFile) ); - callReport.println(Utils.join("\t", Arrays.asList("loc", "variable", "key", "value"))); - } - } catch ( FileNotFoundException e ) { - throw new UserException.CouldNotCreateOutputFile(outputFile, e); - } - } - - private void printCallInfo(final VariantContext vc, - final double[] log10AlleleFrequencyPriors, - final long runtimeNano, - final double log10PosteriorOfAFzero) { - printCallElement(vc, "type", "ignore", vc.getType()); - - int allelei = 0; - for ( final Allele a : vc.getAlleles() ) - printCallElement(vc, "allele", allelei++, a.getDisplayString()); - - for ( final Genotype g : vc.getGenotypes() ) - printCallElement(vc, "PL", g.getSampleName(), g.getLikelihoodsString()); - - for ( int priorI = 0; priorI < log10AlleleFrequencyPriors.length; priorI++ ) - printCallElement(vc, "priorI", priorI, log10AlleleFrequencyPriors[priorI]); - - printCallElement(vc, "runtime.nano", "ignore", runtimeNano); - printCallElement(vc, "log10PosteriorOfAFzero", "ignore", log10PosteriorOfAFzero); - - callReport.flush(); - } - - private void printCallElement(final VariantContext vc, - final Object variable, - final Object key, - final Object value) { - final String loc = String.format("%s:%d", vc.getChr(), vc.getStart()); - callReport.println(Utils.join("\t", Arrays.asList(loc, variable, key, value))); - } - - public static class ExactCall { - final VariantContext vc; - final long origNanoTime; - long newNanoTime = -1; - final double origPNonRef; - 
double newPNonRef = -1; - - public ExactCall(VariantContext vc, long origNanoTime, double origPNonRef) { - this.vc = vc; - this.origNanoTime = origNanoTime; - this.origPNonRef = origPNonRef; - } - - @Override - public String toString() { - return String.format("ExactCall %s:%d alleles=%s nSamples=%s orig.pNonRef=%.2f orig.runtime=%s new.pNonRef=%.2f new.runtime=%s", - vc.getChr(), vc.getStart(), vc.getAlleles(), vc.getNSamples(), - origPNonRef, - new AutoFormattingTime(origNanoTime / 1e9).toString(), - newPNonRef, - newNanoTime == -1 ? "not.run" : new AutoFormattingTime(newNanoTime / 1e9).toString()); - } - } - - public static List readExactLog(final BufferedReader reader, final List startsToKeep, GenomeLocParser parser) throws IOException { - List calls = new LinkedList(); - - // skip the header line - reader.readLine(); - - while ( true ) { - final VariantContextBuilder builder = new VariantContextBuilder(); - final List alleles = new ArrayList(); - final List genotypes = new ArrayList(); - long runtimeNano = -1; - - GenomeLoc currentLoc = null; - while ( true ) { - final String line = reader.readLine(); - if ( line == null ) - return calls; - - final String[] parts = line.split("\t"); - final GenomeLoc lineLoc = parser.parseGenomeLoc(parts[0]); - final String variable = parts[1]; - final String key = parts[2]; - final String value = parts[3]; - - if ( currentLoc == null ) - currentLoc = lineLoc; - - if ( variable.equals("log10PosteriorOfAFzero") ) { - if ( startsToKeep.isEmpty() || startsToKeep.contains(currentLoc.getStart()) ) { - builder.alleles(alleles); - final int stop = currentLoc.getStart() + alleles.get(0).length() - 1; - builder.chr(currentLoc.getContig()).start(currentLoc.getStart()).stop(stop); - builder.genotypes(genotypes); - calls.add(new ExactCall(builder.make(), runtimeNano, Double.valueOf(value))); - } - break; - } else if ( variable.equals("allele") ) { - final boolean isRef = key.equals("0"); - alleles.add(Allele.create(value, isRef)); - } 
else if ( variable.equals("PL") ) { - final GenotypeBuilder gb = new GenotypeBuilder(key); - gb.PL(GenotypeLikelihoods.fromPLField(value).getAsPLs()); - genotypes.add(gb.make()); - } else if ( variable.equals("runtime.nano") ) { - runtimeNano = Long.valueOf(value); - } else { - // nothing to do - } - } - } - } - public AFCalcResultTracker getResultTracker() { return resultTracker; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/ExactCallLogger.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/ExactCallLogger.java new file mode 100644 index 000000000..3794ba240 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/ExactCallLogger.java @@ -0,0 +1,166 @@ +package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc; + +import org.broadinstitute.sting.utils.AutoFormattingTime; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.GenomeLocParser; +import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.variantcontext.*; + +import java.io.*; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.LinkedList; +import java.util.List; + +/** + * Allows us to write out and read in information about exact calls (site, alleles, PLs, etc) in tabular format + */ +public class ExactCallLogger implements Cloneable { + private PrintStream callReport = null; + + /** + * Create a new ExactCallLogger writing it's output to outputFile + * + * @param outputFile + */ + public ExactCallLogger(final File outputFile) { + try { + callReport = new PrintStream(new FileOutputStream(outputFile)); + callReport.println(Utils.join("\t", Arrays.asList("loc", "variable", "key", "value"))); + } catch (FileNotFoundException e) { + throw new UserException.CouldNotCreateOutputFile(outputFile, e); + } + } + + /** + * Summarizes information about an exact call 
that happened + */ + public static class ExactCall { + final VariantContext vc; + final long origNanoTime; + long newNanoTime = -1; + final double origPNonRef; + double newPNonRef = -1; + + public ExactCall(VariantContext vc, long origNanoTime, double origPNonRef) { + this.vc = vc; + this.origNanoTime = origNanoTime; + this.origPNonRef = origPNonRef; + } + + @Override + public String toString() { + return String.format("ExactCall %s:%d alleles=%s nSamples=%s orig.pNonRef=%.2f orig.runtime=%s new.pNonRef=%.2f new.runtime=%s", + vc.getChr(), vc.getStart(), vc.getAlleles(), vc.getNSamples(), + origPNonRef, + new AutoFormattingTime(origNanoTime / 1e9).toString(), + newPNonRef, + newNanoTime == -1 ? "not.run" : new AutoFormattingTime(newNanoTime / 1e9).toString()); + } + } + + protected void printCallInfo(final VariantContext vc, + final double[] log10AlleleFrequencyPriors, + final long runtimeNano, + final AFCalcResult result) { + printCallElement(vc, "type", "ignore", vc.getType()); + + int allelei = 0; + for (final Allele a : vc.getAlleles()) + printCallElement(vc, "allele", allelei++, a.getDisplayString()); + + for (final Genotype g : vc.getGenotypes()) + printCallElement(vc, "PL", g.getSampleName(), g.getLikelihoodsString()); + + for (int priorI = 0; priorI < log10AlleleFrequencyPriors.length; priorI++) + printCallElement(vc, "priorI", priorI, log10AlleleFrequencyPriors[priorI]); + + printCallElement(vc, "runtime.nano", "ignore", runtimeNano); + printCallElement(vc, "log10PosteriorOfAFEq0", "ignore", result.getLog10PosteriorOfAFEq0()); + printCallElement(vc, "log10PosteriorOfAFGt0", "ignore", result.getLog10PosteriorOfAFGT0()); + + for ( final Allele allele : result.getAllelesUsedInGenotyping() ) { + if ( allele.isNonReference() ) { + printCallElement(vc, "MLE", allele, result.getAlleleCountAtMLE(allele)); + printCallElement(vc, "pNonRefByAllele", allele, result.getLog10PosteriorOfAFGt0ForAllele(allele)); + } + } + + callReport.flush(); + } + + private void 
printCallElement(final VariantContext vc, + final Object variable, + final Object key, + final Object value) { + final String loc = String.format("%s:%d", vc.getChr(), vc.getStart()); + callReport.println(Utils.join("\t", Arrays.asList(loc, variable, key, value))); + } + + /** + * Read in a list of ExactCall objects from reader, keeping only those + * with starts in startsToKeep or all sites (if this is empty) + * + * @param reader + * @param startsToKeep + * @param parser + * @return + * @throws IOException + */ + public static List readExactLog(final BufferedReader reader, final List startsToKeep, GenomeLocParser parser) throws IOException { + if ( reader == null ) throw new IllegalArgumentException("reader cannot be null"); + if ( startsToKeep == null ) throw new IllegalArgumentException("startsToKeep cannot be null"); + if ( parser == null ) throw new IllegalArgumentException("GenomeLocParser cannot be null"); + + List calls = new LinkedList(); + + // skip the header line + reader.readLine(); + + while (true) { + final VariantContextBuilder builder = new VariantContextBuilder(); + final List alleles = new ArrayList(); + final List genotypes = new ArrayList(); + long runtimeNano = -1; + + GenomeLoc currentLoc = null; + while (true) { + final String line = reader.readLine(); + if (line == null) + return calls; + + final String[] parts = line.split("\t"); + final GenomeLoc lineLoc = parser.parseGenomeLoc(parts[0]); + final String variable = parts[1]; + final String key = parts[2]; + final String value = parts[3]; + + if (currentLoc == null) + currentLoc = lineLoc; + + if (variable.equals("log10PosteriorOfAFzero")) { + if (startsToKeep.isEmpty() || startsToKeep.contains(currentLoc.getStart())) { + builder.alleles(alleles); + final int stop = currentLoc.getStart() + alleles.get(0).length() - 1; + builder.chr(currentLoc.getContig()).start(currentLoc.getStart()).stop(stop); + builder.genotypes(genotypes); + calls.add(new ExactCall(builder.make(), runtimeNano, 
Double.valueOf(value))); + } + break; + } else if (variable.equals("allele")) { + final boolean isRef = key.equals("0"); + alleles.add(Allele.create(value, isRef)); + } else if (variable.equals("PL")) { + final GenotypeBuilder gb = new GenotypeBuilder(key); + gb.PL(GenotypeLikelihoods.fromPLField(value).getAsPLs()); + genotypes.add(gb.make()); + } else if (variable.equals("runtime.nano")) { + runtimeNano = Long.valueOf(value); + } else { + // nothing to do + } + } + } + } +} \ No newline at end of file diff --git a/public/java/test/org/broadinstitute/sting/utils/nanoScheduler/NanoSchedulerIntegrationTest.java b/public/java/test/org/broadinstitute/sting/utils/nanoScheduler/NanoSchedulerIntegrationTest.java index 93099f82a..c1b28314c 100755 --- a/public/java/test/org/broadinstitute/sting/utils/nanoScheduler/NanoSchedulerIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/nanoScheduler/NanoSchedulerIntegrationTest.java @@ -21,7 +21,7 @@ public class NanoSchedulerIntegrationTest extends WalkerTest { for ( final int nct : Arrays.asList(1, 2) ) { // tests.add(new Object[]{ "SNP", "a1c7546f32a8919a3f3a70a04b2e8322", nt, nct }); //// tests.add(new Object[]{ "INDEL", "0a6d2be79f4f8a4b0eb788cc4751b31b", nt, nct }); - tests.add(new Object[]{ "BOTH", "f8184e336dec7632408aa9afa98e6914", nt, nct }); + tests.add(new Object[]{ "BOTH", "8cad82c3a5f5b932042933f136663c8a", nt, nct }); } return tests.toArray(new Object[][]{}); From b30e2a5b7dc5387df5b8da91a9f8b802455de18b Mon Sep 17 00:00:00 2001 From: David Roazen Date: Mon, 15 Oct 2012 11:53:57 -0400 Subject: [PATCH 84/90] BQSR: tool to profile the effects of more-granular locking on scalability by # of threads --- .../LoggingNestedIntegerArray.java | 43 ++++++++++++++++++- 1 file changed, 42 insertions(+), 1 deletion(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/collections/LoggingNestedIntegerArray.java 
b/public/java/src/org/broadinstitute/sting/utils/collections/LoggingNestedIntegerArray.java index 617391714..6fda0245b 100644 --- a/public/java/src/org/broadinstitute/sting/utils/collections/LoggingNestedIntegerArray.java +++ b/public/java/src/org/broadinstitute/sting/utils/collections/LoggingNestedIntegerArray.java @@ -34,7 +34,12 @@ import java.io.PrintStream; * to the provided output stream. For testing/debugging purposes. * * Log entries are of the following form (fields are tab-separated): - * LABEL VALUE KEY1 KEY2 ... KEY_N + * LABEL OPERATION VALUE KEY1 KEY2 ... KEY_N + * + * A header line is written before the log entries giving the dimensions of this NestedIntegerArray. + * It has the form: + * + * # LABEL SIZE_OF_FIRST_DIMENSION SIZE_OF_SECOND_DIMENSION ... SIZE_OF_NTH_DIMENSION * * @author David Roazen */ @@ -43,6 +48,9 @@ public class LoggingNestedIntegerArray extends NestedIntegerArray { private PrintStream log; private String logEntryLabel; + public static final String HEADER_LINE_PREFIX = "# "; + public enum NestedIntegerArrayOperation { GET, PUT }; + /** * * @param log output stream to which to log update operations @@ -57,6 +65,37 @@ public class LoggingNestedIntegerArray extends NestedIntegerArray { } this.log = log; this.logEntryLabel = logEntryLabel != null ? logEntryLabel : ""; + + // Write the header line recording the dimensions of this NestedIntegerArray: + StringBuilder logHeaderLine = new StringBuilder(); + + logHeaderLine.append(HEADER_LINE_PREFIX); + logHeaderLine.append(this.logEntryLabel); + for ( int dimension : dimensions ) { + logHeaderLine.append("\t"); + logHeaderLine.append(dimension); + } + + this.log.println(logHeaderLine.toString()); + } + + @Override + public T get( final int... 
keys ) { + StringBuilder logEntry = new StringBuilder(); + + logEntry.append(logEntryLabel); + logEntry.append("\t"); + logEntry.append(NestedIntegerArrayOperation.GET); + logEntry.append("\t"); // empty field for the datum value + + for ( int key : keys ) { + logEntry.append("\t"); + logEntry.append(key); + } + + log.println(logEntry.toString()); + + return super.get(keys); } @Override @@ -67,6 +106,8 @@ public class LoggingNestedIntegerArray extends NestedIntegerArray { logEntry.append(logEntryLabel); logEntry.append("\t"); + logEntry.append(NestedIntegerArrayOperation.PUT); + logEntry.append("\t"); logEntry.append(value); for ( int key : keys ) { logEntry.append("\t"); From f93b2791511070afa5af1808426305365a4eb28f Mon Sep 17 00:00:00 2001 From: kshakir Date: Tue, 16 Oct 2012 18:49:10 -0400 Subject: [PATCH 85/90] Moved the class field caching from QScript to a ClassFieldCache utility. Using ClassFieldCache to pull values from QScript for passing to done() method of QStatusMessenger. --- .../sting/queue/QCommandLine.scala | 11 +- .../broadinstitute/sting/queue/QScript.scala | 37 ++-- .../sting/queue/engine/QStatusMessenger.scala | 5 +- .../extensions/gatk/BamGatherFunction.scala | 7 +- .../extensions/gatk/VcfGatherFunction.scala | 5 +- .../sting/queue/function/QFunction.scala | 80 +------- .../scattergather/CloneFunction.scala | 9 +- .../sting/queue/util/ClassFieldCache.scala | 183 ++++++++++++++++++ 8 files changed, 227 insertions(+), 110 deletions(-) create mode 100644 public/scala/src/org/broadinstitute/sting/queue/util/ClassFieldCache.scala diff --git a/public/scala/src/org/broadinstitute/sting/queue/QCommandLine.scala b/public/scala/src/org/broadinstitute/sting/queue/QCommandLine.scala index f4c4b613f..5b84bfd16 100644 --- a/public/scala/src/org/broadinstitute/sting/queue/QCommandLine.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/QCommandLine.scala @@ -100,7 +100,7 @@ class QCommandLine extends CommandLineProgram with Logging { new 
PluginManager[QStatusMessenger](classOf[QStatusMessenger]) } - QFunction.parsingEngine = new ParsingEngine(this) + ClassFieldCache.parsingEngine = new ParsingEngine(this) /** * Takes the QScripts passed in, runs their script() methods, retrieves their generated @@ -127,6 +127,9 @@ class QCommandLine extends CommandLineProgram with Logging { for (script <- allQScripts) { logger.info("Scripting " + qScriptPluginManager.getName(script.getClass.asSubclass(classOf[QScript]))) loadArgumentsIntoObject(script) + // TODO: Pulling inputs can be time/io expensive! Some scripts are using the files to generate functions-- even for dry runs-- so pull it all down for now. + //if (settings.run) + script.pullInputs() script.qSettings = settings.qSettings try { script.script() @@ -138,10 +141,6 @@ class QCommandLine extends CommandLineProgram with Logging { logger.info("Added " + script.functions.size + " functions") } - if (settings.run) { - allQScripts.foreach(_.pullInputs()) - } - // Execute the job graph qGraph.run() @@ -170,7 +169,7 @@ class QCommandLine extends CommandLineProgram with Logging { if (settings.run) { allQScripts.foreach(_.pushOutputs()) for (statusMessenger <- allStatusMessengers) - statusMessenger.done() + statusMessenger.done(allQScripts.map(_.remoteOutputs)) } 0 } diff --git a/public/scala/src/org/broadinstitute/sting/queue/QScript.scala b/public/scala/src/org/broadinstitute/sting/queue/QScript.scala index da24b854e..ee2089dc5 100755 --- a/public/scala/src/org/broadinstitute/sting/queue/QScript.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/QScript.scala @@ -28,8 +28,7 @@ import engine.JobRunInfo import org.broadinstitute.sting.queue.function.QFunction import annotation.target.field import util._ -import org.broadinstitute.sting.utils.classloader.JVMUtils -import java.lang.reflect.Field +import org.broadinstitute.sting.commandline.ArgumentSource /** * Defines a Queue pipeline as a collection of CommandLineFunctions. 
@@ -110,31 +109,29 @@ trait QScript extends Logging with PrimitiveOptionConversions with StringFileCon } def pullInputs() { - val inputs = getInputs - inputs.filter(_.isInstanceOf[RemoteFile]).map(_.asInstanceOf[RemoteFile]).foreach(_.pullToLocal()) + val inputs = ClassFieldCache.getFieldFiles(this, inputFields) + filterRemoteFiles(inputs).foreach(_.pullToLocal()) } def pushOutputs() { - val outputs = getOutputs - outputs.filter(_.isInstanceOf[RemoteFile]).map(_.asInstanceOf[RemoteFile]).foreach(_.pushToRemote()) + val outputs = ClassFieldCache.getFieldFiles(this, outputFields) + filterRemoteFiles(outputs).foreach(_.pushToRemote()) } - private def getInputs: Seq[File] = { - getFieldValues(classOf[Input]) - } + def remoteOutputs: Map[ArgumentSource, Seq[RemoteFile]] = + outputFields.map(field => (field -> filterRemoteFiles(ClassFieldCache.getFieldFiles(this, field)))).filter(tuple => !tuple._2.isEmpty).toMap - private def getOutputs: Seq[File] = { - getFieldValues(classOf[Output]) - } + private def filterRemoteFiles(fields: Seq[File]): Seq[RemoteFile] = + fields.filter(field => field != null && field.isInstanceOf[RemoteFile]).map(_.asInstanceOf[RemoteFile]) - private def getFieldValues(annotation: Class[_ <: java.lang.annotation.Annotation]): Seq[File] = { - val filtered: Seq[Field] = fields.filter(field => ReflectionUtils.hasAnnotation(field, annotation)) - val files = filtered.filter(field => classOf[File].isAssignableFrom(field.getType)).map(field => ReflectionUtils.getValue(this, field).asInstanceOf[File]) - val seqFiles = filtered.filter(field => classOf[Seq[File]].isAssignableFrom(field.getType)).map(field => ReflectionUtils.getValue(this, field).asInstanceOf[Seq[File]]) - seqFiles.foldLeft(files)(_ ++ _).filter(_ != null) - } - - private lazy val fields = collection.JavaConversions.asScalaBuffer(JVMUtils.getAllFields(this.getClass)).toSeq + /** The complete list of fields. 
*/ + def functionFields: Seq[ArgumentSource] = ClassFieldCache.classFunctionFields(this.getClass) + /** The @Input fields. */ + def inputFields: Seq[ArgumentSource] = ClassFieldCache.classInputFields(this.getClass) + /** The @Output fields. */ + def outputFields: Seq[ArgumentSource] = ClassFieldCache.classOutputFields(this.getClass) + /** The @Argument fields. */ + def argumentFields: Seq[ArgumentSource] = ClassFieldCache.classArgumentFields(this.getClass) } object QScript { diff --git a/public/scala/src/org/broadinstitute/sting/queue/engine/QStatusMessenger.scala b/public/scala/src/org/broadinstitute/sting/queue/engine/QStatusMessenger.scala index c61f2ef1f..eeabe6d1d 100644 --- a/public/scala/src/org/broadinstitute/sting/queue/engine/QStatusMessenger.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/engine/QStatusMessenger.scala @@ -1,10 +1,13 @@ package org.broadinstitute.sting.queue.engine +import org.broadinstitute.sting.commandline.ArgumentSource +import org.broadinstitute.sting.queue.util.RemoteFile + /** * Plugin to sends QStatus messages */ trait QStatusMessenger { def started() - def done() + def done(files: Seq[Map[ArgumentSource, Seq[RemoteFile]]]) def exit(message: String) } diff --git a/public/scala/src/org/broadinstitute/sting/queue/extensions/gatk/BamGatherFunction.scala b/public/scala/src/org/broadinstitute/sting/queue/extensions/gatk/BamGatherFunction.scala index 9522ec86c..a59f273ad 100644 --- a/public/scala/src/org/broadinstitute/sting/queue/extensions/gatk/BamGatherFunction.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/extensions/gatk/BamGatherFunction.scala @@ -28,6 +28,7 @@ import org.broadinstitute.sting.queue.function.scattergather.GatherFunction import org.broadinstitute.sting.queue.extensions.picard.PicardBamFunction import org.broadinstitute.sting.queue.function.{RetryMemoryLimit, QFunction} import org.broadinstitute.sting.gatk.io.stubs.SAMFileWriterArgumentTypeDescriptor +import 
org.broadinstitute.sting.queue.util.ClassFieldCache /** * Merges BAM files using net.sf.picard.sam.MergeSamFiles. @@ -47,13 +48,13 @@ class BamGatherFunction extends GatherFunction with PicardBamFunction with Retry // bam_compression and index_output_bam_on_the_fly from SAMFileWriterArgumentTypeDescriptor // are added by the GATKExtensionsGenerator to the subclass of CommandLineGATK - val compression = QFunction.findField(originalFunction.getClass, SAMFileWriterArgumentTypeDescriptor.COMPRESSION_FULLNAME) + val compression = ClassFieldCache.findField(originalFunction.getClass, SAMFileWriterArgumentTypeDescriptor.COMPRESSION_FULLNAME) this.compressionLevel = originalGATK.getFieldValue(compression).asInstanceOf[Option[Int]] - val disableIndex = QFunction.findField(originalFunction.getClass, SAMFileWriterArgumentTypeDescriptor.DISABLE_INDEXING_FULLNAME) + val disableIndex = ClassFieldCache.findField(originalFunction.getClass, SAMFileWriterArgumentTypeDescriptor.DISABLE_INDEXING_FULLNAME) this.createIndex = Some(!originalGATK.getFieldValue(disableIndex).asInstanceOf[Boolean]) - val enableMD5 = QFunction.findField(originalFunction.getClass, SAMFileWriterArgumentTypeDescriptor.ENABLE_MD5_FULLNAME) + val enableMD5 = ClassFieldCache.findField(originalFunction.getClass, SAMFileWriterArgumentTypeDescriptor.ENABLE_MD5_FULLNAME) this.createMD5 = Some(originalGATK.getFieldValue(enableMD5).asInstanceOf[Boolean]) super.freezeFieldValues() diff --git a/public/scala/src/org/broadinstitute/sting/queue/extensions/gatk/VcfGatherFunction.scala b/public/scala/src/org/broadinstitute/sting/queue/extensions/gatk/VcfGatherFunction.scala index 75be4d773..fb22554f0 100644 --- a/public/scala/src/org/broadinstitute/sting/queue/extensions/gatk/VcfGatherFunction.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/extensions/gatk/VcfGatherFunction.scala @@ -27,6 +27,7 @@ package org.broadinstitute.sting.queue.extensions.gatk import 
org.broadinstitute.sting.queue.function.scattergather.GatherFunction import org.broadinstitute.sting.queue.function.{RetryMemoryLimit, QFunction} import org.broadinstitute.sting.gatk.io.stubs.VCFWriterArgumentTypeDescriptor +import org.broadinstitute.sting.queue.util.ClassFieldCache /** * Merges a vcf text file. @@ -46,10 +47,10 @@ class VcfGatherFunction extends CombineVariants with GatherFunction with RetryMe // NO_HEADER and sites_only from VCFWriterArgumentTypeDescriptor // are added by the GATKExtensionsGenerator to the subclass of CommandLineGATK - val noHeader = QFunction.findField(originalFunction.getClass, VCFWriterArgumentTypeDescriptor.NO_HEADER_ARG_NAME) + val noHeader = ClassFieldCache.findField(originalFunction.getClass, VCFWriterArgumentTypeDescriptor.NO_HEADER_ARG_NAME) this.no_cmdline_in_header = originalGATK.getFieldValue(noHeader).asInstanceOf[Boolean] - val sitesOnly = QFunction.findField(originalFunction.getClass, VCFWriterArgumentTypeDescriptor.SITES_ONLY_ARG_NAME) + val sitesOnly = ClassFieldCache.findField(originalFunction.getClass, VCFWriterArgumentTypeDescriptor.SITES_ONLY_ARG_NAME) this.sites_only = originalGATK.getFieldValue(sitesOnly).asInstanceOf[Boolean] // ensure that the gather function receives the same unsafe parameter as the scattered function diff --git a/public/scala/src/org/broadinstitute/sting/queue/function/QFunction.scala b/public/scala/src/org/broadinstitute/sting/queue/function/QFunction.scala index aae846534..3849b976a 100644 --- a/public/scala/src/org/broadinstitute/sting/queue/function/QFunction.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/function/QFunction.scala @@ -28,7 +28,6 @@ import java.io.File import java.lang.annotation.Annotation import org.broadinstitute.sting.commandline._ import org.broadinstitute.sting.queue.{QException, QSettings} -import collection.JavaConversions._ import java.lang.IllegalStateException import org.broadinstitute.sting.queue.util._ import 
org.broadinstitute.sting.utils.io.IOUtils @@ -194,13 +193,13 @@ trait QFunction extends Logging with QJobReport { def failOutputs: Seq[File] = statusPrefixes.map(path => new File(path + ".fail")) /** The complete list of fields on this CommandLineFunction. */ - def functionFields = QFunction.classFields(this.functionFieldClass).functionFields + def functionFields: Seq[ArgumentSource] = ClassFieldCache.classFunctionFields(this.functionFieldClass) /** The @Input fields on this CommandLineFunction. */ - def inputFields = QFunction.classFields(this.functionFieldClass).inputFields + def inputFields: Seq[ArgumentSource] = ClassFieldCache.classInputFields(this.functionFieldClass) /** The @Output fields on this CommandLineFunction. */ - def outputFields = QFunction.classFields(this.functionFieldClass).outputFields + def outputFields: Seq[ArgumentSource] = ClassFieldCache.classOutputFields(this.functionFieldClass) /** The @Argument fields on this CommandLineFunction. */ - def argumentFields = QFunction.classFields(this.functionFieldClass).argumentFields + def argumentFields: Seq[ArgumentSource] = ClassFieldCache.classArgumentFields(this.functionFieldClass) /** * Returns the class that should be used for looking up fields. @@ -475,79 +474,12 @@ trait QFunction extends Logging with QJobReport { * @param source Field to get the value for. * @return value of the field. */ - def getFieldValue(source: ArgumentSource) = ReflectionUtils.getValue(invokeObj(source), source.field) + def getFieldValue(source: ArgumentSource) = ClassFieldCache.getFieldValue(this, source) /** * Gets the value of a field. * @param source Field to set the value for. * @return value of the field. */ - def setFieldValue(source: ArgumentSource, value: Any) = ReflectionUtils.setValue(invokeObj(source), source.field, value) - - /** - * Walks gets the fields in this object or any collections in that object - * recursively to find the object holding the field to be retrieved or set. 
- * @param source Field find the invoke object for. - * @return Object to invoke the field on. - */ - private def invokeObj(source: ArgumentSource) = source.parentFields.foldLeft[AnyRef](this)(ReflectionUtils.getValue(_, _)) -} - -object QFunction { - var parsingEngine: ParsingEngine = _ - - /** - * The list of fields defined on a class - * @param clazz The class to lookup fields. - */ - private class ClassFields(clazz: Class[_]) { - /** The complete list of fields on this CommandLineFunction. */ - val functionFields: Seq[ArgumentSource] = parsingEngine.extractArgumentSources(clazz).toSeq - /** The @Input fields on this CommandLineFunction. */ - val inputFields = functionFields.filter(source => ReflectionUtils.hasAnnotation(source.field, classOf[Input])) - /** The @Output fields on this CommandLineFunction. */ - val outputFields = functionFields.filter(source => ReflectionUtils.hasAnnotation(source.field, classOf[Output])) - /** The @Argument fields on this CommandLineFunction. */ - val argumentFields = functionFields.filter(source => ReflectionUtils.hasAnnotation(source.field, classOf[Argument])) - } - - /** - * The mapping from class to fields. - */ - private var classFieldsMap = Map.empty[Class[_], ClassFields] - - /** - * Returns the field on clazz. - * @param clazz Class to search. - * @param name Name of the field to return. - * @return Argument source for the field. - */ - def findField(clazz: Class[_], name: String) = { - classFields(clazz).functionFields.find(_.field.getName == name) match { - case Some(source) => source - case None => throw new QException("Could not find a field on class %s with name %s".format(clazz, name)) - } - } - - /** - * Returns the fields for a class. - * @param clazz Class to retrieve fields for. - * @return the fields for the class. 
- */ - private def classFields(clazz: Class[_]) = { - classFieldsMap.get(clazz) match { - case Some(classFields) => classFields - case None => - val classFields = new ClassFields(clazz) - classFieldsMap += clazz -> classFields - classFields - } - } - - /** - * Returns the Seq of fields for a QFunction class. - * @param clazz Class to retrieve fields for. - * @return the fields of the class. - */ - def classFunctionFields(clazz: Class[_]) = classFields(clazz).functionFields + def setFieldValue(source: ArgumentSource, value: Any) = ClassFieldCache.setFieldValue(this, source, value) } diff --git a/public/scala/src/org/broadinstitute/sting/queue/function/scattergather/CloneFunction.scala b/public/scala/src/org/broadinstitute/sting/queue/function/scattergather/CloneFunction.scala index 686188e72..91cacbb71 100644 --- a/public/scala/src/org/broadinstitute/sting/queue/function/scattergather/CloneFunction.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/function/scattergather/CloneFunction.scala @@ -25,13 +25,14 @@ package org.broadinstitute.sting.queue.function.scattergather import org.broadinstitute.sting.commandline.ArgumentSource -import org.broadinstitute.sting.queue.function.{QFunction, CommandLineFunction} +import org.broadinstitute.sting.queue.function.CommandLineFunction +import org.broadinstitute.sting.queue.util.ClassFieldCache /** * Shadow clones another command line function. 
*/ object CloneFunction { - private lazy val cloneFunctionFields = QFunction.classFunctionFields(classOf[CloneFunction]) + private lazy val cloneFunctionFields = ClassFieldCache.classFunctionFields(classOf[CloneFunction]) } class CloneFunction extends CommandLineFunction { @@ -76,7 +77,7 @@ class CloneFunction extends CommandLineFunction { def commandLine = withScatterPart(() => originalFunction.commandLine) def getFieldValue(field: String): AnyRef = { - val source = QFunction.findField(originalFunction.getClass, field) + val source = ClassFieldCache.findField(originalFunction.getClass, field) getFieldValue(source) } @@ -98,7 +99,7 @@ class CloneFunction extends CommandLineFunction { } def setFieldValue(field: String, value: Any) { - val source = QFunction.findField(originalFunction.getClass, field) + val source = ClassFieldCache.findField(originalFunction.getClass, field) setFieldValue(source, value) } diff --git a/public/scala/src/org/broadinstitute/sting/queue/util/ClassFieldCache.scala b/public/scala/src/org/broadinstitute/sting/queue/util/ClassFieldCache.scala new file mode 100644 index 000000000..870dd5617 --- /dev/null +++ b/public/scala/src/org/broadinstitute/sting/queue/util/ClassFieldCache.scala @@ -0,0 +1,183 @@ +package org.broadinstitute.sting.queue.util + +import org.broadinstitute.sting.commandline._ +import scala.Some +import org.broadinstitute.sting.queue.QException +import collection.JavaConversions._ +import java.io.File + +/** + * Utilities and a static cache of argument fields for various classes populated by the parsingEngine. + * Because this class works with the ParsingEngine it can walk @ArgumentCollection hierarchies. + */ +object ClassFieldCache { + var parsingEngine: ParsingEngine = _ + + + // + // Field caching + // + + /** + * The list of fields defined on a class + * @param clazz The class to lookup fields. + */ + private class ClassFields(clazz: Class[_]) { + /** The complete list of fields on this CommandLineFunction. 
*/ + val functionFields: Seq[ArgumentSource] = parsingEngine.extractArgumentSources(clazz).toSeq + /** The @Input fields on this CommandLineFunction. */ + val inputFields: Seq[ArgumentSource] = functionFields.filter(source => ReflectionUtils.hasAnnotation(source.field, classOf[Input])) + /** The @Output fields on this CommandLineFunction. */ + val outputFields: Seq[ArgumentSource] = functionFields.filter(source => ReflectionUtils.hasAnnotation(source.field, classOf[Output])) + /** The @Argument fields on this CommandLineFunction. */ + val argumentFields: Seq[ArgumentSource] = functionFields.filter(source => ReflectionUtils.hasAnnotation(source.field, classOf[Argument])) + } + + /** + * The mapping from class to fields. + */ + private var classFieldsMap = Map.empty[Class[_], ClassFields] + + /** + * Returns the fields for a class. + * @param clazz Class to retrieve fields for. + * @return the fields for the class. + */ + private def classFields(clazz: Class[_]): ClassFields = { + classFieldsMap.get(clazz) match { + case Some(classFields) => classFields + case None => + val classFields = new ClassFields(clazz) + classFieldsMap += clazz -> classFields + classFields + } + } + + /** + * Returns the field on clazz. + * @param clazz Class to search. + * @param name Name of the field to return. + * @return Argument source for the field. + */ + def findField(clazz: Class[_], name: String): ArgumentSource = { + classFields(clazz).functionFields.find(_.field.getName == name) match { + case Some(source) => source + case None => throw new QException("Could not find a field on class %s with name %s".format(clazz, name)) + } + } + + /** + * Returns the Seq of fields for a QFunction class. + * @param clazz Class to retrieve fields for. + * @return the fields of the class. + */ + def classFunctionFields(clazz: Class[_]): Seq[ArgumentSource] = classFields(clazz).functionFields + + /** + * Returns the Seq of inputs for a QFunction class. + * @param clazz Class to retrieve inputs for. 
+ * @return the inputs of the class. + */ + def classInputFields(clazz: Class[_]): Seq[ArgumentSource] = classFields(clazz).inputFields + + /** + * Returns the Seq of outputs for a QFunction class. + * @param clazz Class to retrieve outputs for. + * @return the outputs of the class. + */ + def classOutputFields(clazz: Class[_]): Seq[ArgumentSource] = classFields(clazz).outputFields + + /** + * Returns the Seq of arguments for a QFunction class. + * @param clazz Class to retrieve arguments for. + * @return the arguments of the class. + */ + def classArgumentFields(clazz: Class[_]): Seq[ArgumentSource] = classFields(clazz).argumentFields + + + // + // get/set fields as AnyRef + // + + /** + * Gets the value of a field. + * @param obj Top level object storing the source info. + * @param source Field to get the value for. + * @return value of the field. + */ + def getFieldValue(obj: AnyRef, source: ArgumentSource) = ReflectionUtils.getValue(invokeObj(obj, source), source.field) + + /** + * Gets the value of a field. + * @param obj Top level object storing the source info. + * @param source Field to set the value for. + * @return value of the field. + */ + def setFieldValue(obj: AnyRef, source: ArgumentSource, value: Any) = ReflectionUtils.setValue(invokeObj(obj, source), source.field, value) + + /** + * Walks gets the fields in this object or any collections in that object + * recursively to find the object holding the field to be retrieved or set. + * @param obj Top level object storing the source info. + * @param source Field find the invoke object for. + * @return Object to invoke the field on. + */ + private def invokeObj(obj: AnyRef, source: ArgumentSource) = source.parentFields.foldLeft[AnyRef](obj)(ReflectionUtils.getValue(_, _)) + + + // + // get/set fields as java.io.File + // + + /** + * Gets the files from the fields. The fields must be a File, a FileExtension, or a Seq or Set of either. + * @param obj Top level object storing the source info. 
+ * @param fields Fields to get files. + * @return for the fields. + */ + def getFieldFiles(obj: AnyRef, fields: Seq[ArgumentSource]): Seq[File] = { + var files: Seq[File] = Nil + for (field <- fields) + files ++= getFieldFiles(obj, field) + files.distinct + } + + /** + * Gets the files from the field. The field must be a File, a FileExtension, or a Seq or Set of either. + * @param obj Top level object storing the source info. + * @param field Field to get files. + * @return for the field. + */ + def getFieldFiles(obj: AnyRef, field: ArgumentSource): Seq[File] = { + var files: Seq[File] = Nil + CollectionUtils.foreach(getFieldValue(obj, field), (fieldValue) => { + val file = fieldValueToFile(field, fieldValue) + if (file != null) + files :+= file + }) + files.distinct + } + + /** + * Gets the file from the field. The field must be a File or a FileExtension and not a Seq or Set. + * @param obj Top level object storing the source info. + * @param field Field to get the file. + * @return for the field. + */ + def getFieldFile(obj: AnyRef, field: ArgumentSource): File = + fieldValueToFile(field, getFieldValue(obj, field)) + + /** + * Converts the field value to a file. The field must be a File or a FileExtension. + * @param field Field to get the file. + * @param value Value of the File or FileExtension or null. + * @return Null if value is null, otherwise the File. + * @throws QException if the value is not a File or FileExtension. + */ + private def fieldValueToFile(field: ArgumentSource, value: Any): File = value match { + case file: File => file + case null => null + case unknown => throw new QException("Non-file found. Try removing the annotation, change the annotation to @Argument, or extend File with FileExtension: %s: %s".format(field.field, unknown)) + } + +} From 0196dbeacac298f04c65e024ab5addd137205d9e Mon Sep 17 00:00:00 2001 From: kshakir Date: Wed, 17 Oct 2012 09:52:17 -0400 Subject: [PATCH 86/90] Added more logging to push/pull of RemoteFiles. 
--- .../src/org/broadinstitute/sting/queue/QScript.scala | 10 ++++++++-- .../broadinstitute/sting/queue/util/RemoteFile.scala | 1 + 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/public/scala/src/org/broadinstitute/sting/queue/QScript.scala b/public/scala/src/org/broadinstitute/sting/queue/QScript.scala index ee2089dc5..2dcfb916c 100755 --- a/public/scala/src/org/broadinstitute/sting/queue/QScript.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/QScript.scala @@ -110,12 +110,18 @@ trait QScript extends Logging with PrimitiveOptionConversions with StringFileCon def pullInputs() { val inputs = ClassFieldCache.getFieldFiles(this, inputFields) - filterRemoteFiles(inputs).foreach(_.pullToLocal()) + for (remoteFile <- filterRemoteFiles(inputs)) { + logger.info("Pulling %s from %s".format(remoteFile.getAbsolutePath, remoteFile.remoteDescription)) + remoteFile.pullToLocal() + } } def pushOutputs() { val outputs = ClassFieldCache.getFieldFiles(this, outputFields) - filterRemoteFiles(outputs).foreach(_.pushToRemote()) + for (remoteFile <- filterRemoteFiles(outputs)) { + logger.info("Pushing %s to %s".format(remoteFile.getAbsolutePath, remoteFile.remoteDescription)) + remoteFile.pushToRemote() + } } def remoteOutputs: Map[ArgumentSource, Seq[RemoteFile]] = diff --git a/public/scala/src/org/broadinstitute/sting/queue/util/RemoteFile.scala b/public/scala/src/org/broadinstitute/sting/queue/util/RemoteFile.scala index cfe848ba8..9d94975ba 100644 --- a/public/scala/src/org/broadinstitute/sting/queue/util/RemoteFile.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/util/RemoteFile.scala @@ -10,4 +10,5 @@ trait RemoteFile extends File with FileExtension { def pullToLocal() def pushToRemote() def deleteRemote() + def remoteDescription: String } From c9e7a947c26e0e632b3161eb457408fd75339717 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Wed, 17 Oct 2012 08:34:47 -0400 Subject: [PATCH 88/90] Improve interface of ExactCallLogger, use it to have a 
more informative AFCalcPerformanceTest --- .../afcalc/AFCalcPerformanceTest.java | 21 +++++- .../genotyper/afcalc/AFCalcResult.java | 13 ++++ .../genotyper/afcalc/ExactCallLogger.java | 73 +++++++++++-------- 3 files changed, 73 insertions(+), 34 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcPerformanceTest.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcPerformanceTest.java index fab26c9d2..e9ed6b153 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcPerformanceTest.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcPerformanceTest.java @@ -10,6 +10,7 @@ import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.SimpleTimer; import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile; +import org.broadinstitute.sting.utils.variantcontext.Allele; import org.broadinstitute.sting.utils.variantcontext.Genotype; import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder; @@ -225,12 +226,24 @@ public class AFCalcPerformanceTest { final AFCalcTestBuilder testBuilder = new AFCalcTestBuilder(call.vc.getNSamples(), 1, AFCalcFactory.Calculation.EXACT_INDEPENDENT, AFCalcTestBuilder.PriorType.human); + logger.info(call); final SimpleTimer timer = new SimpleTimer().start(); final AFCalcResult result = testBuilder.makeModel().getLog10PNonRef(call.vc, testBuilder.makePriors()); - call.newNanoTime = timer.getElapsedTimeNano(); - call.newPNonRef = result.getLog10PosteriorOfAFGT0(); - logger.info(call); - logger.info("\t\t" + result); + final long newNanoTime = timer.getElapsedTimeNano(); + if ( call.originalCall.anyPolymorphic(-1) || result.anyPolymorphic(-1) ) { + logger.info("**** ONE IS POLY"); + } + logger.info("\t\t 
getLog10PosteriorOfAFGT0: " + call.originalCall.getLog10PosteriorOfAFGT0() + " vs " + result.getLog10PosteriorOfAFGT0()); + final double speedup = call.runtime / (1.0 * newNanoTime); + logger.info("\t\t runtime: " + call.runtime + " vs " + newNanoTime + " speedup " + String.format("%.2f", speedup) + "x"); + for ( final Allele a : call.originalCall.getAllelesUsedInGenotyping() ) { + if ( a.isNonReference() ) { + final String warningmeMLE = call.originalCall.getAlleleCountAtMLE(a) != result.getAlleleCountAtMLE(a) ? " DANGER-MLE-DIFFERENT" : ""; + logger.info("\t\t MLE " + a + ": " + call.originalCall.getAlleleCountAtMLE(a) + " vs " + result.getAlleleCountAtMLE(a) + warningmeMLE); + final String warningmePost = call.originalCall.getLog10PosteriorOfAFGt0ForAllele(a) == 0 && result.getLog10PosteriorOfAFGt0ForAllele(a) < -10 ? " DANGER-POSTERIORS-DIFFERENT" : ""; + logger.info("\t\t Posterior " + a + ": " + call.originalCall.getLog10PosteriorOfAFGt0ForAllele(a) + " vs " + result.getLog10PosteriorOfAFGt0ForAllele(a) + warningmePost); + } + } } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcResult.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcResult.java index c737416c5..7cacb2060 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcResult.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcResult.java @@ -238,6 +238,19 @@ public class AFCalcResult { return getLog10PosteriorOfAFGt0ForAllele(allele) >= log10minPNonRef; } + /** + * Are any of the alleles polymorphic w.r.t. #isPolymorphic? 
+ * + * @param log10minPNonRef the confidence threshold, in log10 space + * @return true if any are poly, false otherwise + */ + public boolean anyPolymorphic(final double log10minPNonRef) { + for ( final Allele a : getAllelesUsedInGenotyping() ) + if ( a.isNonReference() && isPolymorphic(a, log10minPNonRef) ) + return true; + return false; + } + /** * Returns the log10 probability that allele is segregating * diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/ExactCallLogger.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/ExactCallLogger.java index 3794ba240..9394c99d7 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/ExactCallLogger.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/ExactCallLogger.java @@ -1,20 +1,18 @@ package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc; -import org.broadinstitute.sting.utils.AutoFormattingTime; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.GenomeLocParser; -import org.broadinstitute.sting.utils.Utils; +import com.google.java.contract.Requires; +import org.apache.commons.lang.ArrayUtils; +import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.variantcontext.*; import java.io.*; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.LinkedList; -import java.util.List; +import java.util.*; /** * Allows us to write out and read in information about exact calls (site, alleles, PLs, etc) in tabular format + * + * Once opened, calls can be writen to disk with printCallInfo */ public class ExactCallLogger implements Cloneable { private PrintStream callReport = null; @@ -38,32 +36,28 @@ public class ExactCallLogger implements Cloneable { */ public static class ExactCall { final VariantContext vc; - final long origNanoTime; - long newNanoTime = -1; - 
final double origPNonRef; - double newPNonRef = -1; + final long runtime; + final AFCalcResult originalCall; - public ExactCall(VariantContext vc, long origNanoTime, double origPNonRef) { + public ExactCall(VariantContext vc, final long runtime, final AFCalcResult originalCall) { this.vc = vc; - this.origNanoTime = origNanoTime; - this.origPNonRef = origPNonRef; + this.runtime = runtime; + this.originalCall = originalCall; } @Override public String toString() { - return String.format("ExactCall %s:%d alleles=%s nSamples=%s orig.pNonRef=%.2f orig.runtime=%s new.pNonRef=%.2f new.runtime=%s", + return String.format("ExactCall %s:%d alleles=%s nSamples=%s orig.pNonRef=%.2f orig.runtime=%s", vc.getChr(), vc.getStart(), vc.getAlleles(), vc.getNSamples(), - origPNonRef, - new AutoFormattingTime(origNanoTime / 1e9).toString(), - newPNonRef, - newNanoTime == -1 ? "not.run" : new AutoFormattingTime(newNanoTime / 1e9).toString()); + originalCall.getLog10PosteriorOfAFGT0(), + new AutoFormattingTime(runtime / 1e9).toString()); } } - protected void printCallInfo(final VariantContext vc, - final double[] log10AlleleFrequencyPriors, - final long runtimeNano, - final AFCalcResult result) { + protected final void printCallInfo(final VariantContext vc, + final double[] log10AlleleFrequencyPriors, + final long runtimeNano, + final AFCalcResult result) { printCallElement(vc, "type", "ignore", vc.getType()); int allelei = 0; @@ -90,6 +84,7 @@ public class ExactCallLogger implements Cloneable { callReport.flush(); } + @Requires({"vc != null", "variable != null", "key != null", "value != null", "callReport != null"}) private void printCallElement(final VariantContext vc, final Object variable, final Object key, @@ -102,10 +97,10 @@ public class ExactCallLogger implements Cloneable { * Read in a list of ExactCall objects from reader, keeping only those * with starts in startsToKeep or all sites (if this is empty) * - * @param reader - * @param startsToKeep - * @param parser - * @return + * 
@param reader a just-opened reader sitting at the start of the file + * @param startsToKeep a list of start position of the calls to keep, or empty if all calls should be kept + * @param parser a genome loc parser to create genome locs + * @return a list of ExactCall objects in reader * @throws IOException */ public static List readExactLog(final BufferedReader reader, final List startsToKeep, GenomeLocParser parser) throws IOException { @@ -118,10 +113,17 @@ public class ExactCallLogger implements Cloneable { // skip the header line reader.readLine(); + // skip the first "type" line + reader.readLine(); + while (true) { final VariantContextBuilder builder = new VariantContextBuilder(); final List alleles = new ArrayList(); final List genotypes = new ArrayList(); + final double[] posteriors = new double[2]; + final double[] priors = MathUtils.normalizeFromLog10(new double[]{0.5, 0.5}, true); + final List mle = new ArrayList(); + final Map log10pNonRefByAllele = new HashMap(); long runtimeNano = -1; GenomeLoc currentLoc = null; @@ -139,13 +141,15 @@ public class ExactCallLogger implements Cloneable { if (currentLoc == null) currentLoc = lineLoc; - if (variable.equals("log10PosteriorOfAFzero")) { + if (variable.equals("type")) { if (startsToKeep.isEmpty() || startsToKeep.contains(currentLoc.getStart())) { builder.alleles(alleles); final int stop = currentLoc.getStart() + alleles.get(0).length() - 1; builder.chr(currentLoc.getContig()).start(currentLoc.getStart()).stop(stop); builder.genotypes(genotypes); - calls.add(new ExactCall(builder.make(), runtimeNano, Double.valueOf(value))); + final int[] mleInts = ArrayUtils.toPrimitive(mle.toArray(new Integer[]{})); + final AFCalcResult result = new AFCalcResult(mleInts, 1, alleles, posteriors, priors, log10pNonRefByAllele); + calls.add(new ExactCall(builder.make(), runtimeNano, result)); } break; } else if (variable.equals("allele")) { @@ -155,6 +159,15 @@ public class ExactCallLogger implements Cloneable { final 
GenotypeBuilder gb = new GenotypeBuilder(key); gb.PL(GenotypeLikelihoods.fromPLField(value).getAsPLs()); genotypes.add(gb.make()); + } else if (variable.equals("log10PosteriorOfAFEq0")) { + posteriors[0] = Double.valueOf(value); + } else if (variable.equals("log10PosteriorOfAFGt0")) { + posteriors[1] = Double.valueOf(value); + } else if (variable.equals("MLE")) { + mle.add(Integer.valueOf(value)); + } else if (variable.equals("pNonRefByAllele")) { + final Allele a = Allele.create(key); + log10pNonRefByAllele.put(a, Double.valueOf(value)); } else if (variable.equals("runtime.nano")) { runtimeNano = Long.valueOf(value); } else { From fa93681f513992b387387ec018f24d66232cc2dd Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Wed, 17 Oct 2012 14:14:41 -0400 Subject: [PATCH 89/90] Scalability test for EXACT models --- .../genotyper/afcalc/AFCalcTestBuilder.java | 6 +- .../afcalc/AFCalcPerformanceUnitTest.java | 87 +++++++++++++++++++ 2 files changed, 92 insertions(+), 1 deletion(-) create mode 100644 protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcPerformanceUnitTest.java diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcTestBuilder.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcTestBuilder.java index b4d105507..cfb67164d 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcTestBuilder.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcTestBuilder.java @@ -45,12 +45,16 @@ public class AFCalcTestBuilder { human } + public int getNumAltAlleles() { + return numAltAlleles; + } + public int getnSamples() { return nSamples; } public AFCalc makeModel() { - return AFCalcFactory.createAFCalc(modelType, nSamples, 4, 4, 2); + return AFCalcFactory.createAFCalc(modelType, nSamples, getNumAltAlleles(), getNumAltAlleles(), 2); } public double[] makePriors() { diff --git 
a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcPerformanceUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcPerformanceUnitTest.java new file mode 100644 index 000000000..556b7451f --- /dev/null +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcPerformanceUnitTest.java @@ -0,0 +1,87 @@ +package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc; + +import org.broadinstitute.sting.BaseTest; +import org.broadinstitute.sting.utils.MathUtils; +import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.collections.Pair; +import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.testng.Assert; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +public class AFCalcPerformanceUnitTest extends BaseTest { + @DataProvider(name = "ScalingTests") + public Object[][] makepolyTestProviderLotsOfAlleles() { + List tests = new ArrayList(); + + // list of all high-quality models in the system + final List biAllelicModels = Arrays.asList( + AFCalcFactory.Calculation.EXACT_INDEPENDENT, + AFCalcFactory.Calculation.EXACT_REFERENCE); + + final List multiAllelicModels = Arrays.asList( + AFCalcFactory.Calculation.EXACT_INDEPENDENT); + +// for ( final int nonTypePLs : Arrays.asList(100) ) { +// for ( final int nSamples : Arrays.asList(10000) ) { +// final List alleleCounts = Arrays.asList(50); +// for ( final int nAltAlleles : Arrays.asList(1) ) { + for ( final int nonTypePLs : Arrays.asList(100) ) { + for ( final int nSamples : Arrays.asList(100, 1000) ) { + final List alleleCounts = Arrays.asList(0, 1, 2, 3, 4, 5, 10, 50, 500); + for ( final int nAltAlleles : Arrays.asList(1, 2, 3) ) { + final List models = nAltAlleles > 1 ? 
multiAllelicModels : biAllelicModels; + for ( final AFCalcFactory.Calculation model : models ) { + for ( final List ACs : Utils.makePermutations(alleleCounts, nAltAlleles, true) ) { + if ( MathUtils.sum(ACs) < nSamples * 2 ) { + final AFCalcTestBuilder testBuilder + = new AFCalcTestBuilder(nSamples, nAltAlleles, model, AFCalcTestBuilder.PriorType.human); + tests.add(new Object[]{testBuilder, ACs, nonTypePLs}); + } + } + } + } + } + } + + return tests.toArray(new Object[][]{}); + } + + private Pair estNumberOfEvaluations(final AFCalcTestBuilder testBuilder, final VariantContext vc, final int nonTypePL) { + final int evalOverhead = 2; // 2 + final int maxEvalsPerSamplePerAC = 3; + + int minEvals = 0, maxEvals = 0; + + for ( final Allele alt : vc.getAlternateAlleles() ) { + final int AC = vc.getCalledChrCount(alt); + minEvals += AC + evalOverhead; // everyone is hom-var + maxEvals += AC * maxEvalsPerSamplePerAC + 10; + } + + return new Pair(minEvals, maxEvals); + } + + @Test(dataProvider = "ScalingTests") + private void testScaling(final AFCalcTestBuilder testBuilder, final List ACs, final int nonTypePL) { + final AFCalc calc = testBuilder.makeModel(); + final double[] priors = testBuilder.makePriors(); + final VariantContext vc = testBuilder.makeACTest(ACs, 0, nonTypePL); + final AFCalcResult result = calc.getLog10PNonRef(vc, priors); + final Pair expectedNEvaluation = estNumberOfEvaluations(testBuilder, vc, nonTypePL); + final int minEvals = expectedNEvaluation.getFirst(); + final int maxEvals = expectedNEvaluation.getSecond(); + + logger.warn(" min " + minEvals + " obs " + result.getnEvaluations() + " max " + maxEvals + " for test " + testBuilder + " sum(ACs)=" + (int)MathUtils.sum(ACs)); + + Assert.assertTrue(result.getnEvaluations() >= minEvals, + "Actual number of evaluations " + result.getnEvaluations() + " < min number of evals " + minEvals); + Assert.assertTrue(result.getnEvaluations() <= maxEvals, + "Actual number of evaluations " + result.getnEvaluations() 
+ " > max number of evals " + minEvals); + } +} \ No newline at end of file From 8288c30e364b1aa858ef56411195fcdef9364b99 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Wed, 17 Oct 2012 14:14:55 -0400 Subject: [PATCH 90/90] Use buffered output for ExactCallLogger --- .../sting/gatk/walkers/genotyper/afcalc/ExactCallLogger.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/ExactCallLogger.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/ExactCallLogger.java index 9394c99d7..f13fe4429 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/ExactCallLogger.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/ExactCallLogger.java @@ -24,7 +24,7 @@ public class ExactCallLogger implements Cloneable { */ public ExactCallLogger(final File outputFile) { try { - callReport = new PrintStream(new FileOutputStream(outputFile)); + callReport = new PrintStream(new BufferedOutputStream(new FileOutputStream(outputFile), 10000000)); callReport.println(Utils.join("\t", Arrays.asList("loc", "variable", "key", "value"))); } catch (FileNotFoundException e) { throw new UserException.CouldNotCreateOutputFile(outputFile, e);