diff --git a/public/scala/src/org/broadinstitute/sting/queue/extensions/cancer/MuTect.scala b/public/scala/src/org/broadinstitute/sting/queue/extensions/cancer/MuTect.scala index 623d397d4..1193e7dec 100644 --- a/public/scala/src/org/broadinstitute/sting/queue/extensions/cancer/MuTect.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/extensions/cancer/MuTect.scala @@ -6,7 +6,7 @@ import org.broadinstitute.sting.commandline.Gather import org.broadinstitute.sting.commandline.Input import org.broadinstitute.sting.commandline.Output import org.broadinstitute.sting.queue.function.scattergather.ScatterGatherableFunction -import org.broadinstitute.sting.queue.extensions.gatk.{LocusScatterFunction, TaggedFile} +import org.broadinstitute.sting.queue.extensions.gatk.{TaggedFile, VcfGatherFunction, LocusScatterFunction} class MuTect extends org.broadinstitute.sting.queue.extensions.gatk.CommandLineGATK with ScatterGatherableFunction { analysisName = "MuTect" @@ -45,6 +45,10 @@ class MuTect extends org.broadinstitute.sting.queue.extensions.gatk.CommandLineG @Argument(fullName="force_alleles", shortName="", doc="force output for all alleles at each site", required=false, exclusiveOf="", validation="") var force_alleles: Boolean = _ + /** only emit passing calls */ + @Argument(fullName="only_passing_calls", shortName="", doc="only emit passing calls", required=false, exclusiveOf="", validation="") + var only_passing_calls: Boolean = _ + /** Initial LOD threshold for calling tumor variant */ @Argument(fullName="initial_tumor_lod", shortName="", doc="Initial LOD threshold for calling tumor variant", required=false, exclusiveOf="", validation="") var initial_tumor_lod: Option[Float] = None @@ -242,6 +246,28 @@ class MuTect extends org.broadinstitute.sting.queue.extensions.gatk.CommandLineG */ def o_=(value: File) { this.out = value } + /** VCF output of mutation candidates */ + @Output(fullName="vcf", shortName="vcf", doc="VCF output of mutation candidates", required=false, exclusiveOf="", validation="") + @Gather(classOf[VcfGatherFunction]) + var vcf: File = _ + + /** Automatically generated index for vcf */ + @Output(fullName="vcfIndex", shortName="", doc="Automatically generated index for vcf", required=false, exclusiveOf="", validation="") + @Gather(enabled=false) + private var vcfIndex: File = _ + + /** Don't output the usual VCF header tag with the command line. FOR DEBUGGING PURPOSES ONLY. This option is required in order to pass integration tests. */ + @Argument(fullName="no_cmdline_in_header", shortName="no_cmdline_in_header", doc="Don't output the usual VCF header tag with the command line. FOR DEBUGGING PURPOSES ONLY. This option is required in order to pass integration tests.", required=false, exclusiveOf="", validation="") + var no_cmdline_in_header: Boolean = _ + + /** Just output sites without genotypes (i.e. only the first 8 columns of the VCF) */ + @Argument(fullName="sites_only", shortName="sites_only", doc="Just output sites without genotypes (i.e. only the first 8 columns of the VCF)", required=false, exclusiveOf="", validation="") + var sites_only: Boolean = _ + + /** force BCF output, regardless of the file's extension */ + @Argument(fullName="bcf", shortName="bcf", doc="force BCF output, regardless of the file's extension", required=false, exclusiveOf="", validation="") + var bcf: Boolean = _ + /** VCF file of DBSNP information */ @Input(fullName="dbsnp", shortName="dbsnp", doc="VCF file of DBSNP information", required=false, exclusiveOf="", validation="") var dbsnp: Seq[File] = Nil @@ -369,10 +395,13 @@ class MuTect extends org.broadinstitute.sting.queue.extensions.gatk.CommandLineG override def freezeFieldValues() { super.freezeFieldValues() + if (vcf != null && !org.broadinstitute.sting.utils.io.IOUtils.isSpecialFile(vcf)) + if (!org.broadinstitute.sting.gatk.io.stubs.VCFWriterArgumentTypeDescriptor.isCompressed(vcf.getPath)) + vcfIndex = new File(vcf.getPath + ".idx") dbsnpIndexes ++= dbsnp.filter(orig => orig != null).map(orig => new File(orig.getPath + ".idx")) cosmicIndexes ++= cosmic.filter(orig => orig != null).map(orig => new File(orig.getPath + ".idx")) normal_panelIndexes ++= normal_panel.filter(orig => orig != null).map(orig => new File(orig.getPath + ".idx")) } - override def commandLine = super.commandLine + conditional(noop, "--noop", escape=true, format="%s") + conditional(enable_extended_output, "--enable_extended_output", escape=true, format="%s") + conditional(artifact_detection_mode, "--artifact_detection_mode", escape=true, format="%s") + optional("--tumor_sample_name", tumor_sample_name, spaceSeparated=true, escape=true, format="%s") + optional("--bam_tumor_sample_name", bam_tumor_sample_name, spaceSeparated=true, escape=true, format="%s") + optional("--normal_sample_name", normal_sample_name, spaceSeparated=true, escape=true, format="%s") + conditional(force_output, "--force_output", escape=true, format="%s") + conditional(force_alleles, "--force_alleles", escape=true, format="%s") + optional("--initial_tumor_lod", initial_tumor_lod, spaceSeparated=true, escape=true, format=initial_tumor_lodFormat) + optional("--tumor_lod", tumor_lod, spaceSeparated=true, escape=true, format=tumor_lodFormat) + optional("--fraction_contamination", fraction_contamination, spaceSeparated=true, escape=true, format=fraction_contaminationFormat) + optional("--minimum_mutation_cell_fraction", minimum_mutation_cell_fraction, spaceSeparated=true, escape=true, format=minimum_mutation_cell_fractionFormat) + optional("--normal_lod", normal_lod, spaceSeparated=true, escape=true, format=normal_lodFormat) + optional("--normal_artifact_lod", normal_artifact_lod, spaceSeparated=true, escape=true, format=normal_artifact_lodFormat) + optional("--strand_artifact_lod", strand_artifact_lod, spaceSeparated=true, escape=true, format=strand_artifact_lodFormat) + optional("--strand_artifact_power_threshold", strand_artifact_power_threshold, spaceSeparated=true, escape=true, format=strand_artifact_power_thresholdFormat) + optional("--dbsnp_normal_lod", dbsnp_normal_lod, spaceSeparated=true, escape=true, format=dbsnp_normal_lodFormat) + optional("--somatic_classification_normal_power_threshold", somatic_classification_normal_power_threshold, spaceSeparated=true, escape=true, format=somatic_classification_normal_power_thresholdFormat) + optional("--minimum_normal_allele_fraction", minimum_normal_allele_fraction, spaceSeparated=true, escape=true, format=minimum_normal_allele_fractionFormat) + optional("--tumor_f_pretest", tumor_f_pretest, spaceSeparated=true, escape=true, format=tumor_f_pretestFormat) + optional("--min_qscore", min_qscore, spaceSeparated=true, escape=true, format="%s") + optional("--gap_events_threshold", gap_events_threshold, spaceSeparated=true, escape=true, format="%s") + optional("--heavily_clipped_read_fraction", heavily_clipped_read_fraction, spaceSeparated=true, escape=true, format=heavily_clipped_read_fractionFormat) + optional("--clipping_bias_pvalue_threshold", clipping_bias_pvalue_threshold, spaceSeparated=true, escape=true, format=clipping_bias_pvalue_thresholdFormat) + optional("--fraction_mapq0_threshold", fraction_mapq0_threshold, spaceSeparated=true, escape=true, format=fraction_mapq0_thresholdFormat) + optional("--pir_median_threshold", pir_median_threshold, spaceSeparated=true, escape=true, format=pir_median_thresholdFormat) + optional("--pir_mad_threshold", pir_mad_threshold, spaceSeparated=true, escape=true, format=pir_mad_thresholdFormat) + optional("--required_maximum_alt_allele_mapping_quality_score", required_maximum_alt_allele_mapping_quality_score, spaceSeparated=true, escape=true, format="%s") + optional("--max_alt_alleles_in_normal_count", max_alt_alleles_in_normal_count, spaceSeparated=true, escape=true, format="%s") + optional("--max_alt_alleles_in_normal_qscore_sum", max_alt_alleles_in_normal_qscore_sum, spaceSeparated=true, escape=true, format="%s") + optional("--max_alt_allele_in_normal_fraction", max_alt_allele_in_normal_fraction, spaceSeparated=true, escape=true, format=max_alt_allele_in_normal_fractionFormat) + optional("--power_constant_qscore", power_constant_qscore, spaceSeparated=true, escape=true, format="%s") + optional("--absolute_copy_number_data", absolute_copy_number_data, spaceSeparated=true, escape=true, format="%s") + optional("--power_constant_af", power_constant_af, spaceSeparated=true, escape=true, format=power_constant_afFormat) + optional("-o", out, spaceSeparated=true, escape=true, format="%s") + repeat("-dbsnp", dbsnp, formatPrefix=TaggedFile.formatCommandLineParameter, spaceSeparated=true, escape=true, format="%s") + repeat("-cosmic", cosmic, formatPrefix=TaggedFile.formatCommandLineParameter, spaceSeparated=true, escape=true, format="%s") + repeat("-normal_panel", normal_panel, formatPrefix=TaggedFile.formatCommandLineParameter, spaceSeparated=true, escape=true, format="%s") + optional("-cov", coverage_file, spaceSeparated=true, escape=true, format="%s") + optional("-cov_q20", coverage_20_q20_file, spaceSeparated=true, escape=true, format="%s") + optional("-pow", power_file, spaceSeparated=true, escape=true, format="%s") + optional("-tdf", tumor_depth_file, spaceSeparated=true, escape=true, format="%s") + optional("-ndf", normal_depth_file, spaceSeparated=true, escape=true, format="%s") + conditional(filter_mismatching_base_and_quals, "-filterMBQ", escape=true, format="%s") + override def commandLine = super.commandLine + conditional(noop, "--noop", escape=true, format="%s") + conditional(enable_extended_output, "--enable_extended_output", escape=true, format="%s") + conditional(artifact_detection_mode, "--artifact_detection_mode", escape=true, format="%s") + optional("--tumor_sample_name", tumor_sample_name, spaceSeparated=true, escape=true, format="%s") + optional("--bam_tumor_sample_name", bam_tumor_sample_name, spaceSeparated=true, escape=true, format="%s") + optional("--normal_sample_name", normal_sample_name, spaceSeparated=true, escape=true, format="%s") + conditional(force_output, "--force_output", escape=true, format="%s") + conditional(force_alleles, "--force_alleles", escape=true, format="%s") + conditional(only_passing_calls, "--only_passing_calls", escape=true, format="%s") + optional("--initial_tumor_lod", initial_tumor_lod, spaceSeparated=true, escape=true, format=initial_tumor_lodFormat) + optional("--tumor_lod", tumor_lod, spaceSeparated=true, escape=true, format=tumor_lodFormat) + optional("--fraction_contamination", fraction_contamination, spaceSeparated=true, escape=true, format=fraction_contaminationFormat) + optional("--minimum_mutation_cell_fraction", minimum_mutation_cell_fraction, spaceSeparated=true, escape=true, format=minimum_mutation_cell_fractionFormat) + optional("--normal_lod", normal_lod, spaceSeparated=true, escape=true, format=normal_lodFormat) + optional("--normal_artifact_lod", normal_artifact_lod, spaceSeparated=true, escape=true, format=normal_artifact_lodFormat) + optional("--strand_artifact_lod", strand_artifact_lod, spaceSeparated=true, escape=true, format=strand_artifact_lodFormat) + optional("--strand_artifact_power_threshold", strand_artifact_power_threshold, spaceSeparated=true, escape=true, format=strand_artifact_power_thresholdFormat) + optional("--dbsnp_normal_lod", dbsnp_normal_lod, spaceSeparated=true, escape=true, format=dbsnp_normal_lodFormat) + optional("--somatic_classification_normal_power_threshold", somatic_classification_normal_power_threshold, spaceSeparated=true, escape=true, format=somatic_classification_normal_power_thresholdFormat) + optional("--minimum_normal_allele_fraction", minimum_normal_allele_fraction, spaceSeparated=true, escape=true, format=minimum_normal_allele_fractionFormat) + optional("--tumor_f_pretest", tumor_f_pretest, spaceSeparated=true, escape=true, format=tumor_f_pretestFormat) + optional("--min_qscore", min_qscore, spaceSeparated=true, escape=true, format="%s") + optional("--gap_events_threshold", gap_events_threshold, spaceSeparated=true, escape=true, format="%s") + optional("--heavily_clipped_read_fraction", heavily_clipped_read_fraction, spaceSeparated=true, escape=true, format=heavily_clipped_read_fractionFormat) + optional("--clipping_bias_pvalue_threshold", clipping_bias_pvalue_threshold, spaceSeparated=true, escape=true, format=clipping_bias_pvalue_thresholdFormat) + optional("--fraction_mapq0_threshold", fraction_mapq0_threshold, spaceSeparated=true, escape=true, format=fraction_mapq0_thresholdFormat) + optional("--pir_median_threshold", pir_median_threshold, spaceSeparated=true, escape=true, format=pir_median_thresholdFormat) + optional("--pir_mad_threshold", pir_mad_threshold, spaceSeparated=true, escape=true, format=pir_mad_thresholdFormat) + optional("--required_maximum_alt_allele_mapping_quality_score", required_maximum_alt_allele_mapping_quality_score, spaceSeparated=true, escape=true, format="%s") + optional("--max_alt_alleles_in_normal_count", max_alt_alleles_in_normal_count, spaceSeparated=true, escape=true, format="%s") + optional("--max_alt_alleles_in_normal_qscore_sum", max_alt_alleles_in_normal_qscore_sum, spaceSeparated=true, escape=true, format="%s") + optional("--max_alt_allele_in_normal_fraction", max_alt_allele_in_normal_fraction, spaceSeparated=true, escape=true, format=max_alt_allele_in_normal_fractionFormat) + optional("--power_constant_qscore", power_constant_qscore, spaceSeparated=true, escape=true, format="%s") + optional("--absolute_copy_number_data", absolute_copy_number_data, spaceSeparated=true, escape=true, format="%s") + optional("--power_constant_af", power_constant_af, spaceSeparated=true, escape=true, format=power_constant_afFormat) + optional("-o", out, spaceSeparated=true, escape=true, format="%s") + optional("-vcf", vcf, spaceSeparated=true, escape=true, format="%s") + conditional(no_cmdline_in_header, "-no_cmdline_in_header", escape=true, format="%s") + conditional(sites_only, "-sites_only", escape=true, format="%s") + conditional(bcf, "-bcf", escape=true, format="%s") + repeat("-dbsnp", dbsnp, formatPrefix=TaggedFile.formatCommandLineParameter, spaceSeparated=true, escape=true, format="%s") + repeat("-cosmic", cosmic, formatPrefix=TaggedFile.formatCommandLineParameter, spaceSeparated=true, escape=true, format="%s") + repeat("-normal_panel", normal_panel, formatPrefix=TaggedFile.formatCommandLineParameter, spaceSeparated=true, escape=true, format="%s") + optional("-cov", coverage_file, spaceSeparated=true, escape=true, format="%s") + optional("-cov_q20", coverage_20_q20_file, spaceSeparated=true, escape=true, format="%s") + optional("-pow", power_file, spaceSeparated=true, escape=true, format="%s") + optional("-tdf", tumor_depth_file, spaceSeparated=true, escape=true, format="%s") + optional("-ndf", normal_depth_file, spaceSeparated=true, escape=true, format="%s") + conditional(filter_mismatching_base_and_quals, "-filterMBQ", escape=true, format="%s") }