From 71a3447772fae79636e84233482c47d9fabfcef6 Mon Sep 17 00:00:00 2001
From: Yossi Farjoun
Date: Thu, 12 May 2016 15:26:04 -0400
Subject: [PATCH] Yf tumor only (#1298)

* tumor only pipeline

* a few new wrappers for picard tools
---
 .../queue/extensions/picard/FilterVcf.scala  | 86 +++++++++++++++++++
 .../extensions/picard/MakeSitesOnlyVcf.scala | 68 +++++++++++++++
 2 files changed, 154 insertions(+)
 create mode 100644 public/gatk-queue-extensions-public/src/main/scala/org/broadinstitute/gatk/queue/extensions/picard/FilterVcf.scala
 create mode 100644 public/gatk-queue-extensions-public/src/main/scala/org/broadinstitute/gatk/queue/extensions/picard/MakeSitesOnlyVcf.scala

diff --git a/public/gatk-queue-extensions-public/src/main/scala/org/broadinstitute/gatk/queue/extensions/picard/FilterVcf.scala b/public/gatk-queue-extensions-public/src/main/scala/org/broadinstitute/gatk/queue/extensions/picard/FilterVcf.scala
new file mode 100644
index 000000000..971f67a28
--- /dev/null
+++ b/public/gatk-queue-extensions-public/src/main/scala/org/broadinstitute/gatk/queue/extensions/picard/FilterVcf.scala
@@ -0,0 +1,86 @@
+/*
+* Copyright 2012-2016 Broad Institute, Inc.
+*
+* Permission is hereby granted, free of charge, to any person
+* obtaining a copy of this software and associated documentation
+* files (the "Software"), to deal in the Software without
+* restriction, including without limitation the rights to use,
+* copy, modify, merge, publish, distribute, sublicense, and/or sell
+* copies of the Software, and to permit persons to whom the
+* Software is furnished to do so, subject to the following
+* conditions:
+*
+* The above copyright notice and this permission notice shall be
+* included in all copies or substantial portions of the Software.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
+* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+package org.broadinstitute.gatk.queue.extensions.picard
+
+import java.io.File
+
+import htsjdk.samtools.ValidationStringency
+import org.broadinstitute.gatk.queue.function.JavaCommandLineFunction
+import org.broadinstitute.gatk.utils.commandline.{Argument, Input, Output}
+
+/**
+ * Queue wrapper that runs picard.vcf.FilterVcf: reads a VCF/VCF.gz/BCF and filters it quickly based on some simple hard filters. All thresholds default to None.
+ * NOTE(review): minDp and minGQ are Option[Double]; presumably Picard parses MIN_DP and MIN_GQ as integers -- confirm that values rendered as e.g. "10.0" are accepted.
+ */
+class FilterVcf extends JavaCommandLineFunction {
+  analysisName = "FilterVcf"
+  javaMainClass = "picard.vcf.FilterVcf"
+
+  @Input(doc = "The input VCF files to filter.", shortName = "input", fullName = "input_vcf_file", required = true)
+  var input: File = _
+
+  @Output(doc = "The output VCF which will have it's FILTER field updated", required = false)
+  var output: File = _
+
+  @Argument(doc = "The minimum allele balance acceptable before filtering a site. Allele balance is calculated for heterozygotes as " +
+    "the number of bases supporting the least-represented allele over the total number of base observations. Different heterozygous " +
+    "genotypes at the same locus are measured independently. The locus is filtered if any allele balance is below the limit.", required = false)
+  var minAb: Option[Double] = None
+
+  @Argument(doc = "The minimum sequencing depth supporting a genotype before the genotype will be filtered out.", required = false)
+  var minDp: Option[Double] = None
+
+  @Argument(doc = "The minimum genotype quality that must be achieved for a sample otherwise the genotype will be filtered out.", required = false)
+  var minGQ: Option[Double] = None
+
+  @Argument(doc = "The maximum phred scaled fisher strand value before a site will be filtered out.", required = false)
+  var maxFs: Option[Double] = None
+
+  @Argument(doc = "The minimum QD value to accept or otherwise filter out the variant.", required = false)
+  var minQd: Option[Double] = None
+
+  var validationStringency = ValidationStringency.SILENT
+  var compressionLevel: Option[Int] = None
+  var createIndex: Option[Boolean] = None
+  var maxRecordsInRam: Option[Int] = None
+  var assumeSorted: Option[Boolean] = None
+
+  // Appends KEY=value arguments to the inherited command line; maxFs is a maximum, so it is sent as MAX_FS (it was previously emitted as MIN_FS).
+  override def commandLine = super.commandLine +
+    required("INPUT=", input, spaceSeparated = false) +
+    required("TMP_DIR=" + jobTempDir) +
+    optional("OUTPUT=", output, spaceSeparated = false) +
+    optional("MIN_AB=", minAb, spaceSeparated = false) +
+    optional("MIN_DP=", minDp, spaceSeparated = false) +
+    optional("MIN_GQ=", minGQ, spaceSeparated = false) +
+    optional("MAX_FS=", maxFs, spaceSeparated = false) +
+    optional("MIN_QD=", minQd, spaceSeparated = false) +
+    optional("COMPRESSION_LEVEL=", compressionLevel, spaceSeparated = false) +
+    optional("VALIDATION_STRINGENCY=", validationStringency, spaceSeparated = false) +
+    optional("MAX_RECORDS_IN_RAM=", maxRecordsInRam, spaceSeparated = false) +
+    optional("ASSUME_SORTED=", assumeSorted, spaceSeparated = false) +
+    optional("CREATE_INDEX=", createIndex, spaceSeparated = false)
+}
diff --git a/public/gatk-queue-extensions-public/src/main/scala/org/broadinstitute/gatk/queue/extensions/picard/MakeSitesOnlyVcf.scala
b/public/gatk-queue-extensions-public/src/main/scala/org/broadinstitute/gatk/queue/extensions/picard/MakeSitesOnlyVcf.scala
new file mode 100644
index 000000000..794bee0b9
--- /dev/null
+++ b/public/gatk-queue-extensions-public/src/main/scala/org/broadinstitute/gatk/queue/extensions/picard/MakeSitesOnlyVcf.scala
@@ -0,0 +1,68 @@
+/*
+* Copyright 2012-2016 Broad Institute, Inc.
+*
+* Permission is hereby granted, free of charge, to any person
+* obtaining a copy of this software and associated documentation
+* files (the "Software"), to deal in the Software without
+* restriction, including without limitation the rights to use,
+* copy, modify, merge, publish, distribute, sublicense, and/or sell
+* copies of the Software, and to permit persons to whom the
+* Software is furnished to do so, subject to the following
+* conditions:
+*
+* The above copyright notice and this permission notice shall be
+* included in all copies or substantial portions of the Software.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
+* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+package org.broadinstitute.gatk.queue.extensions.picard
+
+import java.io.File
+
+import htsjdk.samtools.ValidationStringency
+import org.broadinstitute.gatk.queue.function.JavaCommandLineFunction
+import org.broadinstitute.gatk.utils.commandline.{Argument, Input, Output}
+
+/**
+ * Queue wrapper that runs picard.vcf.MakeSitesOnlyVcf: reads a VCF/VCF.gz/BCF and removes all genotype information from it while retaining all site level
+ * information, including annotations based on genotypes (e.g. AN, AF). Output can be any supported variant format including .vcf, .vcf.gz or .bcf.
+ */
+class MakeSitesOnlyVcf extends JavaCommandLineFunction {
+  analysisName = "MakeSitesOnlyVcf"
+  javaMainClass = "picard.vcf.MakeSitesOnlyVcf"
+
+  @Input(doc = "The input VCF files to analyze.", shortName = "input", fullName = "input_vcf_file", required = true)
+  var input: File = _
+
+  @Output(doc = "The output VCF which will not have any genotypes, but will keep the INFO field intact.", required = false)
+  var output: File = _
+
+  @Argument(shortName = "S", doc = "Optionally one or more samples to retain when building the \'sites-only\' VCF.", required = false)
+  var samples: List[String] = Nil
+
+  var validationStringency = ValidationStringency.SILENT
+  var compressionLevel: Option[Int] = None
+  var createIndex: Option[Boolean] = None
+  var maxRecordsInRam: Option[Int] = None
+  var assumeSorted: Option[Boolean] = None
+
+  // Appends KEY=value arguments to the inherited command line; samples goes through repeat(...), presumably yielding one SAMPLE= argument per entry.
+  override def commandLine = super.commandLine +
+    required("INPUT=", input, spaceSeparated = false) +
+    required("TMP_DIR=" + jobTempDir) +
+    optional("OUTPUT=", output, spaceSeparated = false) +
+    repeat("SAMPLE=", samples, spaceSeparated = false) +
+    optional("COMPRESSION_LEVEL=", compressionLevel, spaceSeparated = false) +
+    optional("VALIDATION_STRINGENCY=", validationStringency, spaceSeparated = false) +
+    optional("MAX_RECORDS_IN_RAM=", maxRecordsInRam, spaceSeparated = false) +
+    optional("ASSUME_SORTED=", assumeSorted, spaceSeparated = false) +
+    optional("CREATE_INDEX=", createIndex, spaceSeparated = false)
+}