/*
 Description: BQSR

 Copyright : All right reserved by ICT

 Author : Zhang Zhonghai
 Date : 2025/10/10
*/
#pragma once

#include <cstddef>
#include <cstdint>
#include <string>
#include <vector>

// Kept at file scope for backward compatibility: code that includes this header may
// rely on the unqualified names `string` / `vector`.
using std::string;
using std::vector;

namespace nsbqsr {

/**
 * Index formats selectable through BQSRArg::INDEX_FORMAT.
 * Deliberately left as an unscoped enum so that both `nsbqsr::BAI` and
 * `nsbqsr::IndexFormat::BAI` keep compiling.
 */
enum IndexFormat { BAI, CSI };

}  // namespace nsbqsr

/**
 * bqsr parameters
 *
 * Plain aggregate holding every BQSR option together with its default value.
 * All members are public and carry in-class initializers, so a default-constructed
 * BQSRArg already contains the defaults listed below.
 */
struct BQSRArg {
    // ------------------------------------------------------------------
    // common parameters
    // ------------------------------------------------------------------

    string INPUT_FILE;   // input bam filename

    string OUTPUT_FILE;  // output bam filename

    int NUM_THREADS = 1;

    size_t MAX_MEM = static_cast<size_t>(1) << 30;  // 1G

    bool DUPLEX_IO = true;

    /* "Whether to create an index when writing VCF or coordinate sorted BAM output.", common = true */
    bool CREATE_INDEX = true;

    nsbqsr::IndexFormat INDEX_FORMAT = nsbqsr::IndexFormat::BAI;

    /* Add PG tag to each read in a SAM or BAM (PGTagArgumentCollection) */
    bool ADD_PG_TAG_TO_READS = true;

    // string CLI_STR;
    // string START_TIME;

    string PROGRAM_RECORD_ID = "FastBQSR";

    // end of common parameters

    /**
     * The context covariate will use a context of this size to calculate its covariate value for base mismatches. Must be
     * between 1 and 13 (inclusive). Note that higher values will increase runtime and required memory.
     */
    int MISMATCHES_CONTEXT_SIZE = 2;

    /**
     * The context covariate will use a context of this size to calculate its covariate value for base insertions and deletions.
     * Must be between 1 and 13 (inclusive). Note that higher values will increase runtime and required memory.
     */
    int INDELS_CONTEXT_SIZE = 3;

    /**
     * The cycle covariate will generate an error if it encounters a cycle greater than this value.
     * This argument is ignored if the Cycle covariate is not used.
     */
    int MAXIMUM_CYCLE_VALUE = 500;

    /**
     * A default base quality to use as a prior (reported quality) in the mismatch covariate model. This value will replace
     * all base qualities in the read for this default value. Negative value turns it off. [default is off]
     */
    int8_t MISMATCHES_DEFAULT_QUALITY = -1;

    /**
     * A default base quality to use as a prior (reported quality) in the insertion covariate model. This parameter is used
     * for all reads without insertion quality scores for each base. [default is on]
     */
    int8_t INSERTIONS_DEFAULT_QUALITY = 45;

    /**
     * Default quality for the base deletions covariate. [default is on]
     * NOTE(review): the previous comment here was a copy of the MISMATCHES_DEFAULT_QUALITY text. Presumably this value is
     * applied like INSERTIONS_DEFAULT_QUALITY, i.e. to reads without per-base deletion quality scores - confirm against
     * the code that consumes it.
     */
    int8_t DELETIONS_DEFAULT_QUALITY = 45;

    /**
     * Reads with low quality bases on either tail (beginning or end) will not be considered in the context. This parameter
     * defines the quality below which (inclusive) a tail is considered low quality
     */
    int8_t LOW_QUAL_TAIL = 2;

    /**
     * BQSR generates a quantization table for quick quantization later by subsequent tools. BQSR does not quantize the base
     * qualities, this is done by the engine with the -qq or -bqsr options. This parameter tells BQSR the number of levels of
     * quantization to use to build the quantization table.
     */
    int QUANTIZING_LEVELS = 16;

    /**
     * The tag name for the binary tag covariate (if using it)
     */
    string BINARY_TAG_NAME = "";

    /**
     * bqsr-baq-gap-open-penalty, BQSR BAQ gap open penalty (Phred Scaled). Default value is 40. 30 is perhaps better
     * for whole genome call sets
     */
    double BAQGOP = 40;

    /**
     * This flag tells GATK not to modify quality scores less than this value. Instead they will be written out unmodified in
     * the recalibrated BAM file. In general it's unsafe to change qualities scores below < 6, since base callers use these
     * values to indicate random or bad bases. For example, Illumina writes Q2 bases when the machine has really gone wrong.
     * This would be fine in and of itself, but when you select a subset of these reads based on their ability to align to the
     * reference and their dinucleotide effect, your Q2 bin can be elevated to Q8 or Q10, leading to issues downstream.
     */
    int PRESERVE_QSCORES_LESS_THAN = 6;

    /**
     * enable-baq, do BAQ correction
     */
    bool enableBAQ = false;

    /**
     * compute-indel-bqsr-tables, compute indel BQSR tables
     */
    bool computeIndelBQSRTables = false;

    // --------------------------------------------------------------------------------------------------------------
    //
    // quality encoding checking arguments
    //
    // --------------------------------------------------------------------------------------------------------------

    /**
     * This flag tells GATK to use the original base qualities (that were in the data before BQSR/recalibration) which
     * are stored in the OQ tag, if they are present, rather than use the post-recalibration quality scores. If no OQ
     * tag is present for a read, the standard qual score will be used.
     */
    bool useOriginalBaseQualities = false;

    /**
     * If reads are missing some or all base quality scores, this value will be used for all base quality scores.
     * By default this is set to -1 to disable default base quality assignment.
     */
    int8_t defaultBaseQualities = -1;
};