FastBQSR/src/bqsr/bqsr_args.h

150 lines
5.3 KiB
C
Raw Normal View History

2025-11-23 23:03:37 +08:00
/*
Description: BQSR
Copyright : All rights reserved by ICT
Author : Zhang Zhonghai
Date : 2025/10/10
*/
#pragma once
#include <cstddef>
#include <cstdint>
#include <string>
#include <vector>
using std::string;
using std::vector;
namespace nsbqsr {
/**
 * Index format selector; BQSRArg::INDEX_FORMAT holds one of these values.
 * NOTE(review): BAI / CSI presumably name the two index file flavours that can be
 * written next to the output BAM - confirm against the writer code.
 * Deliberately left as an unscoped enum so that both nsbqsr::BAI and
 * nsbqsr::IndexFormat::BAI keep compiling.
 */
enum IndexFormat {
    BAI = 0,
    CSI = 1
};
}  // namespace nsbqsr
/**
 * BQSR parameters.
 *
 * Plain bundle of option values together with their defaults. The struct itself
 * performs no validation: ranges quoted in the member comments (for example
 * "between 1 and 13") are not enforced here.
 *
 * Fixed-width and size types are spelled with their std:: qualified names so the
 * struct does not depend on transitive includes or on header-scope using
 * declarations.
 */
struct BQSRArg {
    // ---------------------------------------------------------------------
    // common parameters
    // ---------------------------------------------------------------------

    /** Input BAM filename. */
    std::string INPUT_FILE;

    /** Output BAM filename. */
    std::string OUTPUT_FILE;

    /** Thread count; defaults to 1. */
    int NUM_THREADS = 1;

    /**
     * Memory limit; defaults to 1 << 30 (the original comment says "1G").
     * NOTE(review): unit is presumably bytes - confirm against the code that reads it.
     */
    std::size_t MAX_MEM = static_cast<std::size_t>(1) << 30;

    /**
     * NOTE(review): undocumented in the original. The name suggests input and
     * output may be processed concurrently - confirm.
     */
    bool DUPLEX_IO = true;

    /** Whether to create an index when writing VCF or coordinate sorted BAM output. */
    bool CREATE_INDEX = true;

    /** Index format to use; defaults to BAI. */
    nsbqsr::IndexFormat INDEX_FORMAT = nsbqsr::IndexFormat::BAI;

    /** Add PG tag to each read in a SAM or BAM (PGTagArgumentCollection). */
    bool ADD_PG_TAG_TO_READS = true;

    /** NOTE(review): undocumented in the original; presumably the command line string - confirm. */
    std::string CLI_STR;

    /** NOTE(review): undocumented in the original; presumably the program start time as text - confirm. */
    std::string START_TIME;

    /** Program record id; defaults to "FastBQSR". */
    std::string PROGRAM_RECORD_ID = "FastBQSR";

    // end of common parameters

    /**
     * The context covariate will use a context of this size to calculate its
     * covariate value for base mismatches. Must be between 1 and 13 (inclusive).
     * Higher values will increase runtime and memory use (the original wording,
     * "required java heap size", appears to be inherited from the upstream tool).
     */
    int MISMATCHES_CONTEXT_SIZE = 2;

    /**
     * The context covariate will use a context of this size to calculate its
     * covariate value for base insertions and deletions. Must be between 1 and 13
     * (inclusive). Higher values will increase runtime and memory use (the
     * original wording, "required java heap size", appears to be inherited from
     * the upstream tool).
     */
    int INDELS_CONTEXT_SIZE = 3;

    /**
     * The cycle covariate will generate an error if it encounters a cycle greater
     * than this value. Ignored if the Cycle covariate is not used.
     */
    int MAXIMUM_CYCLE_VALUE = 500;

    /**
     * Default base quality to use as a prior (reported quality) in the mismatch
     * covariate model. When set, this value replaces all base qualities in the
     * read. A negative value turns it off. [default is off]
     */
    std::int8_t MISMATCHES_DEFAULT_QUALITY = -1;

    /**
     * Default base quality to use as a prior (reported quality) in the insertion
     * covariate model. Used for all reads without insertion quality scores for
     * each base. [default is on]
     */
    std::int8_t INSERTIONS_DEFAULT_QUALITY = 45;

    /**
     * Default base quality to use as a prior (reported quality) in the deletion
     * covariate model. [default is on]
     * NOTE(review): the original comment on this member was a verbatim copy of the
     * MISMATCHES_DEFAULT_QUALITY text. By analogy with INSERTIONS_DEFAULT_QUALITY
     * this presumably applies to reads without per-base deletion quality scores -
     * confirm.
     */
    std::int8_t DELETIONS_DEFAULT_QUALITY = 45;

    /**
     * Reads with low quality bases on either tail (beginning or end) will not be
     * considered in the context. This is the quality at or below which a tail is
     * considered low quality.
     */
    std::int8_t LOW_QUAL_TAIL = 2;

    /**
     * BQSR generates a quantization table for quick quantization later by
     * subsequent tools. BQSR does not quantize the base qualities itself; that is
     * done by the engine with the -qq or -bqsr options. This is the number of
     * quantization levels used to build the table.
     */
    int QUANTIZING_LEVELS = 16;

    /** The tag name for the binary tag covariate (if using it). */
    std::string BINARY_TAG_NAME = "";

    /**
     * bqsr-baq-gap-open-penalty: BQSR BAQ gap open penalty (Phred scaled).
     * Default value is 40. 30 is perhaps better for whole genome call sets.
     */
    double BAQGOP = 40;

    /**
     * Quality scores less than this value are not modified; they are written out
     * unchanged in the recalibrated BAM file. In general it is unsafe to change
     * quality scores below 6, since base callers use these values to indicate
     * random or bad bases. For example, Illumina writes Q2 bases when the machine
     * has really gone wrong. This would be fine in and of itself, but when you
     * select a subset of these reads based on their ability to align to the
     * reference and their dinucleotide effect, your Q2 bin can be elevated to Q8
     * or Q10, leading to issues downstream.
     */
    int PRESERVE_QSCORES_LESS_THAN = 6;

    /** enable-baq: do BAQ correction. */
    bool enableBAQ = false;

    /** compute-indel-bqsr-tables: compute indel BQSR tables. */
    bool computeIndelBQSRTables = false;

    // ---------------------------------------------------------------------
    // quality encoding checking arguments
    // ---------------------------------------------------------------------

    /**
     * Use the original base qualities (those in the data before
     * BQSR/recalibration) stored in the OQ tag, if present, rather than the
     * post-recalibration quality scores. If no OQ tag is present for a read, the
     * standard qual score will be used.
     */
    bool useOriginalBaseQualities = false;

    /**
     * If reads are missing some or all base quality scores, this value will be
     * used for all base quality scores. Defaults to -1, which disables default
     * base quality assignment.
     */
    std::int8_t defaultBaseQualities = -1;
};