150 lines
5.3 KiB
C
150 lines
5.3 KiB
C
|
|
/*
|
||
|
|
Description: BQSR
|
||
|
|
|
||
|
|
Copyright : All rights reserved by ICT
|
||
|
|
|
||
|
|
Author : Zhang Zhonghai
|
||
|
|
Date : 2025/10/10
|
||
|
|
*/
|
||
|
|
#pragma once
|
||
|
|
|
||
|
|
#include <cstddef>
#include <cstdint>
#include <string>
#include <vector>
|
||
|
|
|
||
|
|
using std::string;
|
||
|
|
using std::vector;
|
||
|
|
|
||
|
|
namespace nsbqsr {

/**
 * Index format selector, used as the type of BQSRArg::INDEX_FORMAT.
 *
 * Kept as an unscoped enum so that both `nsbqsr::BAI` and
 * `nsbqsr::IndexFormat::BAI` remain valid spellings for existing callers.
 */
enum IndexFormat {
    BAI,  // presumably the classic BAM ".bai" index -- confirm against the writer code
    CSI   // presumably the coordinate-sorted ".csi" index -- confirm against the writer code
};

}  // namespace nsbqsr
|
||
|
|
|
||
|
|
/* bqsr parameters */
|
||
|
|
struct BQSRArg {
|
||
|
|
// common parameters
|
||
|
|
|
||
|
|
string INPUT_FILE; // input bam filename
|
||
|
|
|
||
|
|
string OUTPUT_FILE; // output bam filename
|
||
|
|
|
||
|
|
int NUM_THREADS = 1;
|
||
|
|
|
||
|
|
size_t MAX_MEM = ((size_t)1) << 30; // // 1G
|
||
|
|
|
||
|
|
bool DUPLEX_IO = true; //
|
||
|
|
|
||
|
|
/* "Whether to create an index when writing VCF or coordinate sorted BAM output.", common = true */
|
||
|
|
bool CREATE_INDEX = true;
|
||
|
|
|
||
|
|
nsbqsr::IndexFormat INDEX_FORMAT = nsbqsr::IndexFormat::BAI;
|
||
|
|
|
||
|
|
/* Add PG tag to each read in a SAM or BAM (PGTagArgumentCollection)*/
|
||
|
|
bool ADD_PG_TAG_TO_READS = true;
|
||
|
|
|
||
|
|
//
|
||
|
|
string CLI_STR;
|
||
|
|
|
||
|
|
//
|
||
|
|
string START_TIME;
|
||
|
|
|
||
|
|
string PROGRAM_RECORD_ID = "FastBQSR";
|
||
|
|
|
||
|
|
// end of common parameters
|
||
|
|
|
||
|
|
/**
|
||
|
|
* The context covariate will use a context of this size to calculate its covariate value for base mismatches. Must be
|
||
|
|
* between 1 and 13 (inclusive). Note that higher values will increase runtime and required java heap size.
|
||
|
|
*/
|
||
|
|
int MISMATCHES_CONTEXT_SIZE = 2;
|
||
|
|
|
||
|
|
/**
|
||
|
|
* The context covariate will use a context of this size to calculate its covariate value for base insertions and deletions.
|
||
|
|
* Must be between 1 and 13 (inclusive). Note that higher values will increase runtime and required java heap size.
|
||
|
|
*/
|
||
|
|
int INDELS_CONTEXT_SIZE = 3;
|
||
|
|
|
||
|
|
/**
|
||
|
|
* The cycle covariate will generate an error if it encounters a cycle greater than this value.
|
||
|
|
* This argument is ignored if the Cycle covariate is not used.
|
||
|
|
*/
|
||
|
|
int MAXIMUM_CYCLE_VALUE = 500;
|
||
|
|
|
||
|
|
/**
|
||
|
|
* A default base qualities to use as a prior (reported quality) in the mismatch covariate model. This value will replace
|
||
|
|
* all base qualities in the read for this default value. Negative value turns it off. [default is off]
|
||
|
|
*/
|
||
|
|
int8_t MISMATCHES_DEFAULT_QUALITY = -1;
|
||
|
|
|
||
|
|
/**
|
||
|
|
* A default base qualities to use as a prior (reported quality) in the insertion covariate model. This parameter is used
|
||
|
|
* for all reads without insertion quality scores for each base. [default is on]
|
||
|
|
*/
|
||
|
|
int8_t INSERTIONS_DEFAULT_QUALITY = 45;
|
||
|
|
|
||
|
|
/**
|
||
|
|
* A default base qualities to use as a prior (reported quality) in the mismatch covariate model. This value will replace
|
||
|
|
* all base qualities in the read for this default value. Negative value turns it off. [default is on]
|
||
|
|
*/
|
||
|
|
int8_t DELETIONS_DEFAULT_QUALITY = 45;
|
||
|
|
|
||
|
|
/**
|
||
|
|
* Reads with low quality bases on either tail (beginning or end) will not be considered in the context. This parameter
|
||
|
|
* defines the quality below which (inclusive) a tail is considered low quality
|
||
|
|
*/
|
||
|
|
int8_t LOW_QUAL_TAIL = 2;
|
||
|
|
|
||
|
|
/**
|
||
|
|
* BQSR generates a quantization table for quick quantization later by subsequent tools. BQSR does not quantize the base
|
||
|
|
* qualities, this is done by the engine with the -qq or -bqsr options. This parameter tells BQSR the number of levels of
|
||
|
|
* quantization to use to build the quantization table.
|
||
|
|
*/
|
||
|
|
int QUANTIZING_LEVELS = 16;
|
||
|
|
|
||
|
|
/**
|
||
|
|
* The tag name for the binary tag covariate (if using it)
|
||
|
|
*/
|
||
|
|
string BINARY_TAG_NAME = "";
|
||
|
|
|
||
|
|
/**
|
||
|
|
* bqsr-baq-gap-open-penalty, BQSR BAQ gap open penalty (Phred Scaled). Default value is 40. 30 is perhaps better
|
||
|
|
* for whole genome call sets
|
||
|
|
*/
|
||
|
|
double BAQGOP = 40;
|
||
|
|
|
||
|
|
/**
|
||
|
|
* This flag tells GATK not to modify quality scores less than this value. Instead they will be written out unmodified in
|
||
|
|
* the recalibrated BAM file. In general it's unsafe to change qualities scores below < 6, since base callers use these
|
||
|
|
* values to indicate random or bad bases. For example, Illumina writes Q2 bases when the machine has really gone wrong.
|
||
|
|
* This would be fine in and of itself, but when you select a subset of these reads based on their ability to align to the
|
||
|
|
* reference and their dinucleotide effect, your Q2 bin can be elevated to Q8 or Q10, leading to issues downstream.
|
||
|
|
*/
|
||
|
|
int PRESERVE_QSCORES_LESS_THAN = 6;
|
||
|
|
|
||
|
|
/**
|
||
|
|
* enable-baq, do BAQ correction"
|
||
|
|
*/
|
||
|
|
bool enableBAQ = false;
|
||
|
|
|
||
|
|
/**
|
||
|
|
* compute-indel-bqsr-tables, compute indel BQSR tables"
|
||
|
|
*/
|
||
|
|
bool computeIndelBQSRTables = false;
|
||
|
|
|
||
|
|
// --------------------------------------------------------------------------------------------------------------
|
||
|
|
//
|
||
|
|
// quality encoding checking arguments
|
||
|
|
//
|
||
|
|
// --------------------------------------------------------------------------------------------------------------
|
||
|
|
|
||
|
|
/**
|
||
|
|
* This flag tells GATK to use the original base qualities (that were in the data before BQSR/recalibration) which
|
||
|
|
* are stored in the OQ tag, if they are present, rather than use the post-recalibration quality scores. If no OQ
|
||
|
|
* tag is present for a read, the standard qual score will be used.
|
||
|
|
*/
|
||
|
|
bool useOriginalBaseQualities = false;
|
||
|
|
|
||
|
|
/**
|
||
|
|
* If reads are missing some or all base quality scores, this value will be used for all base quality scores.
|
||
|
|
* By default this is set to -1 to disable default base quality assignment.
|
||
|
|
*/
|
||
|
|
int8_t defaultBaseQualities = -1;
|
||
|
|
};
|