/*
Description: BQSR

Copyright : All rights reserved by ICT

Author : Zhang Zhonghai

Date : 2025/10/10
*/

#pragma once
|
||
|
||
#include <cstddef>
#include <cstdint>
#include <string>
#include <vector>

using std::string;
using std::vector;
|
||
|
||
namespace nsbqsr {

/**
 * Index format selector; used by BQSRArg::INDEX_FORMAT.
 *
 * The enumerators keep their implicit values (BAI == 0, CSI == 1).
 * NOTE(review): presumably these name the BAI and CSI alignment-index file formats — confirm.
 */
enum IndexFormat {
    BAI,
    CSI
};

}  // namespace nsbqsr
|
||
|
||
/* bqsr parameters */
|
||
struct BQSRArg {
|
||
// common parameters
|
||
|
||
string INPUT_FILE; // input bam filename
|
||
|
||
string OUTPUT_FILE; // output bam filename
|
||
|
||
int NUM_THREADS = 1;
|
||
|
||
size_t MAX_MEM = ((size_t)1) << 30; // 1G
|
||
|
||
bool DUPLEX_IO = true; //
|
||
|
||
/* "Whether to create an index when writing VCF or coordinate sorted BAM output.", common = true */
|
||
bool CREATE_INDEX = true;
|
||
|
||
nsbqsr::IndexFormat INDEX_FORMAT = nsbqsr::IndexFormat::BAI;
|
||
|
||
/* Add PG tag to each read in a SAM or BAM (PGTagArgumentCollection)*/
|
||
bool ADD_PG_TAG_TO_READS = true;
|
||
|
||
//
|
||
string CLI_STR;
|
||
|
||
//
|
||
string START_TIME;
|
||
|
||
string PROGRAM_RECORD_ID = "FastBQSR";
|
||
|
||
// reference file
|
||
string REFERENCE_FILE;
|
||
|
||
// known sites vcf files
|
||
vector<string> KNOWN_SITES_VCFS;
|
||
|
||
// end of common parameters
|
||
|
||
// We always use the same covariates. The field is retained for compatibility with GATK3 reports.
|
||
bool DO_NOT_USE_STANDARD_COVARIATES = false;
|
||
|
||
//It makes no sense to run BQSR without sites. so we remove this option.
|
||
bool RUN_WITHOUT_DBSNP = false;
|
||
|
||
// We don't support SOLID. The field is retained for compatibility with GATK3 reports.
|
||
string SOLID_RECAL_MODE = "SET_Q_ZERO";
|
||
string SOLID_NOCALL_STRATEGY = "THROW_EXCEPTION";
|
||
|
||
//@Hidden @Argument(fullName = "default-platform", optional = true,
|
||
// doc = "If a read has no platform then default to the provided String. Valid options are illumina, 454, and solid.") public String
|
||
string DEFAULT_PLATFORM = "";
|
||
|
||
// @Hidden @Argument(fullName = "force-platform", optional = true,
|
||
// doc = "If provided, the platform of EVERY read will be forced to be the provided String. Valid options are illumina, 454, and "solid.")
|
||
string FORCE_PLATFORM = "";
|
||
|
||
string existingRecalibrationReport = "";
|
||
|
||
/**
|
||
* The context covariate will use a context of this size to calculate its covariate value for base mismatches. Must be
|
||
* between 1 and 13 (inclusive). Note that higher values will increase runtime and required java heap size.
|
||
*/
|
||
int MISMATCHES_CONTEXT_SIZE = 2;
|
||
|
||
/**
|
||
* The context covariate will use a context of this size to calculate its covariate value for base insertions and deletions.
|
||
* Must be between 1 and 13 (inclusive). Note that higher values will increase runtime and required java heap size.
|
||
*/
|
||
int INDELS_CONTEXT_SIZE = 3;
|
||
|
||
/**
|
||
* The cycle covariate will generate an error if it encounters a cycle greater than this value.
|
||
* This argument is ignored if the Cycle covariate is not used.
|
||
*/
|
||
int MAXIMUM_CYCLE_VALUE = 500;
|
||
|
||
/**
|
||
* A default base qualities to use as a prior (reported quality) in the mismatch covariate model. This value will replace
|
||
* all base qualities in the read for this default value. Negative value turns it off. [default is off]
|
||
*/
|
||
int8_t MISMATCHES_DEFAULT_QUALITY = -1;
|
||
|
||
/**
|
||
* A default base qualities to use as a prior (reported quality) in the insertion covariate model. This parameter is used
|
||
* for all reads without insertion quality scores for each base. [default is on]
|
||
*/
|
||
int8_t INSERTIONS_DEFAULT_QUALITY = 45;
|
||
|
||
/**
|
||
* A default base qualities to use as a prior (reported quality) in the mismatch covariate model. This value will replace
|
||
* all base qualities in the read for this default value. Negative value turns it off. [default is on]
|
||
*/
|
||
int8_t DELETIONS_DEFAULT_QUALITY = 45;
|
||
|
||
/**
|
||
* Reads with low quality bases on either tail (beginning or end) will not be considered in the context. This parameter
|
||
* defines the quality below which (inclusive) a tail is considered low quality
|
||
*/
|
||
int8_t LOW_QUAL_TAIL = 2;
|
||
|
||
/**
|
||
* BQSR generates a quantization table for quick quantization later by subsequent tools. BQSR does not quantize the base
|
||
* qualities, this is done by the engine with the -qq or -bqsr options. This parameter tells BQSR the number of levels of
|
||
* quantization to use to build the quantization table.
|
||
*/
|
||
int QUANTIZING_LEVELS = 16;
|
||
|
||
/**
|
||
* The tag name for the binary tag covariate (if using it)
|
||
*/
|
||
string BINARY_TAG_NAME = "";
|
||
|
||
/**
|
||
* bqsr-baq-gap-open-penalty, BQSR BAQ gap open penalty (Phred Scaled). Default value is 40. 30 is perhaps better
|
||
* for whole genome call sets
|
||
*/
|
||
double BAQGOP = 40;
|
||
|
||
/**
|
||
* This flag tells GATK not to modify quality scores less than this value. Instead they will be written out unmodified in
|
||
* the recalibrated BAM file. In general it's unsafe to change qualities scores below < 6, since base callers use these
|
||
* values to indicate random or bad bases. For example, Illumina writes Q2 bases when the machine has really gone wrong.
|
||
* This would be fine in and of itself, but when you select a subset of these reads based on their ability to align to the
|
||
* reference and their dinucleotide effect, your Q2 bin can be elevated to Q8 or Q10, leading to issues downstream.
|
||
*/
|
||
int PRESERVE_QSCORES_LESS_THAN = 6;
|
||
|
||
/**
|
||
* enable-baq, do BAQ correction" (base alignment quality), 在GATK里hidden了,用不到了?
|
||
*/
|
||
bool enableBAQ = false;
|
||
|
||
/**
|
||
* compute-indel-bqsr-tables, compute indel BQSR tables"
|
||
*/
|
||
bool computeIndelBQSRTables = false;
|
||
|
||
// --------------------------------------------------------------------------------------------------------------
|
||
//
|
||
// quality encoding checking arguments
|
||
//
|
||
// --------------------------------------------------------------------------------------------------------------
|
||
|
||
/**
|
||
* This flag tells GATK to use the original base qualities (that were in the data before BQSR/recalibration) which
|
||
* are stored in the OQ tag, if they are present, rather than use the post-recalibration quality scores. If no OQ
|
||
* tag is present for a read, the standard qual score will be used.
|
||
*/
|
||
bool useOriginalBaseQualities = false;
|
||
|
||
/**
|
||
* If reads are missing some or all base quality scores, this value will be used for all base quality scores.
|
||
* By default this is set to -1 to disable default base quality assignment.
|
||
*/
|
||
int8_t defaultBaseQualities = -1;
|
||
}; |