diff --git a/.vscode/launch.json b/.vscode/launch.json index 9b52ffc..ca8917a 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -12,7 +12,14 @@ "request": "launch", "program": "${workspaceRoot}/build/bin/picard_cpp", "args": [ - "MarkDuplicates" + "MarkDuplicates", + "--INPUT", "test.bam", + "--OUTPUT", "out.bam", + "--METRICS_FILE", "metrics.txt", + "--num_threads", "12", + "--max_mem", "4G", + "--verbosity", "DEBUG", + "--asyncio", "true", ], "cwd": "${workspaceFolder}", // 当前工作路径:当前文件所在的工作空间 } diff --git a/.vscode/settings.json b/.vscode/settings.json index f66631f..6c73617 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -1,5 +1,7 @@ { "files.associations": { - "cstring": "cpp" + "cstring": "cpp", + "vector": "cpp", + "random": "cpp" } } \ No newline at end of file diff --git a/build.sh b/build.sh new file mode 100755 index 0000000..963fc94 --- /dev/null +++ b/build.sh @@ -0,0 +1,8 @@ +#!/bin/bash +dir="/home/zzh/work/GeneKit/picard_cpp/build" +#[ -d "$dir" ] && rm -rf "$dir" +#mkdir "$dir" +cd "$dir" +cmake .. -DCMAKE_BUILD_TYPE=Debug +#cmake .. 
-DCMAKE_BUILD_TYPE=Release +make -j 8 diff --git a/run.sh b/run.sh new file mode 100755 index 0000000..ede0c45 --- /dev/null +++ b/run.sh @@ -0,0 +1,8 @@ +/home/zzh/work/GeneKit/picard_cpp/build/bin/picard_cpp \ + MarkDuplicates \ + --INPUT test.bam \ + --OUTPUT out.bam \ + --num_threads 12 \ + --max_mem 4G \ + --verbosity DEBUG \ + --asyncio true diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 35908d1..5286867 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -3,7 +3,7 @@ SET(EXECUTABLE_OUTPUT_PATH "${PROJECT_BINARY_DIR}/bin") # 源码目录 AUX_SOURCE_DIRECTORY(${PROJECT_SOURCE_DIR}/src MAIN_SRC) -# AUX_SOURCE_DIRECTORY(${PROJECT_SOURCE_DIR}/src/common COMMON) +AUX_SOURCE_DIRECTORY(${PROJECT_SOURCE_DIR}/src/common COMMON) AUX_SOURCE_DIRECTORY(${PROJECT_SOURCE_DIR}/src/sam SAM_SRC) AUX_SOURCE_DIRECTORY(${PROJECT_SOURCE_DIR}/src/sam/markdups SAM_MARKDUPS_SRC) @@ -19,7 +19,7 @@ LINK_DIRECTORIES("${PROJECT_SOURCE_DIR}/lib/htslib") set(PG_NAME "picard_cpp") # 为程序添加依赖关系 -ADD_EXECUTABLE(${PG_NAME} ${MAIN_SRC} ${SAM_SRC} ${SAM_MARKDUPS_SRC}) +ADD_EXECUTABLE(${PG_NAME} ${MAIN_SRC} ${COMMON} ${SAM_SRC} ${SAM_MARKDUPS_SRC}) # 链接库 TARGET_LINK_LIBRARIES(${PG_NAME} libhts.a) diff --git a/src/common/global_arg.cpp b/src/common/global_arg.cpp new file mode 100644 index 0000000..1588147 --- /dev/null +++ b/src/common/global_arg.cpp @@ -0,0 +1,109 @@ +/* + Description: 全局参数,所有模块都可能用到的参数 + + Copyright : All right reserved by NCIC.ICT + + Author : Zhang Zhonghai + Date : 2023/10/23 +*/ + +#include "global_arg.h" + +#include +#include +#include +#include + +using std::vector; + +/* + * GlobalArg 类 + */ + +struct option *GlobalArg::GLOBAL_OPT = nullptr; + +// 初始化参数 +void GlobalArg::initGlobalOptions() +{ + vector v; + v.push_back({"INPUT", required_argument, NULL, ns_ga::GlobalOptEnum::OPT_INPUT}); // 输入文件 + v.push_back({"OUTPUT", required_argument, NULL, ns_ga::GlobalOptEnum::OPT_OUTPUT}); // 输出文件 + v.push_back({"num_threads", required_argument, NULL, 
ns_ga::GlobalOptEnum::OPT_NUM_THREADS}); + v.push_back({"max_mem", required_argument, NULL, ns_ga::GlobalOptEnum::OPT_MAX_MEM}); + v.push_back({"verbosity", required_argument, NULL, ns_ga::GlobalOptEnum::OPT_LOG_LEVEL}); + v.push_back({"asyncio", required_argument, NULL, ns_ga::GlobalOptEnum::OPT_ASYNCIO}); + v.push_back({"version", no_argument, NULL, ns_ga::GlobalOptEnum::OPT_VERSION}); + v.push_back({"help", no_argument, NULL, ns_ga::GlobalOptEnum::OPT_HELP}); + v.push_back({0, 0, 0, 0}); + + GLOBAL_OPT = new struct option[GLOBAL_ARG_CNT]; + memcpy(GLOBAL_OPT, v.data(), v.size() * sizeof(struct option)); + + /* 添加帮助信息, 按arg enum顺序进行添加信息 */ + vArgInfo.push_back("--INPUT Input file path (bam, vcf ...)\n"); + vArgInfo.push_back("--OUTPUT Output file path \n"); + vArgInfo.push_back("--num_threads Number of threads to allocate to this analysis [1]\n"); + vArgInfo.push_back("--max_mem Set maximum memory; suffix K/M/G recognized [2G]\n"); + vArgInfo.push_back("--verbosity Control verbosity of logging. 
error/warning/info/debug [info]\n"); + vArgInfo.push_back("--asyncio Use async io [true]\n"); + vArgInfo.push_back("--version Output version information\n"); + vArgInfo.push_back("--help Generate the help message\n"); +} + +// 解析参数 +void GlobalArg::parseArgument(int argNum) +{ + using namespace ns_ga; + switch (argNum) + { + case OPT_INPUT: + in_fn = optarg; + break; + case OPT_OUTPUT: + out_fn = optarg; + break; + case OPT_NUM_THREADS: + num_threads = std::stoi(optarg); + break; + case OPT_MAX_MEM: + { + char *q; + size_t mem_arg = strtol(optarg, &q, 0); + if (*q == 'k' || *q == 'K') + mem_arg <<= 10; + else if (*q == 'm' || *q == 'M') + mem_arg <<= 20; + else if (*q == 'g' || *q == 'G') + mem_arg <<= 30; + if (mem_arg >= max_mem) + max_mem = mem_arg; + else + { + std::cerr << "[Warn] Too small mem size, use default" << std::endl; + } + break; + } + case OPT_LOG_LEVEL: + { + if (strcmp("ERROR", optarg) == 0) + verbosity = ns_ga::ERROR; + else if (strcmp("WARNING", optarg) == 0) + verbosity = ns_ga::WARNING; + else if (strcmp("INFO", optarg) == 0) + verbosity = ns_ga::INFO; + else if (strcmp("DEBUG", optarg) == 0) + verbosity = ns_ga::DEBUG; + break; + } + case OPT_ASYNCIO: + { + if (strcmp("true", optarg) == 0) + use_asyncio = true; + else if (strcmp("false", optarg) == 0) + use_asyncio = false; + break; + } + default: + break; + } +} \ No newline at end of file diff --git a/src/common/global_arg.h b/src/common/global_arg.h new file mode 100644 index 0000000..4ae614e --- /dev/null +++ b/src/common/global_arg.h @@ -0,0 +1,105 @@ +/* +Description: picard_cpp共享的一些参数 + +Copyright : All right reserved by NCIC.ICT + +Author : Zhang Zhonghai +Date : 2023/10/23 +*/ +#ifndef GLOBAL_ARG_H_ +#define GLOBAL_ARG_H_ + +#include +#include +#include +#include +#include + +using std::map; +using std::string; +using std::vector; + +namespace ns_ga { + enum GlobalOptEnum + { + _START_NUM = 1, + OPT_INPUT, + OPT_OUTPUT, + OPT_NUM_THREADS, + OPT_MAX_MEM, + OPT_LOG_LEVEL, + 
OPT_ASYNCIO, + OPT_VERSION, + OPT_HELP, + _END_NUM + }; + + // log level + enum LogLevelEnum + { + ERROR, + WARNING, + INFO, + DEBUG + }; +} + +/* 全局共享的一些参数 */ +struct GlobalArg +{ + const static int GLOBAL_ARG_CNT = ns_ga::GlobalOptEnum::_END_NUM - ns_ga::GlobalOptEnum::_START_NUM; // 这里不需要减1 + static struct option *GLOBAL_OPT; + + string in_fn; // input bam filename + string out_fn; // output bam filename + int num_threads = 1; // 线程个数 + size_t max_mem = ((size_t)2) << 30; // 最小2G + ns_ga::LogLevelEnum verbosity = ns_ga::INFO; // 打印信息级别 + bool use_asyncio = true; // 是否使用异步io + + vector vArgInfo; // 每个参数的帮助信息 + + // 单例模式 + GlobalArg(const GlobalArg &) = delete; + GlobalArg &operator=(const GlobalArg &) = delete; + + // 获取单例 + static GlobalArg &Instance() + { + static GlobalArg instance; + return instance; + } + // 初始化参数 + void initGlobalOptions(); + + // 解析参数 + void parseArgument(int argNum); + + // 获取对应参数在数组(option和help info)中的索引 + int getArgIndx(ns_ga::GlobalOptEnum opt) + { + return opt - ns_ga::GlobalOptEnum::OPT_INPUT; + } + + // 打印某个参数的帮助信息 + void printArgInfo(ns_ga::GlobalOptEnum arg) { + int idx = getArgIndx(arg); + fprintf(stdout, "%s\n", vArgInfo[idx].c_str()); + } + + void printArgValue() { + printf("--INPUT = %s\n", in_fn.c_str()); + printf("--OUTPUT = %s\n", out_fn.c_str()); + printf("--num_threads = %d\n",num_threads); + printf("--max_mem = %ld\n", max_mem); + printf("--verbosity = %d\n", verbosity); + printf("--asyncio = %d\n", use_asyncio); + } +private : + GlobalArg() + { + initGlobalOptions(); + }; +}; + +#endif \ No newline at end of file diff --git a/src/sam/markdups/markdups.cpp b/src/sam/markdups/markdups.cpp index 48dbad6..f33e97c 100644 --- a/src/sam/markdups/markdups.cpp +++ b/src/sam/markdups/markdups.cpp @@ -6,6 +6,8 @@ Copyright : All right reserved by ICT Author : Zhang Zhonghai Date : 2023/10/23 */ +#include "markdups_arg.h" +#include #include @@ -16,11 +18,18 @@ using namespace std; */ int MarkDuplicates(int argc, char *argv[]) { - 
cout << argc << endl; - for (int i = 0; i < argc; ++i) { - cout << argv[i] << '\t'; - } - cout << endl; + // cout << argc << endl; + // for (int i = 0; i < argc; ++i) { + // cout << argv[i] << '\t'; + // } + // cout << endl; + + GlobalArg &gArg = GlobalArg::Instance(); + MarkDupsArg mdArg; + vector vAuxVar; + mdArg.parseArgument(argc, argv, &vAuxVar, &gArg); + + // cout << ns_md::ValidationStringency::DEFAULT_STRINGENCY << '\t' << ns_md::ValidationStringency::SILENT << endl; return 0; } \ No newline at end of file diff --git a/src/sam/markdups/markdups_arg.cpp b/src/sam/markdups/markdups_arg.cpp index e69de29..97b7c31 100644 --- a/src/sam/markdups/markdups_arg.cpp +++ b/src/sam/markdups/markdups_arg.cpp @@ -0,0 +1,488 @@ +/* +Description: Markduplicate需要用到的一些参数,读取命令行给的参数,并做一些初始化 + +Copyright : All right reserved by ICT + +Author : Zhang Zhonghai +Date : 2023/10/27 +*/ + +#include "markdups_arg.h" +#include "common/global_arg.h" + +#include +#include +#include +#include +#include +#include +#include + +#include +using std::cout, std::endl; + +using std::ostringstream; +using std::stod; +using std::stoi; +using std::stol; +using std::string; +using std::vector; + +using namespace ns_md; + +/* + * mutect参数 + */ +const static struct option kMdOpts[] = { + {"MAX_SEQUENCES_FOR_DISK_READ_ENDS_MAP", required_argument, NULL, MAX_SEQUENCES_FOR_DISK_READ_ENDS_MAP}, + {"MAX_FILE_HANDLES_FOR_READ_ENDS_MAP", required_argument, NULL, MAX_FILE_HANDLES_FOR_READ_ENDS_MAP}, + {"SORTING_COLLECTION_SIZE_RATIO", required_argument, NULL, SORTING_COLLECTION_SIZE_RATIO}, + {"BARCODE_TAG", required_argument, NULL, BARCODE_TAG}, + {"READ_ONE_BARCODE_TAG", required_argument, NULL, READ_ONE_BARCODE_TAG}, + {"READ_TWO_BARCODE_TAG", required_argument, NULL, READ_TWO_BARCODE_TAG}, + {"TAG_DUPLICATE_SET_MEMBERS", required_argument, NULL, TAG_DUPLICATE_SET_MEMBERS}, + {"REMOVE_SEQUENCING_DUPLICATES", required_argument, NULL, REMOVE_SEQUENCING_DUPLICATES}, + {"TAGGING_POLICY", required_argument, 
NULL, TAGGING_POLICY}, + {"CLEAR_DT", required_argument, NULL, CLEAR_DT}, + {"DUPLEX_UMI", required_argument, NULL, DUPLEX_UMI}, + {"MOLECULAR_IDENTIFIER_TAG", required_argument, NULL, MOLECULAR_IDENTIFIER_TAG}, + {"METRICS_FILE", required_argument, NULL, METRICS_FILE}, + {"REMOVE_DUPLICATES", required_argument, NULL, REMOVE_DUPLICATES}, + {"ASSUME_SORTED", required_argument, NULL, ASSUME_SORTED}, + {"ASSUME_SORT_ORDER", required_argument, NULL, ASSUME_SORT_ORDER}, + {"DUPLICATE_SCORING_STRATEGY", required_argument, NULL, DUPLICATE_SCORING_STRATEGY}, + {"PROGRAM_RECORD_ID", required_argument, NULL, PROGRAM_RECORD_ID}, + {"PROGRAM_GROUP_VERSION", required_argument, NULL, PROGRAM_GROUP_VERSION}, + {"PROGRAM_GROUP_COMMAND_LINE", required_argument, NULL, PROGRAM_GROUP_COMMAND_LINE}, + {"PROGRAM_GROUP_NAME", required_argument, NULL, PROGRAM_GROUP_NAME}, + {"COMMENT", required_argument, NULL, COMMENT}, + {"READ_NAME_REGEX", required_argument, NULL, READ_NAME_REGEX}, + {"OPTICAL_DUPLICATE_PIXEL_DISTANCE", required_argument, NULL, OPTICAL_DUPLICATE_PIXEL_DISTANCE}, + {"MAX_OPTICAL_DUPLICATE_SET_SIZE", required_argument, NULL, MAX_OPTICAL_DUPLICATE_SET_SIZE}, + {"QUIET", required_argument, NULL, QUIET}, + {"VALIDATION_STRINGENCY", required_argument, NULL, VALIDATION_STRINGENCY}, + {"COMPRESSION_LEVEL", required_argument, NULL, COMPRESSION_LEVEL}, + {"MAX_RECORDS_IN_RAM", required_argument, NULL, MAX_RECORDS_IN_RAM}, + {"CREATE_INDEX", required_argument, NULL, CREATE_INDEX}, + {"CREATE_MD5_FILE", required_argument, NULL, CREATE_MD5_FILE}}; + +// 判断bool类型的参数 +void setBoolArg(bool *arg) { + if (strcmp("true", optarg) == 0) + *arg = true; + else if (strcmp("false", optarg) == 0) + *arg = false; +} + +// 解析参数 +void MarkDupsArg::parseArgument(int argc, + char **argv, + vector *pvAuxVar, + GlobalArg *pGArg) +{ + auto &vAuxVar = *pvAuxVar; + auto &gArg = *pGArg; + + struct option allOpt[MarkDupsArg::ARG_COUNT + GlobalArg::GLOBAL_ARG_CNT]; + + memcpy(allOpt, kMdOpts, 
MarkDupsArg::ARG_COUNT * sizeof(struct option)); + memcpy(&allOpt[MarkDupsArg::ARG_COUNT], GlobalArg::GLOBAL_OPT, GlobalArg::GLOBAL_ARG_CNT * sizeof(struct option)); + + // int cnt = MarkDupsArg::ARG_COUNT + GlobalArg::GLOBAL_ARG_CNT; + // cout << cnt << endl; + // for (int i = 0; i < cnt; ++i) + // { + // cout << i << '\t' << allOpt[i].name << endl; + // } + + int c; + while ((c = getopt_long_only(argc, argv, "", allOpt, NULL)) >= 0) + { + + gArg.parseArgument(c); + switch (c) + { + case ns_ga::OPT_VERSION: + PrintVersion(); + exit(0); + case ns_ga::OPT_HELP: + PrintHelp(); + exit(0); + case ns_md::MAX_SEQUENCES_FOR_DISK_READ_ENDS_MAP: + MAX_SEQUENCES_FOR_DISK_READ_ENDS_MAP = stoi(optarg); + break; + case ns_md::MAX_FILE_HANDLES_FOR_READ_ENDS_MAP: + MAX_FILE_HANDLES_FOR_READ_ENDS_MAP = stoi(optarg); + break; + case ns_md::SORTING_COLLECTION_SIZE_RATIO: + SORTING_COLLECTION_SIZE_RATIO = stod(optarg); + break; + case ns_md::BARCODE_TAG: + BARCODE_TAG = optarg; + break; + case ns_md::READ_ONE_BARCODE_TAG: + READ_ONE_BARCODE_TAG = optarg; + break; + case ns_md::READ_TWO_BARCODE_TAG: + READ_TWO_BARCODE_TAG = optarg; + break; + case ns_md::TAG_DUPLICATE_SET_MEMBERS: + setBoolArg(&TAG_DUPLICATE_SET_MEMBERS); + break; + case ns_md::REMOVE_SEQUENCING_DUPLICATES: + setBoolArg(&REMOVE_SEQUENCING_DUPLICATES); + break; + case ns_md::TAGGING_POLICY: + if (strcmp("DontTag", optarg) == 0) + TAGGING_POLICY = ns_md::DuplicateTaggingPolicy::DontTag; + else if (strcmp("OpticalOnly", optarg) == 0) + TAGGING_POLICY = ns_md::DuplicateTaggingPolicy::OpticalOnly; + else if (strcmp("All", optarg) == 0) + TAGGING_POLICY = ns_md::DuplicateTaggingPolicy::All; + break; + case ns_md::CLEAR_DT: + setBoolArg(&CLEAR_DT); + break; + case ns_md::DUPLEX_UMI: + setBoolArg(&DUPLEX_UMI); + break; + case ns_md::MOLECULAR_IDENTIFIER_TAG: + MOLECULAR_IDENTIFIER_TAG = optarg; + break; + case ns_md::METRICS_FILE: + METRICS_FILE = optarg; + break; + case ns_md::REMOVE_DUPLICATES: + 
setBoolArg(&REMOVE_DUPLICATES); + break; + case ns_md::ASSUME_SORTED: + setBoolArg(&ASSUME_SORTED); + break; + case ns_md::ASSUME_SORT_ORDER: + if (strcmp("unsorted", optarg) == 0) + ASSUME_SORT_ORDER = ns_md::SortOrder::unsorted; + else if (strcmp("queryname", optarg) == 0) + ASSUME_SORT_ORDER = ns_md::SortOrder::queryname; + else if (strcmp("coordinate", optarg) == 0) + ASSUME_SORT_ORDER = ns_md::SortOrder::coordinate; + else if (strcmp("duplicate", optarg) == 0) + ASSUME_SORT_ORDER = ns_md::SortOrder::duplicate; + else if (strcmp("unknown", optarg) == 0) + ASSUME_SORT_ORDER = ns_md::SortOrder::unknown; + break; + case ns_md::DUPLICATE_SCORING_STRATEGY: + if (strcmp("SUM_OF_BASE_QUALITIES", optarg) == 0) + DUPLICATE_SCORING_STRATEGY = ns_md::ScoringStrategy::SUM_OF_BASE_QUALITIES; + else if (strcmp("TOTAL_MAPPED_REFERENCE_LENGTH", optarg) == 0) + DUPLICATE_SCORING_STRATEGY = ns_md::ScoringStrategy::TOTAL_MAPPED_REFERENCE_LENGTH; + else if (strcmp("RANDOM", optarg) == 0) + DUPLICATE_SCORING_STRATEGY = ns_md::ScoringStrategy::RANDOM; + break; + case ns_md::PROGRAM_RECORD_ID: + PROGRAM_RECORD_ID = optarg; + break; + case ns_md::PROGRAM_GROUP_VERSION: + PROGRAM_GROUP_VERSION = optarg; + break; + case ns_md::PROGRAM_GROUP_COMMAND_LINE: + PROGRAM_GROUP_COMMAND_LINE = optarg; + break; + case ns_md::PROGRAM_GROUP_NAME: + PROGRAM_GROUP_NAME = optarg; + break; + case ns_md::COMMENT: + COMMENT.push_back(optarg); + break; + case ns_md::READ_NAME_REGEX: + READ_NAME_REGEX = optarg; + break; + case ns_md::OPTICAL_DUPLICATE_PIXEL_DISTANCE: + OPTICAL_DUPLICATE_PIXEL_DISTANCE = stoi(optarg); + break; + case ns_md::MAX_OPTICAL_DUPLICATE_SET_SIZE: + MAX_OPTICAL_DUPLICATE_SET_SIZE = stol(optarg); + break; + case ns_md::QUIET: + setBoolArg(&QUIET); + break; + case ns_md::VALIDATION_STRINGENCY: + if (strcmp("STRICT", optarg) == 0) + VALIDATION_STRINGENCY = ns_md::ValidationStringency::STRICT; + else if (strcmp("LENIENT", optarg) == 0) + VALIDATION_STRINGENCY = 
ns_md::ValidationStringency::LENIENT; + else if (strcmp("SILENT", optarg) == 0) + VALIDATION_STRINGENCY = ns_md::ValidationStringency::SILENT; + break; + case ns_md::COMPRESSION_LEVEL: + COMPRESSION_LEVEL = stoi(optarg); + break; + case ns_md::MAX_RECORDS_IN_RAM: + MAX_RECORDS_IN_RAM = stoi(optarg); + break; + case ns_md::CREATE_INDEX: + setBoolArg(&CREATE_INDEX); + break; + case ns_md::CREATE_MD5_FILE: + setBoolArg(&CREATE_MD5_FILE); + break; + default: + break; + } + } + + gArg.printArgValue(); +} + +// 打印版本信息 +void MarkDupsArg::PrintVersion() +{ + fprintf(stdout, "\n MarkDuplicate Version: %s\n", MARKDUPLICATE_VERSION); +} + +// 释放资源,关闭文件等 +void MarkDupsArg::Finalize(MarkDupsArg *pMdArg, + vector *pvAuxVar, + GlobalArg *pGArg) +{ +} + +// 打印帮助信息 +void MarkDupsArg::PrintHelp() +{ + FILE *fp = stdout; + fprintf(fp, + "Usage: MarkDuplicates [arguments]\n" + "\n" + "Example:\n" + " ./picard_cpp MarkDuplicates --num_thread 4 --INPUT input.bam --OUTPUT marked_duplicates.bam --METRICS_FILE marked_dup_metrics.txt\n" + "\n" + "Required Arguments:\n" + "\n" + "--INPUT,-I One or more input SAM, BAM or CRAM files to analyze. Must be coordinate sorted. This\n" + " argument must be specified at least once.Required.\n" + "\n" + "--METRICS_FILE,-M File to write duplication metrics to Required.\n" + "\n" + "--OUTPUT,-O The output file to write marked records to Required.\n" + "\n" + "\n" + "Optional Arguments:\n" + "\n" + "--ADD_PG_TAG_TO_READS \n" + " Add PG tag to each read in a SAM or BAM Default value: true. Possible values: {true,\n" + " false}\n" + "\n" + "--arguments_file read one or more arguments files and add them to the command line This argument may be\n" + " specified 0 or more times. Default value: null.\n" + "\n" + "--ASSUME_SORT_ORDER,-ASO \n" + " If not null, assume that the input file has this order even if the header says otherwise.\n" + " Default value: null. 
Possible values: {unsorted, queryname, coordinate, duplicate,\n" + " unknown} Cannot be used in conjunction with argument(s) ASSUME_SORTED (AS)\n" + "\n" + "\n" + "--ASSUME_SORTED,-AS If true, assume that the input file is coordinate sorted even if the header says\n" + " otherwise. Deprecated, used ASSUME_SORT_ORDER=coordinate instead. Default value: false.\n" + " Possible values: {true, false} Cannot be used in conjunction with argument(s)\n" + " ASSUME_SORT_ORDER (ASO)\n" + "\n" + "--BARCODE_TAG Barcode SAM tag (ex. BC for 10X Genomics) Default value: null.\n" + "\n" + "--CLEAR_DT Clear DT tag from input SAM records. Should be set to false if input SAM doesn't have this\n" + " tag. Default true Default value: true. Possible values: {true, false}\n" + "\n" + "--COMMENT,-CO Comment(s) to include in the output file's header. This argument may be specified 0 or\n" + " more times. Default value: null.\n" + "\n" + "--COMPRESSION_LEVEL Compression level for all compressed files created (e.g. BAM and VCF). Default value: 5.\n" + "\n" + "--CREATE_INDEX Whether to create an index when writing VCF or coordinate sorted BAM output. Default\n" + " value: false. Possible values: {true, false}\n" + "\n" + "--CREATE_MD5_FILE Whether to create an MD5 digest for any BAM or FASTQ files created. Default value:\n" + " false. Possible values: {true, false}\n" + "\n" + "--DUPLEX_UMI Treat UMIs as being duplex stranded. This option requires that the UMI consist of two\n" + " equal length strings that are separated by a hyphen (e.g. 'ATC-GTC'). Reads are considered\n" + " duplicates if, in addition to standard definition, have identical normalized UMIs. A UMI\n" + " from the 'bottom' strand is normalized by swapping its content around the hyphen (eg.\n" + " ATC-GTC becomes GTC-ATC). 
A UMI from the 'top' strand is already normalized as it is.\n" + " Both reads from a read pair considered top strand if the read 1 unclipped 5' coordinate is\n" + " less than the read 2 unclipped 5' coordinate. All chimeric reads and read fragments are\n" + " treated as having come from the top strand. With this option is it required that the\n" + " BARCODE_TAG hold non-normalized UMIs. Default false. Default value: false. Possible\n" + " values: {true, false}\n" + "\n" + "--DUPLICATE_SCORING_STRATEGY,-DS \n" + " The scoring strategy for choosing the non-duplicate among candidates. Default value:\n" + " SUM_OF_BASE_QUALITIES. Possible values: {SUM_OF_BASE_QUALITIES,\n" + " TOTAL_MAPPED_REFERENCE_LENGTH, RANDOM}\n" + "\n" + "--FLOW_EFFECTIVE_QUALITY_THRESHOLD \n" + " Threshold for considering a quality value high enough to be included when calculating\n" + " FLOW_QUALITY_SUM_STRATEGY calculation. Default value: 15.\n" + "\n" + "--FLOW_MODE enable parameters and behavior specific to flow based reads. Default value: false.\n" + " Possible values: {true, false}\n" + "\n" + "--FLOW_Q_IS_KNOWN_END \n" + " Treat position of read trimming based on quality as the known end (relevant for flow based\n" + " reads). Default false - if the read is trimmed on quality its end is not defined and the\n" + " read is duplicate of any read starting at the same place. Default value: false. Possible\n" + " values: {true, false}\n" + "\n" + "--FLOW_QUALITY_SUM_STRATEGY \n" + " Use specific quality summing strategy for flow based reads. The strategy ensures that the\n" + " same (and correct) quality value is used for all bases of the same homopolymer. Default\n" + " value: false. Possible values: {true, false}\n" + "\n" + "--FLOW_SKIP_FIRST_N_FLOWS \n" + " Skip first N flows, starting from the read's start, when considering duplicates. Useful\n" + " for flow based reads where sometimes there is noise in the first flows (for this argument,\n" + " \" read start \" means 5' end). 
Default value: 0.\n" + "\n" + "--help,-h display the help message Default value: false. Possible values: {true, false}\n" + "\n" + "--MAX_FILE_HANDLES_FOR_READ_ENDS_MAP,-MAX_FILE_HANDLES \n" + " Maximum number of file handles to keep open when spilling read ends to disk. Set this\n" + " number a little lower than the per-process maximum number of file that may be open. This\n" + " number can be found by executing the 'ulimit -n' command on a Unix system. Default value:\n" + " 8000.\n" + "\n" + "--MAX_OPTICAL_DUPLICATE_SET_SIZE \n" + " This number is the maximum size of a set of duplicate reads for which we will attempt to\n" + " determine which are optical duplicates. Please be aware that if you raise this value too\n" + " high and do encounter a very large set of duplicate reads, it will severely affect the\n" + " runtime of this tool. To completely disable this check, set the value to -1. Default\n" + " value: 300000.\n" + "\n" + "--MAX_RECORDS_IN_RAM When writing files that need to be sorted, this will specify the number of records stored\n" + " in RAM before spilling to disk. Increasing this number reduces the number of file handles\n" + " needed to sort the file, and increases the amount of RAM needed. Default value: 500000.\n" + "\n" + "--MAX_SEQUENCES_FOR_DISK_READ_ENDS_MAP,-MAX_SEQS \n" + " This option is obsolete. ReadEnds will always be spilled to disk. Default value: 50000.\n" + "\n" + "--MOLECULAR_IDENTIFIER_TAG \n" + " SAM tag to uniquely identify the molecule from which a read was derived. Use of this\n" + " option requires that the BARCODE_TAG option be set to a non null value. Default null.\n" + " Default value: null.\n" + "\n" + "--OPTICAL_DUPLICATE_PIXEL_DISTANCE \n" + " The maximum offset between two duplicate clusters in order to consider them optical\n" + " duplicates. The default is appropriate for unpatterned versions of the Illumina platform.\n" + " For the patterned flowcell models, 2500 is moreappropriate. 
For other platforms and\n" + " models, users should experiment to find what works best. Default value: 100.\n" + "\n" + "--PROGRAM_GROUP_COMMAND_LINE,-PG_COMMAND \n" + " Value of CL tag of PG record to be created. If not supplied the command line will be\n" + " detected automatically. Default value: null.\n" + "\n" + "--PROGRAM_GROUP_NAME,-PG_NAME \n" + " Value of PN tag of PG record to be created. Default value: MarkDuplicates.\n" + "\n" + "--PROGRAM_GROUP_VERSION,-PG_VERSION \n" + " Value of VN tag of PG record to be created. If not specified, the version will be detected\n" + " automatically. Default value: null.\n" + "\n" + "--PROGRAM_RECORD_ID,-PG \n" + " The program record ID for the @PG record(s) created by this program. Set to null to\n" + " disable PG record creation. This string may have a suffix appended to avoid collision\n" + " with other program record IDs. Default value: MarkDuplicates.\n" + "\n" + "--QUIET Whether to suppress job-summary info on System.err. Default value: false. Possible\n" + " values: {true, false}\n" + "\n" + "--READ_NAME_REGEX MarkDuplicates can use the tile and cluster positions to estimate the rate of optical\n" + " duplication in addition to the dominant source of duplication, PCR, to provide a more\n" + " accurate estimation of library size. By default (with no READ_NAME_REGEX specified),\n" + " MarkDuplicates will attempt to extract coordinates using a split on ':' (see Note below).\n" + " Set READ_NAME_REGEX to 'null' to disable optical duplicate detection. Note that without\n" + " optical duplicate counts, library size estimation will be less accurate. If the read name\n" + " does not follow a standard Illumina colon-separation convention, but does contain tile and\n" + " x,y coordinates, a regular expression can be specified to extract three variables:\n" + " tile/region, x coordinate and y coordinate from a read name. The regular expression must\n" + " contain three capture groups for the three variables, in order. 
It must match the entire\n" + " read name. e.g. if field names were separated by semi-colon (';') this example regex\n" + " could be specified (?:.*;)?([0-9]+)[^;]*;([0-9]+)[^;]*;([0-9]+)[^;]*$ Note that if no\n" + " READ_NAME_REGEX is specified, the read name is split on ':'. For 5 element names, the\n" + " 3rd, 4th and 5th elements are assumed to be tile, x and y values. For 7 element names\n" + " (CASAVA 1.8), the 5th, 6th, and 7th elements are assumed to be tile, x and y values.\n" + " Default value: .\n" + "\n" + "--READ_ONE_BARCODE_TAG \n" + " Read one barcode SAM tag (ex. BX for 10X Genomics) Default value: null.\n" + "\n" + "--READ_TWO_BARCODE_TAG \n" + " Read two barcode SAM tag (ex. BX for 10X Genomics) Default value: null.\n" + "\n" + "--REFERENCE_SEQUENCE,-R Reference sequence file. Default value: null.\n" + "\n" + "--REMOVE_DUPLICATES If true do not write duplicates to the output file instead of writing them with\n" + " appropriate flags set. Default value: false. Possible values: {true, false}\n" + "\n" + "--REMOVE_SEQUENCING_DUPLICATES \n" + " If true remove 'optical' duplicates and other duplicates that appear to have arisen from\n" + " the sequencing process instead of the library preparation process, even if\n" + " REMOVE_DUPLICATES is false. If REMOVE_DUPLICATES is true, all duplicates are removed and\n" + " this option is ignored. Default value: false. Possible values: {true, false}\n" + "\n" + "--SORTING_COLLECTION_SIZE_RATIO \n" + " This number, plus the maximum RAM available to the JVM, determine the memory footprint\n" + " used by some of the sorting collections. If you are running out of memory, try reducing\n" + " this number. Default value: 0.25.\n" + "\n" + "--TAG_DUPLICATE_SET_MEMBERS \n" + " If a read appears in a duplicate set, add two tags. The first tag, DUPLICATE_SET_SIZE_TAG\n" + " (DS), indicates the size of the duplicate set. 
The smallest possible DS value is 2 which\n" + " occurs when two reads map to the same portion of the reference only one of which is marked\n" + " as duplicate. The second tag, DUPLICATE_SET_INDEX_TAG (DI), represents a unique identifier\n" + " for the duplicate set to which the record belongs. This identifier is the index-in-file of\n" + " the representative read that was selected out of the duplicate set. Default value: false.\n" + " Possible values: {true, false}\n" + "\n" + "--TAGGING_POLICY \n" + " Determines how duplicate types are recorded in the DT optional attribute. Default value:\n" + " DontTag. Possible values: {DontTag, OpticalOnly, All}\n" + "\n" + "--TMP_DIR One or more directories with space available to be used by this program for temporary\n" + " storage of working files This argument may be specified 0 or more times. Default value:\n" + " null.\n" + "\n" + "--UNPAIRED_END_UNCERTAINTY \n" + " Maximal difference of the read end position that counted as equal. Useful for flow based\n" + " reads where the end position might vary due to sequencing errors. (for this argument,\n" + " \" read end \" means 3' end) Default value: 0.\n" + "\n" + "--USE_END_IN_UNPAIRED_READS \n" + " Make the end location of single end read be significant when considering duplicates, in\n" + " addition to the start location, which is always significant (i.e. require single-ended\n" + " reads to start andend on the same position to be considered duplicate) (for this argument,\n" + " \" read end \" means 3' end). Default value: false. Possible values: {true, false}\n" + "\n" + "--USE_JDK_DEFLATER,-use_jdk_deflater \n" + " Use the JDK Deflater instead of the Intel Deflater for writing compressed output Default\n" + " value: false. Possible values: {true, false}\n" + "\n" + "--USE_JDK_INFLATER,-use_jdk_inflater \n" + " Use the JDK Inflater instead of the Intel Inflater for reading compressed input Default\n" + " value: false. 
/*
  Description: Parameters used by the MarkDuplicates module. Mirrors the
               @Argument set of Picard's MarkDuplicates tool; the long
               descriptions below are condensed from the Picard docs.

  Copyright : All right reserved by ICT

  Author : Zhang Zhonghai
  Date : 2023/10/23
*/

#pragma once  // was missing: this header is included from multiple TUs

#include <string>
#include <vector>
// NOTE(review): the original include targets were lost in extraction;
// <string> and <vector> are required by the declarations below — confirm
// against the repository whether more were listed.

// NOTE(review): using-declarations at header scope leak into every includer;
// kept as-is because the member declarations below rely on the bare names.
using std::string;
using std::vector;

#define MARKDUPLICATE_VERSION "v0.1"

class GlobalArg;  // forward declaration; defined in src/common/global_arg.h

namespace ns_md {
    /* Option identifiers for the MarkDuplicates module. These are the
       getopt_long `val` codes; _START_NUM/_END_NUM only delimit the range
       (see MarkDupsArg::ARG_COUNT) and are not options themselves. */
    enum MarkDupsArgEnum
    {
        _START_NUM = 100,
        MAX_SEQUENCES_FOR_DISK_READ_ENDS_MAP,
        MAX_FILE_HANDLES_FOR_READ_ENDS_MAP,
        SORTING_COLLECTION_SIZE_RATIO,
        BARCODE_TAG,
        READ_ONE_BARCODE_TAG,
        READ_TWO_BARCODE_TAG,
        TAG_DUPLICATE_SET_MEMBERS,
        REMOVE_SEQUENCING_DUPLICATES,
        TAGGING_POLICY,
        CLEAR_DT,
        DUPLEX_UMI,
        MOLECULAR_IDENTIFIER_TAG,
        METRICS_FILE,
        REMOVE_DUPLICATES,
        ASSUME_SORTED,
        ASSUME_SORT_ORDER,
        DUPLICATE_SCORING_STRATEGY,
        PROGRAM_RECORD_ID,
        PROGRAM_GROUP_VERSION,
        PROGRAM_GROUP_COMMAND_LINE,
        PROGRAM_GROUP_NAME,
        COMMENT,
        READ_NAME_REGEX,
        OPTICAL_DUPLICATE_PIXEL_DISTANCE,
        MAX_OPTICAL_DUPLICATE_SET_SIZE,
        QUIET,
        VALIDATION_STRINGENCY,
        COMPRESSION_LEVEL,
        MAX_RECORDS_IN_RAM,
        CREATE_INDEX,
        CREATE_MD5_FILE,
        _END_NUM
    };

    /* How strict to be when reading a SAM or BAM, beyond bare minimum
       validation. */
    enum ValidationStringency
    {
        STRICT,   // throw on anything that looks wrong
        LENIENT,  // emit warnings but keep going if possible
        SILENT,   // like LENIENT, but without the warning messages

        DEFAULT_STRINGENCY = SILENT  // aliases SILENT (same numeric value)
    };

    /* Controls how duplicates are recorded in the DT optional tag on each
       read. */
    enum DuplicateTaggingPolicy
    {
        DontTag,      // leave DT untouched
        OpticalOnly,  // tag only sequencing ("optical") duplicates
        All           // tag every duplicate
    };

    /* Sort order of a SAM/BAM file. */
    enum SortOrder
    {
        unsorted,
        queryname,
        coordinate,
        duplicate,  // NB: this is not in the SAM spec!
        unknown
    };

    /* Strategy for scoring reads when choosing the representative
       (non-duplicate) read of a duplicate set. */
    enum ScoringStrategy
    {
        SUM_OF_BASE_QUALITIES,
        TOTAL_MAPPED_REFERENCE_LENGTH,
        RANDOM
    };
}

/* Per-thread working variables. */
struct AuxVar {
    const static int MIN_QSUM_QSCORE = 13;
    const static int REF_CONTEXT_PAD = 3;
    const static int REFERENCE_HALF_WINDOW_LENGTH = 150;

    double contaminantAlternateFraction;
};

/* All command-line parameters consumed by MarkDuplicates. Field defaults
   match Picard's defaults; UPPER_SNAKE names intentionally mirror the
   Picard option names one-to-one. */
struct MarkDupsArg
{
    /* SAM/BAM/CRAM optional attribute used to store the duplicate type. */
    string DUPLICATE_TYPE_TAG = "DT";
    /* DT value for duplicate type: library. */
    string DUPLICATE_TYPE_LIBRARY = "LB";
    /* DT value for duplicate type: sequencing (optical & pad-hopping, or
       "co-localized"). */
    string DUPLICATE_TYPE_SEQUENCING = "SQ";
    /* Attribute storing which read was selected as representative out of a
       duplicate set. */
    string DUPLICATE_SET_INDEX_TAG = "DI";
    /* Attribute storing the size of a duplicate set. */
    string DUPLICATE_SET_SIZE_TAG = "DS";

    /* OpticalDuplicateFinder defaults. */
    int DEFAULT_OPTICAL_DUPLICATE_DISTANCE = 100;
    int DEFAULT_BIG_DUPLICATE_SET_SIZE = 1000;
    // larger than this generates over 100 billion comparisons in the n^2
    // optical-duplicate algorithm
    int DEFAULT_MAX_DUPLICATE_SET_SIZE = 300000;

    /* Obsolete: ReadEnds will always be spilled to disk. */
    int MAX_SEQUENCES_FOR_DISK_READ_ENDS_MAP = 50000;

    /* Maximum number of file handles kept open when spilling read ends to
       disk. Set a little lower than the per-process open-file limit
       (`ulimit -n`). */
    int MAX_FILE_HANDLES_FOR_READ_ENDS_MAP = 8000;

    /* This number, together with the maximum available RAM, determines the
       memory footprint of the sorting collections; reduce it when running
       out of memory. */
    double SORTING_COLLECTION_SIZE_RATIO = 0.25;

    /* Barcode SAM tag (e.g. BC for 10X Genomics). Optional. */
    string BARCODE_TAG = "";
    /* Read-one barcode SAM tag (e.g. BX for 10X Genomics). Optional. */
    string READ_ONE_BARCODE_TAG = "";
    /* Read-two barcode SAM tag (e.g. BX for 10X Genomics). Optional. */
    string READ_TWO_BARCODE_TAG = "";

    /* If true, every read in a duplicate set gets two extra tags:
       DS (set size, minimum 2) and DI (index-in-file of the representative
       read, unique per set). Optional. */
    bool TAG_DUPLICATE_SET_MEMBERS = false;

    /* If true, remove 'optical' and other sequencing-process duplicates even
       when REMOVE_DUPLICATES is false (ignored when REMOVE_DUPLICATES is
       true, since then everything is removed). */
    bool REMOVE_SEQUENCING_DUPLICATES = false;

    /* How duplicate types are recorded in the DT optional attribute. */
    ns_md::DuplicateTaggingPolicy TAGGING_POLICY = ns_md::DuplicateTaggingPolicy::DontTag;

    /* Clear pre-existing DT tags from input records. Set to false if the
       input SAM does not carry this tag. */
    bool CLEAR_DT = true;

    /* Treat UMIs as duplex stranded: the UMI must be two equal-length
       strings separated by a hyphen (e.g. 'ATC-GTC'). Bottom-strand UMIs are
       normalized by swapping around the hyphen before comparison; requires
       BARCODE_TAG to hold non-normalized UMIs. */
    bool DUPLEX_UMI = false;

    /* SAM tag uniquely identifying the source molecule of a read; requires
       BARCODE_TAG to be set. Optional. */
    string MOLECULAR_IDENTIFIER_TAG = "";

    /* Inherited from AbstractMarkDuplicatesCommandLineProgram: */

    /* File to write duplication metrics to. */
    string METRICS_FILE;

    /* If true, drop duplicates from the output instead of writing them with
       the duplicate flag set. */
    bool REMOVE_DUPLICATES = false;

    /* Assume coordinate-sorted input regardless of the header. Deprecated;
       use ASSUME_SORT_ORDER=coordinate. Mutually exclusive with
       ASSUME_SORT_ORDER. */
    bool ASSUME_SORTED = false;

    /* If set, assume the input has this order even if the header disagrees.
       Mutually exclusive with ASSUME_SORTED. */
    ns_md::SortOrder ASSUME_SORT_ORDER = ns_md::SortOrder::unsorted;

    /* Scoring strategy for choosing the non-duplicate among candidates. */
    ns_md::ScoringStrategy DUPLICATE_SCORING_STRATEGY = ns_md::ScoringStrategy::TOTAL_MAPPED_REFERENCE_LENGTH;

    /* Program record ID for the @PG record(s) created by this program; a
       suffix may be appended to avoid collisions. Optional. */
    string PROGRAM_RECORD_ID = "MarkDuplicates";
    /* VN tag of the created PG record; auto-detected when empty. Optional. */
    string PROGRAM_GROUP_VERSION;
    /* CL tag of the created PG record; auto-detected when empty. Optional. */
    string PROGRAM_GROUP_COMMAND_LINE;
    /* PN tag of the created PG record. */
    string PROGRAM_GROUP_NAME = "MarkDuplicates";

    /* Comment(s) to include in the output file's header. Optional. */
    // NOTE(review): template argument was lost in extraction; <string> is
    // implied by the option semantics — confirm against the repository.
    vector<string> COMMENT;

    /* Inherited from AbstractOpticalDuplicateFinderCommandLineProgram: */

    /* Regex with three capture groups extracting tile/region, x and y from a
       read name, used to estimate the optical duplication rate. Set to
       'null' to disable optical duplicate detection; when unset, the name is
       split on ':' (5-element names: fields 3-5; 7-element CASAVA 1.8 names:
       fields 5-7). Optional. */
    string READ_NAME_REGEX = "(?:.*:)?([0-9]+)[^:]*:([0-9]+)[^:]*:([0-9]+)[^:]*$";

    /* Maximum pixel offset between two clusters for them to count as optical
       duplicates. Default suits unpatterned Illumina flowcells; patterned
       models usually want 2500. */
    int OPTICAL_DUPLICATE_PIXEL_DISTANCE = DEFAULT_OPTICAL_DUPLICATE_DISTANCE;

    /* Maximum duplicate-set size for which optical-duplicate detection is
       attempted (the check is quadratic); -1 disables the limit. */
    long MAX_OPTICAL_DUPLICATE_SET_SIZE = DEFAULT_MAX_DUPLICATE_SET_SIZE;

    /* Inherited from CommandLineProgram: */

    /* Suppress job-summary info on stderr. */
    bool QUIET = false;

    /* Validation stringency for all SAM files read by this program; SILENT
       can improve performance when variable-length data (read, qualities,
       tags) need not be decoded. */
    ns_md::ValidationStringency VALIDATION_STRINGENCY = ns_md::ValidationStringency::DEFAULT_STRINGENCY;

    /* Compression level for all compressed files created (e.g. BAM, VCF). */
    int COMPRESSION_LEVEL = 5;

    /* Records held in RAM before spilling to disk when sorting: higher needs
       more RAM but fewer file handles. */
    int MAX_RECORDS_IN_RAM = 500000;

    /* Create an index when writing VCF or coordinate-sorted BAM output. */
    bool CREATE_INDEX = false;

    /* Create an MD5 digest for any BAM or FASTQ files created. */
    bool CREATE_MD5_FILE = false;

    /* Number of real MarkDuplicates options (range delimiters excluded). */
    const static int ARG_COUNT = ns_md::MarkDupsArgEnum::_END_NUM - ns_md::MarkDupsArgEnum::_START_NUM - 1;

    // Parse the command line into this struct (and the global arguments).
    // NOTE(review): the vector's element type was lost in extraction;
    // AuxVar is inferred from the parameter name — confirm against the .cpp.
    void parseArgument(int argc,
                       char **argv,
                       vector<AuxVar> *pvAuxVar,
                       GlobalArg *pGArg);

    // Print usage text for the MarkDuplicates module.
    static void PrintHelp();

    // Print MARKDUPLICATE_VERSION.
    static void PrintVersion();

    // Release resources, close files, etc.
    static void Finalize(MarkDupsArg *pMdArg,
                         vector<AuxVar> *pvAuxVar,
                         GlobalArg *pGArg);
};