重构，修改bambuf，支持清理某个read之前的缓存

2023-11-06 12:38:30 +08:00 · 2023-11-06 12:38:30 +08:00 · a3a0b64ef2
parent 023836a047
commit a3a0b64ef2
24 changed files with 3218 additions and 42 deletions
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@ -2,6 +2,8 @@
    "files.associations": {
        "cstring": "cpp",
        "vector": "cpp",
-        "random": "cpp"
+        "random": "cpp",
+        "ostream": "cpp",
+        "*.tcc": "cpp"
    }
 }
--- a/build.sh
+++ b/build.sh
@ -3,6 +3,6 @@ dir="/home/zzh/work/GeneKit/picard_cpp/build"
 #[ -d "$dir" ] && rm -rf "$dir"
 #mkdir "$dir"
 cd "$dir"
-cmake .. -DCMAKE_BUILD_TYPE=Debug
-#cmake .. -DCMAKE_BUILD_TYPE=Release
+#cmake .. -DCMAKE_BUILD_TYPE=Debug
+cmake .. -DCMAKE_BUILD_TYPE=Release
 make -j 8
--- a/out.bam
+++ b/out.bam
--- a/run.sh
+++ b/run.sh
@ -1,8 +1,13 @@
 /home/zzh/work/GeneKit/picard_cpp/build/bin/picard_cpp \
    MarkDuplicates \
-    --INPUT test.bam \
+    --INPUT /mnt/d/data/zy_normal.bam \
    --OUTPUT out.bam \
    --num_threads 12 \
    --max_mem 4G \
    --verbosity DEBUG \
    --asyncio true
+
+
+# --INPUT /mnt/d/data/100w.bam \
+#    --INPUT /mnt/d/data/zy_normal.bam \
+
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@ -4,6 +4,8 @@ SET(EXECUTABLE_OUTPUT_PATH "${PROJECT_BINARY_DIR}/bin")
 # 源码目录
 AUX_SOURCE_DIRECTORY(${PROJECT_SOURCE_DIR}/src MAIN_SRC)
 AUX_SOURCE_DIRECTORY(${PROJECT_SOURCE_DIR}/src/common COMMON)
+AUX_SOURCE_DIRECTORY(${PROJECT_SOURCE_DIR}/src/common/utils UTILS)
+AUX_SOURCE_DIRECTORY(${PROJECT_SOURCE_DIR}/src/common/hts HTS)
 AUX_SOURCE_DIRECTORY(${PROJECT_SOURCE_DIR}/src/sam SAM_SRC)
 AUX_SOURCE_DIRECTORY(${PROJECT_SOURCE_DIR}/src/sam/markdups SAM_MARKDUPS_SRC)

@ -19,12 +21,15 @@ LINK_DIRECTORIES("${PROJECT_SOURCE_DIR}/lib/htslib")
 set(PG_NAME "picard_cpp")

 # 为程序添加依赖关系
-ADD_EXECUTABLE(${PG_NAME} ${MAIN_SRC} ${COMMON} ${SAM_SRC} ${SAM_MARKDUPS_SRC})
+ADD_EXECUTABLE(${PG_NAME} ${MAIN_SRC} ${COMMON} ${UTILS} ${HTS}
+    ${SAM_SRC} ${SAM_MARKDUPS_SRC})

 # 链接库
 TARGET_LINK_LIBRARIES(${PG_NAME} libhts.a)

 # 检测系统是否包含必需的库
+
+# pthread库
 find_package(Threads REQUIRED)
 if(THREADS_HAVE_PTHREAD_ARG)
    set_property(TARGET ${PG_NAME} PROPERTY COMPILE_OPTIONS "-pthread")
@ -33,3 +38,31 @@ endif()
 if(CMAKE_THREAD_LIBS_INIT)
    TARGET_LINK_LIBRARIES(${PG_NAME} "${CMAKE_THREAD_LIBS_INIT}")
 endif()
+
+# bzip2库
+find_package(BZip2 REQUIRED)
+if(BZip2_FOUND)
+    INCLUDE_DIRECTORIES(${BZIP2_INCLUDE_DIR})
+    TARGET_LINK_LIBRARIES(${PG_NAME} ${BZIP2_LIBRARIES})
+endif()
+
+#openmp
+find_package(OpenMP)
+if(OPENMP_FOUND)
+    set_target_properties(${PG_NAME} PROPERTIES COMPILE_OPTIONS ${OpenMP_CXX_FLAGS})
+    target_link_libraries(${PG_NAME} ${OpenMP_CXX_FLAGS})
+endif()
+
+# 链接库
+TARGET_LINK_LIBRARIES(${PG_NAME} -lz)
+TARGET_LINK_LIBRARIES(${PG_NAME} -lm)
+TARGET_LINK_LIBRARIES(${PG_NAME} -llzma)
+TARGET_LINK_LIBRARIES(${PG_NAME} -lbz2)
+TARGET_LINK_LIBRARIES(${PG_NAME} -lcurl)
+
+# 安装文件夹设置
+INSTALL(TARGETS ${PG_NAME}
+    RUNTIME DESTINATION bin
+    LIBRARY DESTINATION lib
+    ARCHIVE DESTINATION libstatic
+    )
--- a/src/common/hts/bam_buf.cpp
+++ b/src/common/hts/bam_buf.cpp
@ -0,0 +1,322 @@
+/*
+     Description: 读入sam/bam时，开辟一个大的buf，存放这些数据
+
+     Copyright : All right reserved by ICT
+
+     Author : Zhang Zhonghai
+     Date : 2019/11/27
+*/
+
+#include "bam_buf.h"
+
+/*
+ * BamBuf类
+ */
+// Read records until the input is exhausted or the buffer cannot take the
+// next record. Returns the number of records appended to bv by this call.
+int BamBuf::ReadBam()
+{
+    int appended = 0;
+    if (handle_last)
+    {
+        // A record read by the previous call is still waiting in bw.
+        // has_enough_space() has to be called here because it may move
+        // mem_offset back to the start of the buffer.
+        if (!has_enough_space())
+            return appended; // still no room for it
+        append_one_bam();
+        ++appended;
+    }
+    while (read_stat_ >= 0)
+    {
+        read_stat_ = sam_read1(fp, hdr, bw->b);
+        if (read_stat_ < 0)
+            break;
+        bw->end_pos_ = BamWrap::BamEndPos(bw->b);
+        if (!has_enough_space())
+            break;
+        // Only records that made it into the buffer are counted.
+        append_one_bam();
+        ++appended;
+    }
+    // A non-negative status here means the loop stopped on a record that
+    // was read but not stored; remember it for the next call.
+    handle_last = (read_stat_ >= 0);
+    return appended;
+}
+
+// Initialise the buffer: remember the file handle and header, allocate
+// mem_size bytes of record storage and the scratch record used for reading.
+// Prints a message and terminates the process if any allocation fails.
+void BamBuf::Init(samFile *fp,
+                  sam_hdr_t *hdr,
+                  int64_t mem_size)
+{
+    this->fp = fp;
+    this->hdr = hdr;
+    this->mem_size = mem_size;
+    this->mem = (uint8_t *)malloc(mem_size);
+    this->bw = (BamWrap *)malloc(sizeof(BamWrap));
+    // bw has to be validated before bw->b is written.
+    if (this->bw == NULL ||
+        this->mem == NULL)
+    {
+        fprintf(stderr, "allocate memory failed! Abort\n");
+        exit(-1);
+    }
+    this->bw->end_pos_ = 0;
+    this->bw->b = bam_init1();
+    if (this->bw->b == NULL)
+    {
+        fprintf(stderr, "allocate memory failed! Abort\n");
+        exit(-1);
+    }
+}
+
+// Drop the first idxInBv entries of bv, keeping the remaining entries in
+// order, then recompute the bounds of the retained data.
+void BamBuf::ClearBeforeIdx(size_t idxInBv)
+{
+    if (idxInBv == 0)
+        return;
+    if (idxInBv >= bv.size())
+        bv.clear();
+    else
+        bv.erase(bv.begin(), bv.begin() + idxInBv);
+    prepare_read();
+}
+
+// Forget every buffered record and reset the bookkeeping for the next read.
+void BamBuf::ClearAll()
+{
+    bv.resize(0);
+    prepare_read();
+}
+
+// Prepare for the next read: record which byte range of mem is still
+// occupied by the records left in bv.
+inline void BamBuf::prepare_read()
+{
+    if (bv.empty())
+    {
+        // Nothing is retained, so storage restarts at the beginning.
+        legacy_start = 0;
+        legacy_end = 0;
+        mem_offset = 0;
+        return;
+    }
+    BamWrap *first = bv.front();
+    BamWrap *last = bv.back();
+    legacy_start = (int64_t)first - (int64_t)mem;
+    legacy_end = (int64_t)last + last->length() - (int64_t)mem;
+}
+
+// Check whether the record currently held in bw fits into the buffer.
+// Besides returning the answer this function changes state:
+//   * legacy_start is increased by mem_size when it is <= legacy_end;
+//   * mem_offset is reset to 0 when the record would run past mem_size.
+// Returns true when the record fits before legacy_start, false otherwise.
+inline bool BamBuf::has_enough_space()
+{
+    const uint32_t bam_len = bw->length();
+    int64_t potential_end = mem_offset + bam_len;
+    // Move legacy_start one buffer length ahead so the comparisons below
+    // can be done without wrap-around.
+    if (legacy_start <= legacy_end)
+        legacy_start += mem_size;
+    if (potential_end >= legacy_start)
+    {
+        return false;
+    }
+    // The record would cross the end of mem: restart at offset 0.
+    if (potential_end >= mem_size)
+    {
+        mem_offset = 0;
+    }
+    // Re-evaluate with the (possibly reset) offset, shifted by mem_size
+    // when it lies before legacy_end.
+    int64_t virtual_offset = mem_offset;
+    if (virtual_offset < legacy_end)
+        virtual_offset += mem_size;
+    potential_end = virtual_offset + bam_len;
+    return potential_end < legacy_start;
+}
+
+// Copy the record held in bw into mem at mem_offset and register it in bv.
+// Layout written at mem + mem_offset: a BamWrap, immediately followed by a
+// bam1_t, immediately followed by l_data bytes of record data.
+inline void BamBuf::append_one_bam()
+{
+    BamWrap *bwp = (BamWrap *)(mem + mem_offset);
+    *bwp = *bw;
+    // Point the copied wrapper at the bam1_t stored right behind it.
+    bwp->b = (bam1_t *)((char *)bwp + sizeof(*bwp));
+    bam1_t *bp = bwp->b;
+    *bp = *bw->b;
+    // Point the copied bam1_t at the data bytes stored right behind it.
+    bp->data = (uint8_t *)((char *)bwp->b + sizeof(bam1_t));
+    memcpy(bp->data, bw->b->data, bw->b->l_data);
+    // Advance the write position, rounded up to a multiple of 8.
+    mem_offset = (mem_offset + bw->length() + 8 - 1) & ~((size_t)(8 - 1));
+
+    //    cout << "size: " << bv.size() << " " << buf_name << endl;
+    bv.push_back(bwp);
+}
+
+// Store the record left over from the previous read, if there is one and
+// it fits. Returns true only when the pending record was appended.
+inline bool BamBuf::handle_last_read()
+{
+    // has_enough_space() is only consulted when a record is pending; it has
+    // to be called because it may reposition mem_offset at the buffer edge.
+    if (!handle_last || !has_enough_space())
+        return false;
+    append_one_bam();
+    handle_last = false;
+    return true;
+}
+
+/*
+ * AsyncIoBamBuf 类
+ */
+// Initialise the buffers. In asynchronous mode the memory budget is split
+// evenly between two buffers and storage for the reader thread id is
+// allocated; in synchronous mode a single buffer receives the whole budget.
+// Prints a message and terminates the process if the allocation fails.
+void AsyncIoBamBuf::Init(samFile *fp,
+                         sam_hdr_t *hdr,
+                         int64_t mem_size)
+{
+    if (use_async_io_)
+    {
+        buf1_.Init(fp, hdr, mem_size >> 1);
+        buf2_.Init(fp, hdr, mem_size >> 1);
+        pi_ = &buf1_;
+        po_ = &buf2_;
+        tid_ = (pthread_t *)malloc(sizeof(pthread_t));
+        if (tid_ == NULL)
+        { // same failure policy as BamBuf::Init
+            fprintf(stderr, "allocate memory failed! Abort\n");
+            exit(-1);
+        }
+    }
+    else
+    {
+        buf1_.Init(fp, hdr, mem_size);
+        pi_ = &buf1_;
+        // Never read from in synchronous mode (legacy_size_ stays 0), but
+        // give it a valid target instead of leaving it unset.
+        po_ = &buf2_;
+    }
+}
+
+// Read the next batch of records, dispatching on the configured I/O mode.
+int AsyncIoBamBuf::ReadBam()
+{
+    return use_async_io_ ? async_read_bam() : sync_read_bam();
+}
+
+// Synchronous read: apply any clear request recorded since the last call,
+// read into the current buffer and rebuild the access array.
+int AsyncIoBamBuf::sync_read_bam()
+{
+    if (clear_all_)
+    {
+        pi_->ClearAll();
+        clear_all_ = false;
+    }
+    else if (clear_before_idx_ > 0)
+    {
+        pi_->ClearBeforeIdx(clear_before_idx_);
+        clear_before_idx_ = 0;
+    }
+    const int fetched = pi_->ReadBam();
+    refresh_bam_arr();
+    return fetched;
+}
+
+// Asynchronous read. The first call reads directly into pi_. Every later
+// call waits for the reader thread started by the previous call, applies
+// the pending clear request via resize_buf(), optionally swaps the two
+// buffers and reports the count produced by that thread. In all cases a new
+// reader thread is started before returning.
+int AsyncIoBamBuf::async_read_bam()
+{
+    int read_num = 0;
+    if (first_read_)
+    {
+        read_num = pi_->ReadBam();
+        first_read_ = false;
+        refresh_bam_arr();
+    }
+    else
+    {
+        // Wait for the background read, then adjust the buffers.
+        pthread_join(*tid_, 0);
+        resize_buf();
+
+        if (need_read_)
+        { // swap the current buffer and the background buffer
+            BamBuf *tmp = pi_;
+            pi_ = po_;
+            po_ = tmp;
+        }
+        read_num = last_read_num_;
+        refresh_bam_arr();
+    }
+    // Start the next background read (the return value is not checked).
+    pthread_create(tid_, 0, async_read, this);
+    return read_num;
+}
+
+// Thread entry point: data is the AsyncIoBamBuf to work on. Reads into the
+// background buffer when a read was requested and the input is still
+// readable, and stores the number of records read in last_read_num_.
+void *AsyncIoBamBuf::async_read(void *data)
+{
+    AsyncIoBamBuf *self = static_cast<AsyncIoBamBuf *>(data);
+    int fetched = 0;
+    if (self->need_read_ && self->ReadStat() >= 0)
+        fetched = self->po_->ReadBam();
+    self->last_read_num_ = fetched;
+    pthread_exit(0);
+}
+
+// Request removal of all reads before idxInBv. Only the request is stored
+// here; it is applied later because the buffer behind po_ may be in the
+// middle of a background read.
+void AsyncIoBamBuf::ClearBeforeIdx(size_t idxInBv)
+{
+    this->clear_before_idx_ = idxInBv;
+}
+
+// Request removal of every read from the previous round. Only the request
+// is stored here; it is applied later because the buffer behind po_ may be
+// in the middle of a background read.
+void AsyncIoBamBuf::ClearAll()
+{
+    this->clear_all_ = true;
+}
+
+// Apply the pending clear request (clear_all_ or clear_before_idx_) to the
+// two buffers and decide, through need_read_, whether the caller should
+// swap buffers and whether the next background read should run.
+// legacy_size_ is updated to the number of reads carried over.
+inline void AsyncIoBamBuf::resize_buf()
+{
+    if (clear_all_)
+    { // drop everything from the previous round
+        clear_all_ = false;
+        po_->ClearBeforeIdx(legacy_size_);
+        pi_->ClearAll();
+        if (pi_->handle_last_read()) // a read from the previous round was still pending
+        {
+            last_read_num_ += 1;
+            legacy_size_ = pi_->Size(); // expected to be a single read
+            need_read_ = true;
+        }
+        else // no room for the pending read, or nothing pending (e.g. input exhausted): no swap
+        {
+            legacy_size_ = 0;
+            need_read_ = false;
+        }
+    }
+    else if (clear_before_idx_ > 0)
+    {
+        if (clear_before_idx_ < legacy_size_)
+        {
+            // The request only touches carried-over reads held in po_.
+            po_->ClearBeforeIdx(clear_before_idx_);
+            legacy_size_ -= clear_before_idx_;
+            // no swap and no background read needed
+            need_read_ = false;
+        }
+        else
+        {
+            // All carried-over reads go, plus the remainder from pi_.
+            po_->ClearBeforeIdx(legacy_size_);
+            pi_->ClearBeforeIdx(clear_before_idx_ - legacy_size_);
+            if (pi_->handle_last_read()) // a read from the previous round was still pending
+            {
+                last_read_num_ += 1;
+                legacy_size_ = pi_->Size(); // original note: expected to be a single read
+                need_read_ = true;
+            }
+            else // no room for the pending read, or nothing pending (e.g. input exhausted): no swap
+            {
+                legacy_size_ = 0;
+                need_read_ = false;
+            }
+        }
+        clear_before_idx_ = 0;
+    }
+}
--- a/src/common/hts/bam_buf.h
+++ b/src/common/hts/bam_buf.h
@ -0,0 +1,169 @@
+/*
+     Description: 读入sam/bam时，开辟一个大的buf，存放这些数据
+
+     Copyright : All right reserved by ICT
+
+     Author : Zhang Zhonghai
+     Date : 2019/11/27
+*/
+
+#ifndef BAM_BUF_H_
+#define BAM_BUF_H_
+
+#include <vector>
+#include <stdint.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <string.h>
+#include <functional>
+#include <pthread.h>
+
+#include <iostream>
+#include <sstream>
+#include <fstream>
+
+#include <htslib/sam.h>
+
+#include "interval.h"
+#include "bam_wrap.h"
+
+using std::vector;
+using namespace std;
+
+/*
+ * Holds BAM records read from a file inside one contiguous memory block.
+ */
+struct BamBuf
+{
+    sam_hdr_t *hdr = nullptr; // header of the SAM/BAM file, set by Init()
+    samFile *fp = nullptr;    // SAM/BAM file handle, set by Init()
+    BamWrap *bw = nullptr;    // scratch record that each read is loaded into
+    uint8_t *mem = nullptr;   // storage for the buffered records; released by the destructor
+    int64_t mem_offset = 0;   // where the next record will be stored
+    int64_t mem_size = 0;     // size of mem in bytes, set by Init()
+    int read_stat_ = 0;       // status of the last read; negative once reading has stopped
+    vector<BamWrap *> bv;     // pointers to the buffered records, for convenient access
+    int64_t legacy_start = 0; // start offset in mem of the retained records, inclusive
+    int64_t legacy_end = 0;   // end offset in mem of the retained records, exclusive
+    bool handle_last = false; // true when the last record read still has to be stored
+
+    // Initialise the buffer.
+    void Init(samFile *fp,
+              sam_hdr_t *hdr,
+              int64_t mem_size);
+    // Read until the input ends or the buffer is full.
+    int ReadBam();
+    // Drop the records before idxInBv and prepare for the next read.
+    void ClearBeforeIdx(size_t idxInBv);
+    // Drop every buffered record.
+    void ClearAll();
+    inline int64_t Size() { return bv.size(); }  // number of buffered reads
+    inline int ReadStat() { return read_stat_; } // read status of the input
+    ~BamBuf()
+    {
+        if (this->mem != nullptr)
+        {
+            free(this->mem);
+        }
+        if (this->bw != nullptr)
+        {
+            bam_destroy1(bw->b);
+            free(bw);
+        }
+    }
+    // Recompute the bounds of the retained records.
+    void prepare_read();
+    // Check whether the buffer still has room for the record in bw.
+    bool has_enough_space();
+    // Store the record in bw into the buffer.
+    void append_one_bam();
+    // Store the record left over from the previous read, if any.
+    bool handle_last_read();
+
+    // Convenience wrappers around bv.
+    inline BamWrap *operator[](int64_t pos) { return bv[pos]; }
+    inline void push_back(BamWrap *val) { bv.push_back(val); }
+    inline void clear() { bv.clear(); }
+    inline void resize(int64_t s) { bv.resize(s); }
+};
+
+/*
+ * Double-buffered reader: one buffer is consumed while a background thread
+ * fills the other one.
+ */
+struct AsyncIoBamBuf
+{
+    BamBuf buf1_;
+    BamBuf buf2_;
+    BamBuf *pi_ = nullptr; // buffer currently in use
+    BamBuf *po_ = nullptr; // buffer being filled in the background
+    pthread_t *tid_ = NULL;
+    int64_t legacy_size_ = 0; // reads from the previous round still kept (they live in po_)
+    bool first_read_ = true;
+    int last_read_num_ = 0; // number of reads produced by the last background read
+    bool need_read_ = true;
+    bool use_async_io_ = true;
+    int64_t clear_before_idx_ = 0; // deferred request: drop all reads before this index
+    bool clear_all_ = false;       // deferred request: drop all reads
+
+    vector<BamWrap *> bam_arr_; // flat view over the reads of both buffers
+
+    AsyncIoBamBuf() {}
+    AsyncIoBamBuf(bool use_async) : use_async_io_(use_async) {}
+    // Destructor
+    ~AsyncIoBamBuf()
+    {
+        if (tid_ != NULL)
+        {
+            // *tid_ is only written by pthread_create() in async_read_bam(),
+            // which clears first_read_ in that same first call. While
+            // first_read_ is still true no thread exists and joining the
+            // uninitialised id would be undefined.
+            if (!first_read_)
+            {
+                pthread_join(*tid_, 0);
+            }
+            free(tid_);
+        }
+        // buf1_ and buf2_ release their own memory in their destructors.
+    }
+
+    // Initialise the buffers.
+    void Init(samFile *fp,
+              sam_hdr_t *hdr,
+              int64_t mem_size);
+
+    // Read the next batch of records.
+    int ReadBam();
+    // Request removal of the reads before idxInBv (applied on the next read).
+    void ClearBeforeIdx(size_t idxInBv);
+    // Request removal of all reads (applied on the next read).
+    void ClearAll();
+    // Number of reads currently accessible.
+    inline int64_t Size() { return legacy_size_ + pi_->Size(); }
+    inline int ReadStat() { return pi_->read_stat_; }
+    inline BamWrap *operator[](int64_t pos)
+    {
+        return bam_arr_[pos];
+    }
+
+    // Synchronous read.
+    int sync_read_bam();
+    // Asynchronous read.
+    int async_read_bam();
+    // Entry point of the background reader thread.
+    static void *async_read(void *data);
+    void resize_buf();
+    // Rebuild bam_arr_: the first legacy_size_ entries come from po_, the
+    // rest from pi_. The array is always rebuilt; an unchanged element count
+    // does not mean the pointers are unchanged (a clear followed by a read
+    // of the same number of records replaces every entry).
+    inline void refresh_bam_arr()
+    {
+        const int64_t total = this->Size();
+        bam_arr_.resize(total);
+        for (int64_t i = 0; i < total; ++i)
+        {
+            if (i < legacy_size_)
+                bam_arr_[i] = (*po_)[i];
+            else
+                bam_arr_[i] = (*pi_)[i - legacy_size_];
+        }
+    }
+};
+
+typedef AsyncIoBamBuf BamBufType;
+
+typedef vector<BamWrap *> BamArray;
+
+#endif
--- a/src/common/hts/bam_wrap.h
+++ b/src/common/hts/bam_wrap.h
@ -0,0 +1,338 @@
+/*
+     Description: 读入sam/bam时，开辟一个大的buf，存放这些数据
+
+     Copyright : All right reserved by ICT
+
+     Author : Zhang Zhonghai
+     Date : 2019/11/27
+*/
+
+#ifndef BAM_WRAP_H_
+#define BAM_WRAP_H_
+#include <map>
+#include <string>
+#include <vector>
+#include <sstream>
+
+#include <limits.h>
+#include <math.h>
+
+#include <htslib/sam.h>
+
+using namespace std;
+
+/*
+    Naming note: lower_snake_case member functions (static or not) expose what
+    is essentially an attribute of the read, while UpperCamelCase functions
+    return values that are computed.
+*/
+/*
+ * Thin wrapper around an htslib bam1_t record.
+ */
+struct BamWrap
+{
+    // A global position is (contig id << MAX_CONTIG_LEN_SHIFT) | position.
+    const static int MAX_CONTIG_LEN_SHIFT = 30;
+    const static int READ_MAX_LENGTH = 200;
+    const static int READ_MAX_DEPTH = 1000; // only used to pre-size storage; larger depths are fine
+
+    // Keep the data members minimal to save memory.
+    bam1_t *b;
+    int64_t end_pos_; // global end position of the alignment, inclusive
+
+    // Global start position.
+    inline int64_t start_pos()
+    {
+        return bam_global_pos(b);
+    }
+    // Global end position.
+    inline int64_t end_pos()
+    {
+        return end_pos_;
+    }
+    // Length of the reference span covered by the alignment.
+    // Returned as 32 bits: spans above 32767 overflowed the former int16_t.
+    inline int32_t read_len()
+    {
+        return (int32_t)(end_pos_ - start_pos() + 1);
+    }
+
+    // Start position within the contig.
+    inline int32_t contig_pos()
+    {
+        return b->core.pos;
+    }
+    // End position within the contig.
+    inline int32_t contig_end_pos()
+    {
+        return bam_pos(end_pos_);
+    }
+    // Number of bases in the read sequence (l_qseq is a 32-bit field).
+    inline int32_t seq_len()
+    {
+        return b->core.l_qseq;
+    }
+
+    // Start position including a leading soft clip.
+    inline int32_t softclip_start()
+    {
+        const bam1_core_t &bc = b->core;
+        if (bc.n_cigar == 0) // no CIGAR: nothing to index
+            return bc.pos;
+        const uint32_t *cigar = bam_get_cigar(b);
+        const char c = bam_cigar_opchr(cigar[0]);
+        const int len = bam_cigar_oplen(cigar[0]);
+        if (c == 'S')
+            return bc.pos - len;
+        return bc.pos;
+    }
+
+    // End position including a trailing soft clip.
+    inline int32_t softclip_end()
+    {
+        return bam_pos(end_pos_) + right_softclip_len();
+    }
+
+    // Length of the trailing soft clip, 0 if there is none.
+    inline int32_t right_softclip_len()
+    {
+        const bam1_core_t &bc = b->core;
+        if (bc.n_cigar == 0) // no CIGAR: cigar[n_cigar - 1] would be out of bounds
+            return 0;
+        const uint32_t *cigar = bam_get_cigar(b);
+        const char c = bam_cigar_opchr(cigar[bc.n_cigar - 1]);
+        const int len = bam_cigar_oplen(cigar[bc.n_cigar - 1]);
+        if (c == 'S')
+            return len;
+        return 0;
+    }
+
+    // Read sequence as text; every code other than A/C/G/T becomes 'N'.
+    inline std::string sequence()
+    {
+        ostringstream oss;
+        const uint8_t *seq = bam_get_seq(b);
+        const bam1_core_t &bc = b->core;
+        const char base_to_char[16] = {'N', 'A', 'C', 'N', 'G', 'N', 'N', 'N', 'T', 'N', 'N', 'N', 'N', 'N', 'N', 'N'};
+        for (int i = 0; i < bc.l_qseq; ++i)
+        {
+            char base = base_to_char[bam_seqi(seq, i)];
+            oss << base;
+        }
+        return oss.str();
+    }
+
+    // Query name.
+    inline std::string query_name()
+    {
+        return bam_get_qname(b);
+    }
+    // CIGAR as text, e.g. "10S90M".
+    inline string cigar_str()
+    {
+        ostringstream oss;
+        const uint32_t *cigar = bam_get_cigar(b);
+        const bam1_core_t &bc = b->core;
+        for (uint32_t i = 0; i < bc.n_cigar; ++i)
+        {
+            const char c = bam_cigar_opchr(cigar[i]);
+            const int len = bam_cigar_oplen(cigar[i]);
+            oss << len << c;
+        }
+        return oss.str();
+    }
+
+    // Bytes needed to store this wrapper, its bam1_t and the record data.
+    // Returned as 32 bits: records above 32767 bytes overflowed the former
+    // int16_t, producing a wrong size for the buffer bookkeeping.
+    inline int32_t length()
+    {
+        return (int32_t)(sizeof(*this) +
+                         sizeof(bam1_t) +
+                         b->l_data);
+    }
+
+    // Total length of the insertions ('I') in the CIGAR.
+    inline int32_t insert_cigar_len()
+    {
+        const uint32_t *cigar = bam_get_cigar(b);
+        const bam1_core_t &bc = b->core;
+        int ret = 0;
+        for (uint32_t i = 0; i < bc.n_cigar; ++i)
+        {
+            const char c = bam_cigar_opchr(cigar[i]);
+            const int len = bam_cigar_oplen(cigar[i]);
+            if (c == 'I')
+                ret += len;
+        }
+        return ret;
+    }
+
+    // Total length of the deletions ('D') in the CIGAR.
+    inline int32_t del_cigar_len()
+    {
+        const uint32_t *cigar = bam_get_cigar(b);
+        const bam1_core_t &bc = b->core;
+        int ret = 0;
+        for (uint32_t i = 0; i < bc.n_cigar; ++i)
+        {
+            const char c = bam_cigar_opchr(cigar[i]);
+            const int len = bam_cigar_oplen(cigar[i]);
+            if (c == 'D')
+                ret += len;
+        }
+        return ret;
+    }
+
+    // Global end position (inclusive) of a record: the start position plus
+    // the summed lengths of the D/N/M/=/X operations, minus one.
+    static inline int64_t BamEndPos(const bam1_t *b)
+    {
+        const uint32_t *cigar = bam_get_cigar(b);
+        const bam1_core_t &bc = b->core;
+        int start_offset = -1;
+        for (uint32_t i = 0; i < bc.n_cigar; ++i)
+        {
+            const char c = bam_cigar_opchr(cigar[i]);
+            const int len = bam_cigar_oplen(cigar[i]);
+            if (c == 'D' || c == 'N' || c == 'M' || c == '=' || c == 'X')
+                start_offset += len;
+        }
+        return (((int64_t)b->core.tid << MAX_CONTIG_LEN_SHIFT) | (int64_t)(b->core.pos + start_offset));
+    };
+
+    // True when the insert size of a pair can be trusted: the read is
+    // paired, both mates are mapped on opposite strands, isize is non-zero
+    // and the mate positions are consistent with the strand.
+    bool HasWellDefinedFragmentSize()
+    {
+        const bam1_core_t &bc = b->core;
+        bool hasWellDefinedFragmentSize = true;
+        if (bc.isize == 0 ||
+            !(bc.flag & BAM_FPAIRED) ||
+            ((bc.flag & BAM_FUNMAP) || (bc.flag & BAM_FMUNMAP)) ||
+            ((bool)(bc.flag & BAM_FREVERSE) == (bool)(bc.flag & BAM_FMREVERSE)))
+        {
+            hasWellDefinedFragmentSize = false;
+        }
+        else if (bc.flag & BAM_FREVERSE)
+        {
+            hasWellDefinedFragmentSize = contig_end_pos() > bc.mpos ? true : false;
+        }
+        else
+        {
+            hasWellDefinedFragmentSize = bc.pos <= bc.mpos + bc.isize ? true : false;
+        }
+        return hasWellDefinedFragmentSize;
+    }
+
+    // Adapter boundary of the read, or INT_MIN when the fragment size is
+    // not well defined.
+    int GetAdapterBoundary()
+    {
+        const bam1_core_t &bc = b->core;
+        int adapterBoundary;
+        if (!HasWellDefinedFragmentSize())
+            adapterBoundary = INT_MIN;
+        else if (bc.flag & BAM_FREVERSE)
+            adapterBoundary = bc.mpos - 1;
+        else
+            adapterBoundary = bc.pos + abs(bc.isize); // original note: GATK 4.0 differs from GATK 3.5, which adds 1 here
+        return adapterBoundary;
+    }
+
+    // Length of an insertion that starts the alignment (only clips may
+    // precede it), 0 if there is none.
+    inline int GetHeadInsertLen()
+    {
+        int insLen = 0;
+        const uint32_t *cigar = bam_get_cigar(b);
+        const bam1_core_t &bc = b->core;
+        for (uint32_t i = 0; i < bc.n_cigar; ++i)
+        {
+            const char c = bam_cigar_opchr(cigar[i]);
+            const int len = bam_cigar_oplen(cigar[i]);
+            if (c == 'I')
+            {
+                insLen = len;
+                break;
+            }
+            else if (c != 'H' && c != 'S')
+                break;
+        }
+        return insLen;
+    }
+
+    // Start position with leading soft clips and leading insertions
+    // subtracted; hard clips are skipped over.
+    inline int64_t GetSoftStart()
+    {
+        int64_t softStart = b->core.pos;
+        const uint32_t *cigar = bam_get_cigar(b);
+        const bam1_core_t &bc = b->core;
+        for (uint32_t i = 0; i < bc.n_cigar; ++i)
+        {
+            const char c = bam_cigar_opchr(cigar[i]);
+            const int len = bam_cigar_oplen(cigar[i]);
+            if (c == 'S' || c == 'I')
+                softStart -= len;
+            else if (c != 'H')
+                break;
+        }
+        return softStart;
+    }
+
+    // Unclipped start position (leading soft and hard clips subtracted).
+    inline int64_t GetUnclippedStart()
+    {
+        int64_t start = b->core.pos;
+        const uint32_t *cigar = bam_get_cigar(b);
+        const bam1_core_t &bc = b->core;
+        for (uint32_t i = 0; i < bc.n_cigar; ++i)
+        {
+            const char c = bam_cigar_opchr(cigar[i]);
+            const int len = bam_cigar_oplen(cigar[i]);
+            if (c == 'S' || c == 'H')
+                start -= len;
+            else
+                break;
+        }
+        return start;
+    }
+
+    // Unclipped end position (trailing soft and hard clips added).
+    inline int64_t GetUnclippedEnd()
+    {
+        int64_t end_pos = bam_endpos(b);
+        const uint32_t *cigar = bam_get_cigar(b);
+        const bam1_core_t &bc = b->core;
+        // Walk back from the last operation, index n_cigar - 1; starting at
+        // n_cigar would read one element past the CIGAR array.
+        for (int i = (int)bc.n_cigar - 1; i >= 0; --i)
+        {
+            const char c = bam_cigar_opchr(cigar[i]);
+            const int len = bam_cigar_oplen(cigar[i]);
+            if (c == 'S' || c == 'H')
+                end_pos += len;
+            else
+                break;
+        }
+        return end_pos;
+    }
+
+    // Global position of a record: contig id combined with the position.
+    static inline int64_t bam_global_pos(bam1_t *b)
+    {
+        return (((int64_t)b->core.tid << MAX_CONTIG_LEN_SHIFT) | (int64_t)b->core.pos);
+    }
+    static inline int64_t bam_global_pos(int tid, int pos)
+    {
+        return (((int64_t)tid << MAX_CONTIG_LEN_SHIFT) | (int64_t)pos);
+    }
+    // Contig id encoded in a global position.
+    static inline int32_t bam_tid(int64_t global_pos)
+    {
+        const int64_t mask = ~(((int64_t)1 << MAX_CONTIG_LEN_SHIFT) - 1);
+        const int64_t high_tid = global_pos & mask;
+        return (int32_t)(high_tid >> MAX_CONTIG_LEN_SHIFT);
+    }
+    // Position within the contig encoded in a global position.
+    static inline int32_t bam_pos(int64_t global_pos)
+    {
+        const int64_t mask = ((int64_t)1 << MAX_CONTIG_LEN_SHIFT) - 1;
+        return (int32_t)(global_pos & mask);
+    }
+};
+
+typedef std::map<const std::string, std::vector<BamWrap *>> SampleBamMap;
+
+#endif
--- a/src/common/hts/interval.cpp
+++ b/src/common/hts/interval.cpp
@ -0,0 +1,295 @@
+/* 
+     Description: 处理intervals
+
+     Copyright : All right reserved by ICT
+
+     Author : Zhang Zhonghai
+     Date : 2019/11/24
+*/
+
+#include "interval.h"
+
+#include <algorithm>
+#include <sstream>
+#include <fstream>
+#include <string>
+#include <iostream>
+
+#include <htslib/sam.h>
+
+#include "../utils/util.h"
+#include "bam_wrap.h"
+
+using std::min;
+using std::max;
+using std::string;
+using std::ifstream;
+using std::stringstream;
+
+using namespace std;
+
+// Constructors: the default one creates the interval [0, 0].
+Interval::Interval() : left(0), right(0) {}
+Interval::Interval(int64_t l, int64_t r) : left(l), right(r) {}
+
+// Ordering: by left bound first, ties broken by right bound.
+bool Interval::operator<(const Interval& other) {
+    return (left != other.left) ? (left < other.left)
+                                : (right < other.right);
+}
+
+// True when the two closed intervals share at least one position.
+bool Interval::overlaps(const Interval &other) {
+    const bool disjoint = (other.right < left) || (right < other.left);
+    return !disjoint;
+}
+
+// Grow this interval so that it also covers other; returns *this.
+Interval& Interval::spanWith(const Interval &other) {
+    if (other.left < left) left = other.left;
+    if (other.right > right) right = other.right;
+    return *this;
+}
+
+// Intersection of the two intervals; this interval is left unchanged.
+// No check is made that the result is non-empty (left may exceed right).
+Interval Interval::intersect(const Interval &that) const {
+    return Interval(max(left, that.left), min(right, that.right));
+}
+
+
+/*
+ * Intersect two interval arrays and append the overlapping pieces to r_arr.
+ * Both inputs are expected to be sorted (per the original note). r_arr is
+ * not cleared; nothing is appended when either input is empty.
+ */
+void Interval::IntersectIntervals(const IntervalArr &a_arr, 
+                                  const IntervalArr &b_arr, 
+                                  IntervalArr *r_arr) {
+    if (a_arr.size() < 1 || b_arr.size() < 1) return;
+    int ai=0, bi=0;
+    const Interval *last, *cur;
+    // Seed `last` with whichever array starts further left.
+    if (a_arr[ai].left < b_arr[bi].left) last = &a_arr[ai++];
+    else last = &b_arr[bi++];
+    // Walk both arrays in order of left bound while both have entries.
+    while (ai < a_arr.size() && bi < b_arr.size()) {
+        if (a_arr[ai].left < b_arr[bi].left) cur = &a_arr[ai++];
+        else cur = &b_arr[bi++];
+        if (last->right < cur->left) {
+            // no overlap with the tracked interval: track cur instead
+            last = cur; continue;
+        } else if (last->right > cur->right) {
+            // cur ends before last does: emit cur, keep tracking last
+            r_arr->push_back(*cur); 
+        } else {
+            // partial overlap: emit the shared part and track cur
+            r_arr->push_back(Interval(cur->left, last->right));
+            last = cur;
+        }
+    }
+    // One array is exhausted; compare what is left of the other against last.
+    const IntervalArr *arrp;
+    int ii;
+    if (ai < a_arr.size()) { arrp = &a_arr; ii = ai;} 
+    else { arrp = &b_arr; ii = bi; }
+    const IntervalArr &arr = *arrp;
+    while(ii < arr.size()) {
+        cur = &arr[ii++];
+        if (last->right < cur->left) {
+            break;
+        } else if (last->right > cur->right) {
+            r_arr->push_back(*cur); 
+        } else {
+            r_arr->push_back(Interval(cur->left, last->right));
+            break;
+        }
+    }
+}
+
+/*
+ * Union of two interval arrays: r_arr receives the merged intervals.
+ * When one input is empty, r_arr becomes a copy of the other.
+ */
+void Interval::UnionIntervals(const IntervalArr &a_arr,
+                              const IntervalArr &b_arr,
+                              IntervalArr *r_arr) {
+    if (a_arr.size() < 1) { *r_arr = b_arr; return; }
+    if (b_arr.size() < 1) { *r_arr = a_arr; return; }
+    r_arr->clear();
+
+    size_t ai = 0, bi = 0;
+    // Interval currently being grown; starts with the leftmost input.
+    Interval pending;
+    if (a_arr[ai].left < b_arr[bi].left) pending = a_arr[ai++];
+    else pending = b_arr[bi++];
+
+    // Extend the pending interval with `next`, or emit it and restart from
+    // `next` when `next` begins after the pending interval ends.
+    auto absorb = [&pending, r_arr](const Interval &next) {
+        if (pending.right < next.left) {
+            r_arr->push_back(pending);
+            pending = next;
+        } else {
+            pending.right = max(pending.right, next.right);
+        }
+    };
+
+    while (ai < a_arr.size() && bi < b_arr.size()) {
+        if (a_arr[ai].left < b_arr[bi].left) absorb(a_arr[ai++]);
+        else absorb(b_arr[bi++]);
+    }
+    // At most one of the two arrays still has entries at this point.
+    for (; ai < a_arr.size(); ++ai) absorb(a_arr[ai]);
+    for (; bi < b_arr.size(); ++bi) absorb(b_arr[bi]);
+    r_arr->push_back(pending);
+}
+
+/*
+ * Combine the read-covered regions (union of n_arr and t_arr) with the
+ * regions of the interval file (in_arr), clip the result to
+ * [start_loc, *end_loc) and append it to r_arr.
+ * in_arr is modified: entries ending before the end location are removed.
+ * *end_loc may be moved forward to the start of the next in_arr entry.
+ * Returns the number of positions covered by the intervals in r_arr.
+ */
+int64_t Interval::MergeIntervals(const IntervalArr &n_arr,
+                                  const IntervalArr &t_arr,
+                                  IntervalArr &in_arr,
+                                  int64_t start_loc, // inclusive
+                                  int64_t *end_loc, // exclusive
+                                  IntervalArr *r_arr) {
+    IntervalArr tmp_arr;
+    const int64_t end_loc_val = *end_loc;
+    if (in_arr.size() < 1) { // no input intervals: use the regions covered by n_arr and t_arr
+        UnionIntervals(n_arr, t_arr, &tmp_arr);
+    } else {
+        IntervalArr mid_arr;
+        UnionIntervals(n_arr, t_arr, &mid_arr);
+        IntersectIntervals(mid_arr, in_arr, &tmp_arr);
+    }
+    // Trim from the back: drop intervals starting at or after the end
+    // location and clip the first one that starts before it.
+    for(int i=tmp_arr.size()-1; i>=0; --i) {
+        if (tmp_arr[i].left >= end_loc_val) {
+            tmp_arr.pop_back(); // remove this element
+            continue;
+        }
+        tmp_arr[i].right = min(tmp_arr[i].right, end_loc_val - 1); // end_loc is exclusive
+        break;
+    }
+    // Copy to r_arr, skipping intervals that end before start_loc and
+    // clipping one that straddles it.
+    for (int i=0; i<tmp_arr.size(); ++i) {
+        if (tmp_arr[i].right < start_loc) {
+            continue;
+        }
+        if (tmp_arr[i].left < start_loc) {
+            r_arr->push_back(Interval(start_loc, tmp_arr[i].right));
+        } else {
+            r_arr->push_back(tmp_arr[i]);
+        }
+    }
+
+    // Find the first input interval that reaches the end location.
+    int next_i = 0;
+    while(next_i < in_arr.size() && in_arr[next_i].right < end_loc_val) ++next_i;
+    if (next_i < in_arr.size()) {
+        if (end_loc_val < in_arr[next_i].left) {
+            *end_loc = in_arr[next_i].left; // move the end of this round forward
+        } else {
+            in_arr[next_i].left = end_loc_val; // shorten the input interval
+        }
+        // Shift the remaining input intervals to the front of in_arr.
+        int i=0, j=next_i;
+        for (; j<in_arr.size(); ++i, ++j) {
+            in_arr[i] = in_arr[j];
+        }
+        in_arr.resize(i);
+    } else {
+        in_arr.clear();
+    }
+
+    // Count the positions covered by everything now in r_arr.
+    int64_t locus_num = 0;
+    for (int i=0; i<r_arr->size(); ++i) {
+        locus_num += (*r_arr)[i].right - (*r_arr)[i].left + 1; 
+    }
+    return locus_num;
+}
+
+/*
+ * Read an interval file and append the padded, merged intervals to r_arr.
+ *
+ * Lines starting with '@' and empty lines are skipped. Every other line is
+ * parsed as "<contig> <start> <stop>" with 1-based coordinates; the contig
+ * name is resolved through the header. Each interval is widened by
+ * interval_padding on both sides and intervals that then touch or overlap
+ * are merged. Problems are reported through Error().
+ */
+void Interval::ReadInterval(const string &interval_fn, 
+                            bam_hdr_t* header, 
+                            int interval_padding,
+                            IntervalArr *r_arr) {
+    ifstream interval_fs(interval_fn);
+    if (!interval_fs.is_open()) {
+        // Without this check every getline fails without setting eof.
+        Error("[%s] failed to open interval file [%s]\n", __func__, interval_fn.c_str());
+        return;
+    }
+    string one_line;
+    IntervalArr intervals;
+    // Looping on getline also processes a final line that has no trailing
+    // newline, which an eof()-controlled loop would drop.
+    while (getline(interval_fs, one_line)) {
+        if (one_line.empty() || one_line[0] == '@') continue;
+        stringstream ss_line(one_line);
+        string contig_name;
+        ss_line >> contig_name;
+        int itid = sam_hdr_name2tid(header, contig_name.c_str());
+        if (itid < 0) Error("[%s] interval file has unknown contig name [%s]\n", __func__, contig_name.c_str());
+        int64_t tid = (int64_t)itid;
+        tid <<= CONTIG_SHIFT;
+        int64_t start = 0, stop = 0;
+        if (!(ss_line >> start >> stop)) {
+            Error("[%s] interval file has malformed line [%s]\n", __func__, one_line.c_str());
+            continue;
+        }
+        // the interval file is 1-based, so subtract 1
+        intervals.push_back(Interval(tid + start - 1, tid + stop -1));
+    }
+    sort(intervals.begin(), intervals.end());
+    if (intervals.size() > 0) {
+        Interval new_span(intervals[0].left-interval_padding, intervals[0].right+interval_padding);
+        for (size_t i=1; i<intervals.size(); ++i) {
+            if (intervals[i].left - interval_padding > new_span.right) {
+                r_arr->push_back(new_span);
+                new_span.left = intervals[i].left - interval_padding;
+                new_span.right = intervals[i].right + interval_padding;
+            } else {
+                new_span.right = max(new_span.right, intervals[i].right + interval_padding);
+            }
+        }
+        r_arr->push_back(new_span);
+    }
+    interval_fs.close();
+}
+
+/*
+ * Merge intervals that touch or overlap their predecessor, in place.
+ * The array is processed in its given order.
+ */
+void Interval::ShrinkInterval(IntervalArr *ivap) {
+    if (ivap->size() < 1) return;
+    IntervalArr &iva = *ivap;
+    const IntervalArr tiva = iva;
+    iva.clear();
+    Interval iv = tiva[0];
+    for (size_t i=1; i<tiva.size(); ++i) {
+        if (iv.right+1 < tiva[i].left) {
+            // gap before the next interval: emit and start a new one
+            iva.push_back(iv);
+            iv = tiva[i];
+        } else {
+            // Take the larger right bound so that an interval contained in
+            // the current one cannot shrink the merged span.
+            iv.right = max(iv.right, tiva[i].right);
+        }
+    }
+    iva.push_back(iv);
+}
+
+/*
+ * Widen [start, end] by expandVal on both sides, clamped to the bounds of
+ * the contig that start lies on (contig length taken from the header).
+ */
+Interval Interval::ExpandInterval(int64_t start, int64_t end, int expandVal, bam_hdr_t* header) {
+    const int tid = BamWrap::bam_tid(start);
+    const uint32_t contig_len = header->target_len[tid];
+    const int64_t contig_first = BamWrap::bam_global_pos(tid, 0);
+    const int64_t contig_last = contig_len - 1 + contig_first;
+
+    Interval expanded;
+    expanded.left = max(contig_first, start - expandVal);
+    expanded.right = min(end + expandVal, contig_last);
+    return expanded;
+}
+
--- a/src/common/hts/interval.h
+++ b/src/common/hts/interval.h
@ -0,0 +1,101 @@
+/* 
+     Description: 处理intervals
+
+     Copyright : All right reserved by ICT
+
+     Author : Zhang Zhonghai
+     Date : 2019/11/24
+*/
+
+#ifndef INTERVAL_H_
+#define INTERVAL_H_
+
+#include <stdint.h>
+#include <vector>
+#include <string>
+#include <sstream>
+
+#include <htslib/sam.h>
+
+#include "bam_wrap.h"
+
+using namespace std;
+
+// Forward declaration. Uses the same class-key (`struct`) as the definition
+// below so compilers do not warn about mismatched tags.
+struct Interval;
+typedef std::vector<Interval> IntervalArr;
+/*
+ * Closed interval [left, right].
+ *
+ * toString() decodes both ends with BamWrap::bam_tid / BamWrap::bam_pos, so
+ * the coordinates are presumably "global" positions that pack a contig index
+ * together with an in-contig offset -- TODO confirm against BamWrap.
+ */
+struct Interval {
+    // Constants
+    const static int CONTIG_SHIFT = 30;
+
+    // Data members: inclusive end points
+    int64_t left;
+    int64_t right;
+
+    // Constructors
+    Interval();
+    explicit Interval(int64_t l, int64_t r);
+    // Ordering, used when sorting interval arrays
+    bool operator<(const Interval &other);
+    // Whether this interval overlaps `other`
+    bool overlaps(const Interval &other);
+    // Merge with `other`; modifies and returns this interval
+    Interval& spanWith(const Interval &other);
+    // Intersection of the two intervals; this interval is not modified
+    Interval intersect(const Interval &that) const;
+
+    // For debugging: renders "<tid+1>:<pos(left)+1>-<pos(right)+1>"
+    string toString() const {
+        ostringstream oss;
+        oss << BamWrap::bam_tid(left) + 1 << ":"
+            << BamWrap::bam_pos(left) + 1 << "-"
+            << BamWrap::bam_pos(right) + 1;
+
+        return oss.str();
+    }
+    /*
+     * Combine two interval arrays keeping only the regions where they
+     * intersect. Both arrays must already be sorted.
+     */
+    static void IntersectIntervals(const IntervalArr &a_arr, 
+                                   const IntervalArr &b_arr, 
+                                   IntervalArr *r_arr);
+    /*
+     * Combine two interval arrays, taking the union of intersecting regions.
+     */
+    static void UnionIntervals(const IntervalArr &a_arr,
+                               const IntervalArr &b_arr,
+                               IntervalArr *r_arr);
+    /*
+     * Intersect the regions covered by reads with the regions from the
+     * interval file supplied on the command line.
+     */
+    static int64_t MergeIntervals(const IntervalArr &n_arr,
+                                   const IntervalArr &t_arr,
+                                   IntervalArr &in_arr, // modified by the call
+                                   int64_t start_loc, // inclusive
+                                   int64_t *end_loc, // exclusive; modified by the call
+                                   IntervalArr *r_arr);
+    /*
+     * Read an interval file.
+     */
+    static void ReadInterval(const std::string &interval_fn, 
+                             bam_hdr_t* header, 
+                             int interval_padding,
+                             IntervalArr *r_arr);
+    /*
+     * Merge intervals that are connected to each other.
+     */
+    static void ShrinkInterval(IntervalArr *iva);
+
+    /*
+     * Expand an interval, using the header information.
+     */
+    static Interval ExpandInterval(int64_t start, int64_t end, int expandVal, bam_hdr_t* header);
+};
+
+
+#endif
+
+
--- a/src/common/hts/read_ends.h
+++ b/src/common/hts/read_ends.h
@ -0,0 +1,59 @@
+/*
+Description: read ends结构体主要用来标记冗余，包含一些序列的测序过程中的物理信息等
+
+Copyright : All right reserved by ICT
+
+Author : Zhang Zhonghai
+Date : 2023/11/3
+*/
+
+#ifndef READ_ENDS_H_
+#define READ_ENDS_H_
+
+#include <stdint.h>
+
+/*
+ * Holds all read-end information, like ReadEndsForMarkDuplicates in Picard.
+ * Every member carries a default initializer so a default-constructed
+ * ReadEnds never contains indeterminate values.
+ */
+struct ReadEnds 
+{
+    /* Members from PhysicalLocationInt */
+    /**
+     * Small class that provides access to the physical location information about a cluster.
+     * All values should be defaulted to -1 if unavailable.  Tile should only allow
+     * non-zero positive integers, x and y coordinates must be non-negative.
+     * This is different from PhysicalLocationShort in that the x and y positions are ints, not shorts
+     * thus, they do not overflow within a HiSeqX tile.
+     */
+    int16_t tile = -1;
+    int32_t x = -1;
+    int32_t y = -1;
+
+    /* Members from ReadEnds */
+    /** Little struct-like class to hold read pair (and fragment) end data for duplicate marking. */
+    static const int8_t F = 0, R = 1, FF = 2, FR = 3, RR = 4, RF = 5;
+    /* Zero-initialised; previously left indeterminate. NOTE(review): 0 is
+     * believed to match the implicit Java default of the Picard fields. */
+    int16_t libraryId = 0;
+    int8_t orientation = 0;
+    int32_t read1ReferenceIndex = -1;
+    int32_t read1Coordinate = -1;
+    int32_t read2ReferenceIndex = -1;
+    int32_t read2Coordinate = -1; // This field is overloaded for flow based processing as the end coordinate of read 1. (paired reads not supported)
+    /* Additional information used to detect optical dupes */
+    int16_t readGroup = -1;
+    /** For optical duplicate detection the orientation matters regard to 1st or 2nd end of a mate */
+    int8_t orientationForOpticalDuplicates = -1;
+    /** A *transient* flag marking this read end as being an optical duplicate. */
+    bool isOpticalDuplicate = false;
+
+    /* Members from ReadEndsForMarkDuplicates */
+    /** Little struct-like class to hold read pair (and fragment) end data for MarkDuplicatesWithMateCigar **/
+    int16_t score = 0;
+    int64_t read1IndexInFile = -1;
+    int64_t read2IndexInFile = -1;
+    int64_t duplicateSetSize = -1;
+
+    /* Members from ReadEndsForMarkDuplicatesWithBarcodes (apparently unused) */
+    int32_t barcode = 0; // primary barcode for this read (and pair)
+    int32_t readOneBarcode = 0; // read one barcode, 0 if not present
+    int32_t readTwoBarcode = 0; // read two barcode, 0 if not present or not paired
+};
+
+#endif
--- a/src/common/utils/global_arg.cpp
+++ b/src/common/utils/global_arg.cpp
--- a/src/common/utils/global_arg.h
+++ b/src/common/utils/global_arg.h
--- a/src/common/utils/thpool.cpp
+++ b/src/common/utils/thpool.cpp
@ -0,0 +1,554 @@
+/* ********************************
+ * Author:       Johan Hanssen Seferidis
+ * License:	     MIT
+ * Description:  Library providing a threading pool where you can add
+ *               work. For usage, check the thpool.h file or README.md
+ *
+ */
+/** @file thpool.h */ /*
+                       *
+                       ********************************/
+
+#if defined(__APPLE__)
+#include <AvailabilityMacros.h>
+#else
+#ifndef _POSIX_C_SOURCE
+#define _POSIX_C_SOURCE 200809L
+#endif
+#endif
+#include <unistd.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <pthread.h>
+#include <errno.h>
+#include <time.h>
+#if defined(__linux__)
+#include <sys/prctl.h>
+#endif
+
+#include "thpool.h"
+
+/* Normalise THPOOL_DEBUG to 0/1 so it can be tested with plain #if. */
+#ifdef THPOOL_DEBUG
+#define THPOOL_DEBUG 1
+#else
+#define THPOOL_DEBUG 0
+#endif
+
+/* err(): print a diagnostic on stderr. Compiled out only when DISABLE_PRINT
+ * is defined and debugging is off. THPOOL_DEBUG is always *defined* at this
+ * point, so its value (not defined()) must be tested, otherwise DISABLE_PRINT
+ * would never have any effect. fputs is used so the message is never
+ * interpreted as a printf format string. */
+#if !defined(DISABLE_PRINT) || THPOOL_DEBUG
+#define err(str) fputs(str, stderr)
+#else
+#define err(str) ((void)0)
+#endif
+
+/* File-wide flags shared by every pool created from this translation unit.
+ * threads_keepalive: worker loops run while non-zero (cleared by thpool_destroy).
+ * threads_on_hold:   set by the SIGUSR1 handler, cleared by thpool_resume.
+ * NOTE(review): `volatile` does not provide inter-thread synchronization. */
+static volatile int threads_keepalive;
+static volatile int threads_on_hold;
+
+/* ========================== STRUCTURES ============================ */
+
+/* Binary semaphore built from a mutex + condition variable */
+typedef struct bsem
+{
+    pthread_mutex_t mutex; /* protects v                        */
+    pthread_cond_t cond;   /* signalled when v is set to 1      */
+    int v;                 /* semaphore value, always 0 or 1    */
+} bsem;
+
+/* Job: one unit of work, node of the singly linked job queue.
+ * `prev` links from the front of the queue towards the rear. */
+typedef struct job
+{
+    struct job *prev;            /* pointer to previous job   */
+    void (*function)(void *arg); /* function pointer          */
+    void *arg;                   /* function's argument       */
+} job;
+
+/* Job queue: FIFO of pending jobs; jobs are pushed at rear, pulled at front */
+typedef struct jobqueue
+{
+    pthread_mutex_t rwmutex; /* used for queue r/w access */
+    job *front;              /* pointer to front of queue */
+    job *rear;               /* pointer to rear  of queue */
+    bsem *has_jobs;          /* flag as binary semaphore  */
+    int len;                 /* number of jobs in queue   */
+} jobqueue_t;
+
+/* Thread: per-worker bookkeeping record */
+typedef struct thread
+{
+    int id;                   /* friendly id               */
+    pthread_t pthread;        /* handle of the actual thread */
+    struct thpool_ *thpool_p; /* access to thpool          */
+} thread;
+
+/* Threadpool: the workers, their counters and the shared job queue */
+typedef struct thpool_
+{
+    thread **threads;                 /* pointer to threads        */
+    volatile int num_threads_alive;   /* threads currently alive   */
+    volatile int num_threads_working; /* threads currently working */
+    pthread_mutex_t thcount_lock;     /* used for thread count etc */
+    pthread_cond_t threads_all_idle;  /* signal to thpool_wait     */
+    jobqueue_t jobqueue;              /* job queue                 */
+} thpool_;
+
+/* ========================== PROTOTYPES ============================ */
+
+static int thread_init(thpool_ *thpool_p, struct thread **thread_p, int id);
+static void *thread_do(struct thread *thread_p);
+static void thread_hold(int sig_id);
+static void thread_destroy(struct thread *thread_p);
+
+static int jobqueue_init(jobqueue_t *jobqueue_p);
+static void jobqueue_clear(jobqueue_t *jobqueue_p);
+static void jobqueue_push(jobqueue_t *jobqueue_p, struct job *newjob_p);
+static struct job *jobqueue_pull(jobqueue_t *jobqueue_p);
+static void jobqueue_destroy(jobqueue_t *jobqueue_p);
+
+static void bsem_init(struct bsem *bsem_p, int value);
+static void bsem_reset(struct bsem *bsem_p);
+static void bsem_post(struct bsem *bsem_p);
+static void bsem_post_all(struct bsem *bsem_p);
+static void bsem_wait(struct bsem *bsem_p);
+
+/* ========================== THREADPOOL ============================ */
+
+/* Initialise thread pool
+ *
+ * Allocates the pool and its job queue, starts num_threads detached worker
+ * threads and returns once every worker has registered itself as alive.
+ *
+ * @param num_threads  number of workers; negative values are treated as 0
+ * @return the new pool, or NULL if an allocation failed
+ */
+struct thpool_ *thpool_init(int num_threads)
+{
+
+    /* These flags are file-wide, so creating a pool resets them for every
+     * pool that already exists. */
+    threads_on_hold = 0;
+    threads_keepalive = 1;
+
+    if (num_threads < 0)
+    {
+        num_threads = 0;
+    }
+
+    /* Make new thread pool */
+    thpool_ *thpool_p;
+    thpool_p = (struct thpool_ *)malloc(sizeof(struct thpool_));
+    if (thpool_p == NULL)
+    {
+        err("thpool_init(): Could not allocate memory for thread pool\n");
+        return NULL;
+    }
+    thpool_p->num_threads_alive = 0;
+    thpool_p->num_threads_working = 0;
+
+    /* Initialise the job queue */
+    if (jobqueue_init(&thpool_p->jobqueue) == -1)
+    {
+        err("thpool_init(): Could not allocate memory for job queue\n");
+        free(thpool_p);
+        return NULL;
+    }
+
+    /* Make threads in pool */
+    /* NOTE(review): with num_threads == 0 this is malloc(0), which may
+     * legally return NULL and would then be reported as a failure. */
+    thpool_p->threads = (struct thread **)malloc(num_threads * sizeof(struct thread *));
+    if (thpool_p->threads == NULL)
+    {
+        err("thpool_init(): Could not allocate memory for threads\n");
+        jobqueue_destroy(&thpool_p->jobqueue);
+        free(thpool_p);
+        return NULL;
+    }
+
+    /* Must be ready before the first worker starts: thread_do() locks
+     * thcount_lock immediately. */
+    pthread_mutex_init(&(thpool_p->thcount_lock), NULL);
+    pthread_cond_init(&thpool_p->threads_all_idle, NULL);
+
+    /* Thread init */
+    int n;
+    for (n = 0; n < num_threads; n++)
+    {
+        /* NOTE(review): the return value is ignored; if a worker fails to
+         * start, the wait loop below never terminates. */
+        thread_init(thpool_p, &thpool_p->threads[n], n);
+#if THPOOL_DEBUG
+        printf("THPOOL_DEBUG: Created thread %d in pool \n", n);
+#endif
+    }
+
+    /* Wait for threads to initialize */
+    /* Busy-waits (no sleep/yield) until every worker has incremented
+     * num_threads_alive. */
+    while (thpool_p->num_threads_alive != num_threads)
+    {
+    }
+
+    return thpool_p;
+}
+
+/* Queue one job for execution by the pool.
+ *
+ * @param thpool_p    pool that receives the job
+ * @param function_p  function the worker will call
+ * @param arg_p       argument handed to function_p
+ * @return 0 on success, -1 if the job record could not be allocated
+ */
+int thpool_add_work(thpool_ *thpool_p, void (*function_p)(void *), void *arg_p)
+{
+    /* The record is heap-allocated; the worker frees it after running it. */
+    job *queued = (struct job *)malloc(sizeof(struct job));
+    if (!queued)
+    {
+        err("thpool_add_work(): Could not allocate memory for new job\n");
+        return -1;
+    }
+
+    queued->function = function_p;
+    queued->arg = arg_p;
+
+    jobqueue_push(&thpool_p->jobqueue, queued);
+    return 0;
+}
+
+/* Wait until all jobs have finished
+ *
+ * Blocks on the threads_all_idle condition variable until the queue is empty
+ * and no worker is running a job. The predicate is re-checked after every
+ * wake-up, so spurious wake-ups are harmless.
+ * NOTE(review): jobqueue.len is read under thcount_lock here, while the
+ * queue itself updates it under rwmutex.
+ */
+void thpool_wait(thpool_ *thpool_p)
+{
+    pthread_mutex_lock(&thpool_p->thcount_lock);
+    while (thpool_p->jobqueue.len || thpool_p->num_threads_working)
+    {
+        pthread_cond_wait(&thpool_p->threads_all_idle, &thpool_p->thcount_lock);
+    }
+    pthread_mutex_unlock(&thpool_p->thcount_lock);
+}
+
+/* Destroy the threadpool
+ *
+ * Clears the keep-alive flag, keeps waking the workers until all of them
+ * have left their loop, then frees the queue, the thread records and the
+ * pool itself. Jobs still queued are discarded without being run.
+ * NOTE(review): thcount_lock and threads_all_idle are not destroyed with
+ * pthread_*_destroy before the pool is freed.
+ */
+void thpool_destroy(thpool_ *thpool_p)
+{
+    /* No need to destroy if it's NULL */
+    if (thpool_p == NULL)
+        return;
+
+    /* Remember the count now: num_threads_alive drops to 0 below, but every
+     * thread record still has to be freed. */
+    volatile int threads_total = thpool_p->num_threads_alive;
+
+    /* End each thread 's infinite loop */
+    threads_keepalive = 0;
+
+    /* Give one second to kill idle threads */
+    /* This first phase spins without sleeping. */
+    double TIMEOUT = 1.0;
+    time_t start, end;
+    double tpassed = 0.0;
+    time(&start);
+    while (tpassed < TIMEOUT && thpool_p->num_threads_alive)
+    {
+        bsem_post_all(thpool_p->jobqueue.has_jobs);
+        time(&end);
+        tpassed = difftime(end, start);
+    }
+
+    /* Poll remaining threads */
+    /* Workers still inside a job are polled once per second. */
+    while (thpool_p->num_threads_alive)
+    {
+        bsem_post_all(thpool_p->jobqueue.has_jobs);
+        sleep(1);
+    }
+
+    /* Job queue cleanup */
+    jobqueue_destroy(&thpool_p->jobqueue);
+    /* Deallocs */
+    int n;
+    for (n = 0; n < threads_total; n++)
+    {
+        thread_destroy(thpool_p->threads[n]);
+    }
+    free(thpool_p->threads);
+    free(thpool_p);
+}
+
+/* Pause all threads in threadpool
+ *
+ * Sends SIGUSR1 to every live worker; the handler installed in thread_do()
+ * (thread_hold) then keeps the thread sleeping until thpool_resume() is
+ * called.
+ */
+void thpool_pause(thpool_ *thpool_p)
+{
+    int n;
+    for (n = 0; n < thpool_p->num_threads_alive; n++)
+    {
+        pthread_kill(thpool_p->threads[n]->pthread, SIGUSR1);
+    }
+}
+
+/* Resume all threads in threadpool
+ *
+ * Clears the file-wide threads_on_hold flag, so paused workers of *every*
+ * pool resume, not only those of thpool_p.
+ */
+void thpool_resume(thpool_ *thpool_p)
+{
+    // resuming a single threadpool hasn't been
+    // implemented yet, meanwhile this suppresses
+    // the warnings
+    (void)thpool_p;
+
+    threads_on_hold = 0;
+}
+
+/* Return the number of workers currently executing a job. The counter is
+ * read without taking thcount_lock, so the value is only a snapshot. */
+int thpool_num_threads_working(thpool_ *thpool_p)
+{
+    return thpool_p->num_threads_working;
+}
+
+/* ============================ THREAD ============================== */
+
+/* Initialize a thread in the thread pool
+ *
+ * Allocates the thread record, then starts a detached pthread running
+ * thread_do() on it.
+ *
+ * @param thpool_p      pool the new thread belongs to
+ * @param thread        address to the pointer of the thread to be created
+ * @param id            id to be given to the thread
+ * @return 0 on success, -1 otherwise.
+ *
+ * NOTE(review): only the allocation failure is reported; the result of
+ * pthread_create() is not checked.
+ */
+static int thread_init(thpool_ *thpool_p, struct thread **thread_p, int id)
+{
+
+    *thread_p = (struct thread *)malloc(sizeof(struct thread));
+    if (*thread_p == NULL)
+    {
+        err("thread_init(): Could not allocate memory for thread\n");
+        return -1;
+    }
+
+    (*thread_p)->thpool_p = thpool_p;
+    (*thread_p)->id = id;
+
+    /* thread_do takes a struct thread*, hence the cast to the pthread
+     * start-routine type. */
+    pthread_create(&(*thread_p)->pthread, NULL, (void *(*)(void *))thread_do, (*thread_p));
+    pthread_detach((*thread_p)->pthread);
+    return 0;
+}
+
+/* Sets the calling thread on hold
+ *
+ * SIGUSR1 handler: raises the file-wide threads_on_hold flag and keeps the
+ * interrupted thread sleeping in one-second steps until the flag is cleared
+ * (see thpool_resume).
+ */
+static void thread_hold(int sig_id)
+{
+    (void)sig_id;
+    threads_on_hold = 1;
+    while (threads_on_hold)
+    {
+        sleep(1);
+    }
+}
+
+/* What each thread is doing
+ *
+ * In principle this is an endless loop. The only time this loop gets interrupted is once
+ * thpool_destroy() is invoked or the program exits.
+ *
+ * Per iteration: wait on the has_jobs semaphore, pull one job, run it, free
+ * it, and signal threads_all_idle when the last working thread finishes.
+ *
+ * @param  thread        thread that will run this function
+ * @return always NULL
+ */
+static void *thread_do(struct thread *thread_p)
+{
+
+    /* Set thread name for profiling and debugging */
+    char thread_name[16] = {0};
+    snprintf(thread_name, 16, "thpool-%d", thread_p->id);
+
+#if defined(__linux__)
+    /* Use prctl instead to prevent using _GNU_SOURCE flag and implicit declaration */
+    prctl(PR_SET_NAME, thread_name);
+#elif defined(__APPLE__) && defined(__MACH__)
+    pthread_setname_np(thread_name);
+#else
+    err("thread_do(): pthread_setname_np is not supported on this system");
+#endif
+
+    /* Assure all threads have been created before starting serving */
+    thpool_ *thpool_p = thread_p->thpool_p;
+
+    /* Register signal handler */
+    /* The SIGUSR1 disposition is process-wide; every worker installs the
+     * same handler used by thpool_pause(). */
+    struct sigaction act;
+    sigemptyset(&act.sa_mask);
+    act.sa_flags = 0;
+    act.sa_handler = thread_hold;
+    if (sigaction(SIGUSR1, &act, NULL) == -1)
+    {
+        err("thread_do(): cannot handle SIGUSR1");
+    }
+
+    /* Mark thread as alive (initialized) */
+    pthread_mutex_lock(&thpool_p->thcount_lock);
+    thpool_p->num_threads_alive += 1;
+    pthread_mutex_unlock(&thpool_p->thcount_lock);
+
+    while (threads_keepalive)
+    {
+
+        bsem_wait(thpool_p->jobqueue.has_jobs);
+
+        /* Re-check: thpool_destroy() posts the semaphore just to wake the
+         * worker up so it can exit. */
+        if (threads_keepalive)
+        {
+
+            pthread_mutex_lock(&thpool_p->thcount_lock);
+            thpool_p->num_threads_working++;
+            pthread_mutex_unlock(&thpool_p->thcount_lock);
+
+            /* Read job from queue and execute it */
+            void (*func_buff)(void *);
+            void *arg_buff;
+            job *job_p = jobqueue_pull(&thpool_p->jobqueue);
+            /* job_p is NULL when the queue turned out to be empty. */
+            if (job_p)
+            {
+                func_buff = job_p->function;
+                arg_buff = job_p->arg;
+                func_buff(arg_buff);
+                free(job_p);
+            }
+
+            pthread_mutex_lock(&thpool_p->thcount_lock);
+            thpool_p->num_threads_working--;
+            /* Last busy worker wakes a thread blocked in thpool_wait(). */
+            if (!thpool_p->num_threads_working)
+            {
+                pthread_cond_signal(&thpool_p->threads_all_idle);
+            }
+            pthread_mutex_unlock(&thpool_p->thcount_lock);
+        }
+    }
+    /* Deregister; thpool_destroy() waits for this counter to reach 0. */
+    pthread_mutex_lock(&thpool_p->thcount_lock);
+    thpool_p->num_threads_alive--;
+    pthread_mutex_unlock(&thpool_p->thcount_lock);
+
+    return NULL;
+}
+
+/* Frees a thread record. Only releases the memory allocated by
+ * thread_init(); it does not stop or join the pthread. */
+static void thread_destroy(thread *thread_p)
+{
+    free(thread_p);
+}
+
+/* ============================ JOB QUEUE =========================== */
+
+/* Prepare an empty job queue.
+ *
+ * @param jobqueue_p  queue to set up
+ * @return 0 on success, -1 if the has_jobs semaphore could not be allocated
+ */
+static int jobqueue_init(jobqueue_t *jobqueue_p)
+{
+    jobqueue_p->len = 0;
+    jobqueue_p->front = jobqueue_p->rear = NULL;
+
+    bsem *sem = (struct bsem *)malloc(sizeof(struct bsem));
+    jobqueue_p->has_jobs = sem;
+    if (!sem)
+    {
+        return -1;
+    }
+
+    pthread_mutex_init(&(jobqueue_p->rwmutex), NULL);
+    /* Starts at 0: nothing to do until the first push. */
+    bsem_init(sem, 0);
+    return 0;
+}
+
+/* Clear the queue
+ *
+ * Frees every pending job without running it, then resets the queue to its
+ * empty state (pointers, length and has_jobs semaphore).
+ */
+static void jobqueue_clear(jobqueue_t *jobqueue_p)
+{
+
+    while (jobqueue_p->len)
+    {
+        free(jobqueue_pull(jobqueue_p));
+    }
+
+    jobqueue_p->front = NULL;
+    jobqueue_p->rear = NULL;
+    bsem_reset(jobqueue_p->has_jobs);
+    jobqueue_p->len = 0;
+}
+
+/* Append an already allocated job at the rear of the queue and post the
+ * has_jobs semaphore. The queue mutex is taken internally.
+ */
+static void jobqueue_push(jobqueue_t *jobqueue_p, struct job *newjob)
+{
+    pthread_mutex_lock(&jobqueue_p->rwmutex);
+
+    newjob->prev = NULL;
+    if (jobqueue_p->len == 0)
+    {
+        /* Empty queue: the new job is both front and rear. */
+        jobqueue_p->front = newjob;
+    }
+    else
+    {
+        /* Link behind the current rear. */
+        jobqueue_p->rear->prev = newjob;
+    }
+    jobqueue_p->rear = newjob;
+    jobqueue_p->len++;
+
+    bsem_post(jobqueue_p->has_jobs);
+
+    pthread_mutex_unlock(&jobqueue_p->rwmutex);
+}
+
+/* Get first job from queue (removes it from queue)
+ *
+ * The queue mutex (rwmutex) is taken inside this function, so the caller
+ * must NOT already hold it.
+ *
+ * @return the removed job (ownership passes to the caller), or NULL when the
+ *         queue is empty
+ */
+static struct job *jobqueue_pull(jobqueue_t *jobqueue_p)
+{
+
+    pthread_mutex_lock(&jobqueue_p->rwmutex);
+    job *job_p = jobqueue_p->front;
+
+    switch (jobqueue_p->len)
+    {
+
+    case 0: /* if no jobs in queue */
+        break;
+
+    case 1: /* if one job in queue */
+        jobqueue_p->front = NULL;
+        jobqueue_p->rear = NULL;
+        jobqueue_p->len = 0;
+        break;
+
+    default: /* if >1 jobs in queue */
+        jobqueue_p->front = job_p->prev;
+        jobqueue_p->len--;
+        /* more than one job in queue -> post it */
+        /* The semaphore is binary, so it is re-posted here to let another
+         * worker pick up the remaining jobs. */
+        bsem_post(jobqueue_p->has_jobs);
+    }
+
+    pthread_mutex_unlock(&jobqueue_p->rwmutex);
+    return job_p;
+}
+
+/* Free all queue resources back to the system: drops every pending job via
+ * jobqueue_clear(), then releases the has_jobs semaphore memory. */
+static void jobqueue_destroy(jobqueue_t *jobqueue_p)
+{
+    jobqueue_clear(jobqueue_p);
+    free(jobqueue_p->has_jobs);
+}
+
+/* ======================== SYNCHRONISATION ========================= */
+
+/* Init semaphore to 1 or 0
+ *
+ * Initialises the mutex and condition variable and stores the start value.
+ * Any value other than 0 or 1 is treated as a programming error and
+ * terminates the process with exit(1).
+ */
+static void bsem_init(bsem *bsem_p, int value)
+{
+    if (value < 0 || value > 1)
+    {
+        err("bsem_init(): Binary semaphore can take only values 1 or 0");
+        exit(1);
+    }
+    pthread_mutex_init(&(bsem_p->mutex), NULL);
+    pthread_cond_init(&(bsem_p->cond), NULL);
+    bsem_p->v = value;
+}
+
+/* Reset semaphore to 0
+ *
+ * The mutex and condition variable are destroyed before bsem_init() creates
+ * them again: initialising an already initialised pthread object is
+ * undefined behaviour and may leak resources. Callers must make sure no
+ * thread is waiting on, or holding, the semaphore while it is reset.
+ */
+static void bsem_reset(bsem *bsem_p)
+{
+    pthread_mutex_destroy(&(bsem_p->mutex));
+    pthread_cond_destroy(&(bsem_p->cond));
+    bsem_init(bsem_p, 0);
+}
+
+/* Post to at least one thread: set the value to 1 and signal the condition
+ * variable. Posting an already posted semaphore leaves it at 1. */
+static void bsem_post(bsem *bsem_p)
+{
+    pthread_mutex_lock(&bsem_p->mutex);
+    bsem_p->v = 1;
+    pthread_cond_signal(&bsem_p->cond);
+    pthread_mutex_unlock(&bsem_p->mutex);
+}
+
+/* Post to all threads: set the value to 1 and wake every waiter. Only the
+ * first waiter to re-acquire the mutex consumes the value (see bsem_wait);
+ * the others go back to waiting. */
+static void bsem_post_all(bsem *bsem_p)
+{
+    pthread_mutex_lock(&bsem_p->mutex);
+    bsem_p->v = 1;
+    pthread_cond_broadcast(&bsem_p->cond);
+    pthread_mutex_unlock(&bsem_p->mutex);
+}
+
+/* Wait on semaphore until it has value 1, then consume it (set it back to 0).
+ * The value is re-checked in a loop, which guards against spurious wake-ups. */
+static void bsem_wait(bsem *bsem_p)
+{
+    pthread_mutex_lock(&bsem_p->mutex);
+    while (bsem_p->v != 1)
+    {
+        pthread_cond_wait(&bsem_p->cond, &bsem_p->mutex);
+    }
+    bsem_p->v = 0;
+    pthread_mutex_unlock(&bsem_p->mutex);
+}
--- a/src/common/utils/thpool.h
+++ b/src/common/utils/thpool.h
@ -0,0 +1,179 @@
+/**********************************
+ * @author      Johan Hanssen Seferidis
+ * License:     MIT
+ *
+ **********************************/
+
+#ifndef _THPOOL_
+#define _THPOOL_
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+     /* =================================== API ======================================= */
+
+     typedef struct thpool_ *threadpool;
+
+     /**
+      * @brief  Initialize threadpool
+      *
+      * Initializes a threadpool. This function will not return until all
+      * threads have initialized successfully.
+      *
+      * @example
+      *
+      *    ..
+      *    threadpool thpool;                     //First we declare a threadpool
+      *    thpool = thpool_init(4);               //then we initialize it to 4 threads
+      *    ..
+      *
+      * @param  num_threads   number of threads to be created in the threadpool
+      * @return threadpool    created threadpool on success,
+      *                       NULL on error
+      */
+     threadpool thpool_init(int num_threads);
+
+     /**
+      * @brief Add work to the job queue
+      *
+      * Takes an action and its argument and adds it to the threadpool's job queue.
+      * If you want to add to work a function with more than one arguments then
+      * a way to implement this is by passing a pointer to a structure.
+      *
+      * NOTICE: The job function must have the signature void f(void *).
+      * Pass the data through the pointer and make sure it stays valid
+      * until the job has run.
+      *
+      * @example
+      *
+      *    void print_num(void *arg){
+      *       printf("%d\n", *(int *)arg);
+      *    }
+      *
+      *    int main() {
+      *       ..
+      *       static int a = 10;
+      *       thpool_add_work(thpool, print_num, &a);
+      *       ..
+      *    }
+      *
+      * @param  threadpool    threadpool to which the work will be added
+      * @param  function_p    pointer to function to add as work
+      * @param  arg_p         pointer to an argument
+      * @return 0 on success, -1 otherwise.
+      */
+     int thpool_add_work(threadpool, void (*function_p)(void *), void *arg_p);
+
+     /**
+      * @brief Wait for all queued jobs to finish
+      *
+      * Will wait for all jobs - both queued and currently running to finish.
+      * Once the queue is empty and all work has completed, the calling thread
+      * (probably the main program) will continue.
+      *
+      * No polling is involved: the calling thread blocks on a condition
+      * variable and is woken by the worker threads once none of them is
+      * working any more, so waiting does not consume CPU time.
+      *
+      * @example
+      *
+      *    ..
+      *    threadpool thpool = thpool_init(4);
+      *    ..
+      *    // Add a bunch of work
+      *    ..
+      *    thpool_wait(thpool);
+      *    puts("All added work has finished");
+      *    ..
+      *
+      * @param threadpool     the threadpool to wait for
+      * @return nothing
+      */
+     void thpool_wait(threadpool);
+
+     /**
+      * @brief Pauses all threads immediately
+      *
+      * The threads will be paused no matter if they are idle or working.
+      * The threads return to their previous states once thpool_resume
+      * is called.
+      *
+      * While the thread is being paused, new work can be added.
+      *
+      * @example
+      *
+      *    threadpool thpool = thpool_init(4);
+      *    thpool_pause(thpool);
+      *    ..
+      *    // Add a bunch of work
+      *    ..
+      *    thpool_resume(thpool); // Let the threads start their magic
+      *
+      * @param threadpool    the threadpool where the threads should be paused
+      * @return nothing
+      */
+     void thpool_pause(threadpool);
+
+     /**
+      * @brief Unpauses all threads if they are paused
+      *
+      * @example
+      *    ..
+      *    thpool_pause(thpool);
+      *    sleep(10);              // Delay execution 10 seconds
+      *    thpool_resume(thpool);
+      *    ..
+      *
+      * @param threadpool     the threadpool where the threads should be unpaused
+      * @return nothing
+      */
+     void thpool_resume(threadpool);
+
+     /**
+      * @brief Destroy the threadpool
+      *
+      * This will wait for the currently active threads to finish and then 'kill'
+      * the whole threadpool to free up memory.
+      *
+      * @example
+      * int main() {
+      *    threadpool thpool1 = thpool_init(2);
+      *    threadpool thpool2 = thpool_init(2);
+      *    ..
+      *    thpool_destroy(thpool1);
+      *    ..
+      *    return 0;
+      * }
+      *
+      * @param threadpool     the threadpool to destroy
+      * @return nothing
+      */
+     void thpool_destroy(threadpool);
+
+     /**
+      * @brief Show currently working threads
+      *
+      * Working threads are the threads that are performing work (not idle).
+      *
+      * @example
+      * int main() {
+      *    threadpool thpool1 = thpool_init(2);
+      *    threadpool thpool2 = thpool_init(2);
+      *    ..
+      *    printf("Working threads: %d\n", thpool_num_threads_working(thpool1));
+      *    ..
+      *    return 0;
+      * }
+      *
+      * @param threadpool     the threadpool of interest
+      * @return integer       number of threads working
+      */
+     int thpool_num_threads_working(threadpool);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
--- a/src/common/utils/timer.cpp
+++ b/src/common/utils/timer.cpp
@ -0,0 +1,78 @@
+/* 
+     Description: 用来统计程序段执行的时间，测试用
+
+     Copyright : All right reserved by ICT
+
+     Author : Zhang Zhonghai
+     Date : 2019/01/30
+*/
+
+#include "timer.h"
+
+#include <stdlib.h>
+#include <sys/time.h>
+#include <time.h>
+#include <stdio.h>
+
+/*
+ * class Timer implementation
+ */
+/* Milliseconds elapsed since construction or the last reinit(). */
+double Timer::mseconds_elapsed() {
+    const double now = get_mseconds();
+    return now - start_;
+}
+
+/* Seconds elapsed since construction or the last reinit(). */
+double Timer::seconds_elapsed() {
+    const double elapsed_ms = mseconds_elapsed();
+    return elapsed_ms / 1000;
+} 
+
+/* Restart the stopwatch: the current time becomes the new start point. */
+void Timer::reinit() {
+    const double now = get_mseconds();
+    start_ = now;
+}
+
+/* Reset the accumulated time to zero. */
+void Timer::reinit_acc() {
+    this->acc_count_ = 0.0;
+}
+
+/* Open an accumulation window: remember the current time. */
+void Timer::acc_start() {
+    const double now = get_mseconds();
+    acc_start_ = now;
+}
+
+/* Close the window opened by acc_start() and add its length (in
+ * milliseconds) to the accumulated total. */
+void Timer::acc_end() {
+    const double window_ms = get_mseconds() - acc_start_;
+    acc_count_ += window_ms;
+}
+
+/* Total accumulated time in milliseconds. */
+double Timer::acc_mseconds_elapsed() {
+    return this->acc_count_;
+}
+
+/* Total accumulated time in seconds. */
+double Timer::acc_seconds_elapsed() {
+    const double total_ms = acc_count_;
+    return total_ms / 1000;
+}
+
+/* Print "[desc] " followed by the current local time on stdout. */
+void Timer::log_time(const char *desc) {
+    printf("[%s] ", desc);
+    Timer::print_current_time();
+}
+
+/* Print the current local time as "YYYY-MM-DD HH:MM:SS" plus a newline on
+ * stdout. */
+void Timer::print_current_time() {
+    char stamp[80];
+    time_t raw_now;
+    time(&raw_now);
+    struct tm *local_now = localtime(&raw_now);
+    strftime(stamp, 79, "%Y-%m-%d %H:%M:%S", local_now);
+    puts(stamp);
+}
+
+/* Current wall-clock time from gettimeofday(), in milliseconds. */
+double Timer::get_mseconds() {
+    struct timeval now;
+    gettimeofday(&now, NULL);
+    /* seconds plus the microsecond part expressed as a fraction of a second */
+    const double secs = now.tv_sec + ((1e-6) * now.tv_usec);
+    return (double) 1000 * secs;
+}
+
+/* Current wall-clock time in seconds.
+ * get_mseconds() returns milliseconds, so the value is divided by 1000
+ * (it used to be multiplied, which produced a result off by a factor of 10^6). */
+double Timer::get_seconds() {
+    return get_mseconds() / 1000;
+}
+
+
+/********** end of Timer implementation *************/
--- a/src/common/utils/timer.h
+++ b/src/common/utils/timer.h
@ -0,0 +1,39 @@
+/* 
+     Description: 用来统计程序段执行的时间，测试用
+
+     Copyright : All right reserved by ICT
+
+     Author : Zhang Zhonghai
+     Date : 2019/01/30
+*/
+
+#ifndef TIMER_H_
+#define TIMER_H_
+
+/*
+* @brief: Record the run time of this program
+*
+* Two independent measurements are kept, both in milliseconds:
+*   - a stopwatch started by the constructor / reinit(), read with
+*     mseconds_elapsed() / seconds_elapsed();
+*   - an accumulator that sums the windows delimited by acc_start() and
+*     acc_end(), read with acc_mseconds_elapsed() / acc_seconds_elapsed().
+*/
+class Timer {
+public:
+	Timer() { reinit(); reinit_acc();}
+    double mseconds_elapsed();      // milliseconds since the last reinit()
+    double seconds_elapsed();       // same, in seconds
+    void reinit(); // restart time count
+    void reinit_acc();              // reset the accumulated total to zero
+    void acc_start();               // open an accumulation window
+    void acc_end();                 // close the window and add it to the total
+    double acc_mseconds_elapsed();  // accumulated total, milliseconds
+    double acc_seconds_elapsed();   // accumulated total, seconds
+
+    static void log_time(const char *desc);  // print "[desc] " + current local time
+    static void print_current_time();        // print current local time on stdout
+    static double get_mseconds();            // wall-clock timestamp helper
+    static double get_seconds();             // wall-clock timestamp helper
+
+private:
+    double start_;     // stopwatch start point (ms)
+    double acc_start_; // used for accumulate time count; not set until acc_start()
+    double acc_count_; // accumulated total (ms)
+};
+
+#endif
--- a/src/common/utils/util.cpp
+++ b/src/common/utils/util.cpp
@ -0,0 +1,89 @@
+/* 
+     Description: 全局用到的工具
+
+     Copyright : All right reserved by ICT
+
+     Author : Zhang Zhonghai
+     Date : 2019/11/26
+*/
+
+#include "util.h"
+
+#include <vector>
+#include <string>
+#include <sstream>
+#include <algorithm>
+#include <stdarg.h>
+#include <stdio.h>
+
+using std::vector;
+using std::string;
+using std::ostringstream;
+using std::for_each;
+
+// Shared body of the logging functions below: writes the prefix `info_type`
+// to stderr, then formats the variadic arguments of the *enclosing* function
+// with vfprintf. It may only be expanded inside a variadic function whose
+// last named parameter is called `format`. No newline is appended.
+#define PRINT_INFO(info_type)         \
+    do                                \
+    {                                 \
+        fprintf(stderr, info_type);   \
+        va_list ap;                   \
+        va_start(ap, format);         \
+        vfprintf(stderr, format, ap); \
+        va_end(ap);                   \
+    } while (0)
+
+/*
+ * Print an error message ("[Error]: " + printf-style text) to stderr.
+ * The function only prints; it does not terminate the program.
+ */
+void Error(const char *format, ...) {
+    PRINT_INFO("[Error]: ");
+}
+
+/* Print a warning ("[ Warn]: " + printf-style text) to stderr. */
+void Warn(const char *format, ...)
+{
+    PRINT_INFO("[ Warn]: ");
+}
+
+/* Print an informational message ("[ Info]: " + printf-style text) to stderr. */
+void Info(const char *format, ...)
+{
+    PRINT_INFO("[ Info]: ");
+}
+
+/* Print a debug message ("[Debug]: " + printf-style text) to stderr. */
+void Debug(const char *format, ...)
+{
+    PRINT_INFO("[Debug]: ");
+}
+
+/*
+ * string util类
+ */ 
+
+// Join an array of strings into `str`, placing `sep` between consecutive
+// elements (no leading or trailing separator).
+//   strs: elements to join (not modified)
+//   str:  receives the result; set to "" when strs is empty
+//   sep:  separator character
+void StringUtil::Join(vector<string>& strs, 
+                      string& str, 
+                      const char sep) {
+    str.clear();
+    // Guard: end()-1 / strs[size()-1] are undefined on an empty vector.
+    if (strs.empty()) return;
+    ostringstream oss;
+    vector<string>::const_iterator it = strs.begin();
+    oss << *it;
+    for (++it; it != strs.end(); ++it) {
+        oss << sep << *it;
+    }
+    str = oss.str();
+}
+
+// Join an array of ints into one string, placing `sep` between consecutive
+// elements (no leading or trailing separator).
+//   strs: numbers to join (not modified)
+//   str:  receives the result; set to "" when strs is empty
+//   sep:  separator character
+void StringUtil::Join(vector<int>& strs, 
+                      string& str, 
+                      const char sep) {
+    str.clear();
+    // Guard: end()-1 / strs[size()-1] are undefined on an empty vector.
+    if (strs.empty()) return;
+    ostringstream oss;
+    vector<int>::const_iterator it = strs.begin();
+    oss << *it;
+    for (++it; it != strs.end(); ++it) {
+        oss << sep << *it;
+    }
+    str = oss.str();
+}
+
+
--- a/src/common/utils/util.h
+++ b/src/common/utils/util.h
@ -0,0 +1,182 @@
+/* 
+     Description: utilities used across the whole project
+
+     Copyright : All right reserved by ICT
+
+     Author : Zhang Zhonghai
+     Date : 2019/11/26
+*/
+#ifndef UTIL_H_
+#define UTIL_H_
+
+#include <vector>
+#include <string>
+#include <iomanip>
+#include <fstream>
+#include <sstream>
+#include <iostream>
+#include <stdlib.h>
+#include <stdint.h>
+
+using std::ostringstream;
+using std::vector;
+using std::string;
+using std::ifstream;
+using std::ofstream;
+
+using namespace std;
+
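+/*
+ * Logging helpers: each prints a printf-style message to stderr behind a
+ * severity prefix. None of them terminates the program (Error() returns
+ * to its caller, which decides how to proceed).
+ */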
+void Error(const char *format, ...);
+void Warn(const char *format, ...);
+void Info(const char *format, ...);
+void Debug(const char *format, ...);
+
+/*
+ * StringUtil: static helpers for joining, formatting and comparing strings.
+ */
+struct StringUtil {
+    // Join a vector of strings into `str`, with `sep` between elements.
+    static void Join(vector<string>& strs, string& str, const char sep);
+
+    // Join a vector of ints into `str`, with `sep` between elements.
+    static void Join(vector<int>& strs, string& str, const char sep);
+
+    // Format a float with three fixed decimals, then drop a single trailing
+    // '0' if there is one (e.g. 1.5f -> "1.50").
+    static string Format(float val) {
+        ostringstream oss;
+        oss << fixed << setprecision(3) << val;
+        string text = oss.str();
+        if (text.back() == '0')
+            text.erase(text.size() - 1);
+        return text;
+    }
+
+    // Format an int as decimal text.
+    static string Format(int val) {
+        ostringstream oss;
+        oss << val;
+        return oss.str();
+    }
+
+    // Return true when the `len` characters of `left` starting at
+    // `leftOffset` equal the `len` characters of `right` starting at
+    // `rightOffset`. No bounds checking is done on either string.
+    static bool EqualRange(const string &left,
+                           const int leftOffset,
+                           const string &right,
+                           const int rightOffset,
+                           const int len) {
+        int k = 0;
+        while (k < len) {
+            if (left[leftOffset + k] != right[rightOffset + k])
+                return false;
+            ++k;
+        }
+        return true;
+    }
+};
+
+// BinaryUtil: little-endian binary read/write helpers for ints, 64-bit
+// values and NUL-terminated strings, on streams or in-memory buffers.
+struct BinaryUtil {
+    // Write `val` as 4 bytes, least significant byte first.
+    static void WriteInt(ofstream &out, int val) {
+        const uint32_t bits = (uint32_t) val;
+        for (int shift = 0; shift < 32; shift += 8)
+            out << (char)((bits >> shift) & 0xFF);
+    }
+
+    // Write `val` as 8 bytes, least significant byte first.
+    static void WriteLong(ofstream &out, uint64_t val) {
+        for (int shift = 0; shift < 64; shift += 8)
+            out << (char)((val >> shift) & 0xFF);
+    }
+
+    // Write the characters of `s` followed by a terminating zero byte.
+    static void WriteStr(ofstream &out, const string &s) {
+        for (string::size_type k = 0; k != s.size(); ++k)
+            out << s[k];
+        out << (char)0;
+    }
+
+    // Decode a 4-byte little-endian int from buf[*cur..], advancing *cur.
+    // Returns false (leaving *cur and *res untouched) if fewer than 4 bytes
+    // remain before `total`.
+    static bool ReadInt(char *buf, uint64_t total, uint64_t *cur, int *res) {
+        if (*cur + 4 > total)
+            return false;
+        uint32_t value = 0;
+        for (int k = 0; k < 4; ++k)
+            value += ((uint32_t)(uint8_t)buf[(*cur)++]) << (8 * k);
+        *res = value;
+        return true;
+    }
+
+    // Read a 4-byte little-endian int from `ifs`, one byte at a time.
+    // Returns false as soon as any single-byte read fails.
+    static bool ReadInt(ifstream &ifs, int *res) {
+        char bytes[4];
+        for (int k = 0; k < 4; ++k) {
+            if (!ifs.read(&bytes[k], 1))
+                return false;
+        }
+        uint64_t pos = 0;
+        return ReadInt((char*)bytes, 4, &pos, res);
+    }
+
+    // Decode an 8-byte little-endian value from buf[*cur..], advancing *cur.
+    // Returns false (leaving *cur and *res untouched) if fewer than 8 bytes
+    // remain before `total`.
+    static bool ReadLong(char *buf, uint64_t total, uint64_t *cur, uint64_t *res) {
+        if (*cur + 8 > total)
+            return false;
+        uint64_t value = 0;
+        for (int k = 0; k < 8; ++k)
+            value += ((uint64_t)(uint8_t)buf[(*cur)++]) << (8 * k);
+        *res = value;
+        return true;
+    }
+
+    // Read an 8-byte little-endian value from `ifs`, one byte at a time.
+    // Returns false as soon as any single-byte read fails.
+    static bool ReadLong(ifstream &ifs, uint64_t *res) {
+        char bytes[8];
+        for (int k = 0; k < 8; ++k) {
+            if (!ifs.read(&bytes[k], 1))
+                return false;
+        }
+        uint64_t pos = 0;
+        return ReadLong((char*)bytes, 8, &pos, res);
+    }
+
+    // Read a NUL-terminated string from `ifs` into *res (terminator not
+    // stored). Returns false if a read fails before the terminator is seen;
+    // *res then holds the characters read so far.
+    static bool ReadStr(ifstream &ifs, string *res) {
+        res->clear();
+        char ch;
+        for (;;) {
+            if (!ifs.read(&ch, 1))
+                return false;
+            if ((int)ch == 0)
+                return true;
+            res->push_back(ch);
+        }
+    }
+
+    // Read a NUL-terminated string from buf[*cur..] into *res, advancing
+    // *cur past the terminator. Returns false if `total` is reached before
+    // a terminator is found; *res then holds the characters read so far.
+    static bool ReadStr(char *buf, uint64_t total, uint64_t *cur, string *res) {
+        res->clear();
+        for (;;) {
+            if (*cur >= total)
+                return false;
+            const char ch = buf[(*cur)++];
+            if ((int)ch == 0)
+                return true;
+            res->push_back(ch);
+        }
+    }
+};
+
+#endif
--- a/src/common/utils/yarn.cpp
+++ b/src/common/utils/yarn.cpp
@ -0,0 +1,398 @@
+/* yarn.c -- generic thread operations implemented using pthread functions
+ * Copyright (C) 2008, 2011, 2012, 2015, 2018, 2019, 2020 Mark Adler
+ * Version 1.7  12 Apr 2020  Mark Adler
+ * For conditions of distribution and use, see copyright notice in yarn.h
+ */
+
+/* Basic thread operations implemented using the POSIX pthread library.  All
+   pthread references are isolated within this module to allow alternate
+   implementations with other thread libraries.  See yarn.h for the description
+   of these operations. */
+
+/* Version history:
+   1.0    19 Oct 2008  First version
+   1.1    26 Oct 2008  No need to set the stack size -- remove
+                       Add yarn_abort() function for clean-up on error exit
+   1.2    19 Dec 2011  (changes reversed in 1.3)
+   1.3    13 Jan 2012  Add large file #define for consistency with pigz.c
+                       Update thread portability #defines per IEEE 1003.1-2008
+                       Fix documentation in yarn.h for yarn_prefix
+   1.4    19 Jan 2015  Allow yarn_abort() to avoid error message to stderr
+                       Accept and do nothing for NULL argument to free_lock()
+   1.5     8 May 2018  Remove destruct() to avoid use of pthread_cancel()
+                       Normalize the code style
+   1.6     3 Apr 2019  Add debugging information to fail() error messages
+   1.7    12 Apr 2020  Fix use after free bug in ignition()
+ */
+
+// For thread portability.
+#define _XOPEN_SOURCE 700
+#define _POSIX_C_SOURCE 200809L
+#define _THREAD_SAFE
+
+// Use large file functions if available.
+#define _FILE_OFFSET_BITS 64
+
+// External libraries and entities referenced.
+#include <stdio.h>      // fprintf(), stderr
+#include <stdlib.h>     // exit(), malloc(), free(), NULL
+#include <pthread.h>    // pthread_t, pthread_create(), pthread_join(),
+    // pthread_attr_t, pthread_attr_init(), pthread_attr_destroy(),
+    // PTHREAD_CREATE_JOINABLE, pthread_attr_setdetachstate(),
+    // pthread_self(), pthread_equal(),
+    // pthread_mutex_t, PTHREAD_MUTEX_INITIALIZER, pthread_mutex_init(),
+    // pthread_mutex_lock(), pthread_mutex_unlock(), pthread_mutex_destroy(),
+    // pthread_cond_t, PTHREAD_COND_INITIALIZER, pthread_cond_init(),
+    // pthread_cond_broadcast(), pthread_cond_wait(), pthread_cond_destroy()
+#include <errno.h>      // EPERM, ESRCH, EDEADLK, ENOMEM, EBUSY, EINVAL, EAGAIN
+
+// Interface definition.
+#include "yarn.h"
+
+// Constants.
+#define local static            // for non-exported functions and globals
+
+// Error handling external globals, resettable by application.
+char *yarn_prefix = (char*)"yarn";
+void (*yarn_abort)(int) = NULL;
+
+
+// Immediately exit -- use for errors that shouldn't ever happen. Prints
+// "<yarn_prefix>: <reason> (<file>:<line>:<func>)" to stderr, calls
+// yarn_abort(err) if one is installed, then exit(err).
+local void fail(int err, char const *file, long line, char const *func) {
+    // map the errno-style code to a readable reason, when it is a known one
+    char const *reason = NULL;
+    if (err == EPERM)
+        reason = "already unlocked";
+    else if (err == ESRCH)
+        reason = "no such thread";
+    else if (err == EDEADLK)
+        reason = "resource deadlock";
+    else if (err == ENOMEM)
+        reason = "out of memory";
+    else if (err == EBUSY)
+        reason = "can't destroy locked resource";
+    else if (err == EINVAL)
+        reason = "invalid request";
+    else if (err == EAGAIN)
+        reason = "resource unavailable";
+
+    fprintf(stderr, "%s: ", yarn_prefix);
+    if (reason != NULL)
+        fputs(reason, stderr);
+    else
+        fprintf(stderr, "internal error %d", err);
+    fprintf(stderr, " (%s:%ld:%s)\n", file, line, func);
+
+    // give the application's hook a chance to abort in its own way
+    if (yarn_abort != NULL)
+        yarn_abort(err);
+    exit(err);
+}
+
+// Memory handling routines provided by user. If none are provided, malloc()
+// and free() are used, which are therefore assumed to be thread-safe.
+typedef void *(*malloc_t)(size_t);
+typedef void (*free_t)(void *);
+local malloc_t my_malloc_f = malloc;   // current allocator, used by my_malloc()
+local free_t my_free = free;           // current deallocator
+
+// Use user-supplied allocation routines instead of malloc() and free().
+//   lease  - replacement allocator, stored in my_malloc_f
+//   vacate - replacement deallocator, stored in my_free
+// Neither pointer is validated here.
+void yarn_mem(malloc_t lease, free_t vacate) {
+    my_malloc_f = lease;
+    my_free = vacate;
+}
+
+// Memory allocation that cannot fail (from the point of view of the caller).
+// Allocates `size` bytes through my_malloc_f; if that returns NULL, fail()
+// is called with ENOMEM and the caller's file/line, which exits the process.
+local void *my_malloc(size_t size, char const *file, long line) {
+    void *block;
+
+    if ((block = my_malloc_f(size)) == NULL)
+        fail(ENOMEM, file, line, "malloc");
+    return block;
+}
+
+// -- Lock functions --
+
+// A yarn lock: a mutex plus a condition variable and an associated long
+// value that twist_() changes and wait_for_() waits on.
+struct lock_s {
+    pthread_mutex_t mutex;      // locked by possess_(), unlocked by release_()/twist_()
+    pthread_cond_t cond;        // broadcast by twist_(), waited on by wait_for_()
+    long value;                 // the value compared by wait_for_()
+};
+
+// Allocate and initialise a lock whose value starts at `initial`. The mutex
+// and condition variable use default attributes. Any initialisation error is
+// routed to fail() with the caller's file/line. Release with free_lock_().
+lock *new_lock_(long initial, char const *file, long line) {
+    lock *bolt = (lock *)my_malloc(sizeof(struct lock_s), file, line);
+    int ret = pthread_mutex_init(&(bolt->mutex), NULL);
+    if (ret)
+        fail(ret, file, line, "mutex_init");
+    ret = pthread_cond_init(&(bolt->cond), NULL);
+    if (ret)
+        fail(ret, file, line, "cond_init");
+    bolt->value = initial;
+    return bolt;
+}
+
+// Acquire the lock's mutex, blocking until it is available. A non-zero
+// result from pthread_mutex_lock() is routed to fail().
+void possess_(lock *bolt, char const *file, long line) {
+    int ret = pthread_mutex_lock(&(bolt->mutex));
+    if (ret)
+        fail(ret, file, line, "mutex_lock");
+}
+
+// Release the lock's mutex without changing its value or signalling
+// waiters. A non-zero result from pthread_mutex_unlock() is routed to fail().
+void release_(lock *bolt, char const *file, long line) {
+    int ret = pthread_mutex_unlock(&(bolt->mutex));
+    if (ret)
+        fail(ret, file, line, "mutex_unlock");
+}
+
+// Change the lock's value (set it to `val` for TO, add `val` for BY), wake
+// every thread waiting on the lock's condition variable, then unlock the
+// mutex. The mutex is not locked here, so the caller must already hold it.
+void twist_(lock *bolt, enum twist_op op, long val,
+            char const *file, long line) {
+    if (op == TO)
+        bolt->value = val;
+    else if (op == BY)
+        bolt->value += val;
+    // broadcast while still holding the mutex, then let go of it
+    int ret = pthread_cond_broadcast(&(bolt->cond));
+    if (ret)
+        fail(ret, file, line, "cond_broadcast");
+    ret = pthread_mutex_unlock(&(bolt->mutex));
+    if (ret)
+        fail(ret, file, line, "mutex_unlock");
+}
+
+// Loop helper: until (cond) { ... } repeats the body while cond is false.
+#define until(a) while(!(a))
+
+// Block until the lock's value satisfies the comparison selected by `op`
+// against `val`. Each pass waits on the condition variable with the lock's
+// mutex, and the value is re-tested after every wake-up. An `op` outside
+// the four handled cases returns immediately without waiting.
+void wait_for_(lock *bolt, enum wait_op op, long val,
+               char const *file, long line) {
+    switch (op) {
+        case TO_BE:
+            until (bolt->value == val) {
+                int ret = pthread_cond_wait(&(bolt->cond), &(bolt->mutex));
+                if (ret)
+                    fail(ret, file, line, "cond_wait");
+            }
+            break;
+        case NOT_TO_BE:
+            until (bolt->value != val) {
+                int ret = pthread_cond_wait(&(bolt->cond), &(bolt->mutex));
+                if (ret)
+                    fail(ret, file, line, "cond_wait");
+            }
+            break;
+        case TO_BE_MORE_THAN:
+            until (bolt->value > val) {
+                int ret = pthread_cond_wait(&(bolt->cond), &(bolt->mutex));
+                if (ret)
+                    fail(ret, file, line, "cond_wait");
+            }
+            break;
+        case TO_BE_LESS_THAN:
+            until (bolt->value < val) {
+                int ret = pthread_cond_wait(&(bolt->cond), &(bolt->mutex));
+                if (ret)
+                    fail(ret, file, line, "cond_wait");
+            }
+    }
+}
+
+// Return the lock's current value. No locking is performed here.
+long peek_lock(lock *bolt) {
+    return bolt->value;
+}
+
+// Destroy the lock's condition variable and mutex and free its memory.
+// A NULL `bolt` is accepted and ignored. Destroy errors are routed to fail().
+void free_lock_(lock *bolt, char const *file, long line) {
+    if (bolt == NULL)
+        return;
+    int ret = pthread_cond_destroy(&(bolt->cond));
+    if (ret)
+        fail(ret, file, line, "cond_destroy");
+    ret = pthread_mutex_destroy(&(bolt->mutex));
+    if (ret)
+        fail(ret, file, line, "mutex_destroy");
+    my_free(bolt);
+}
+
+// -- Thread functions (uses the lock functions above) --
+
+// Bookkeeping record for one launched thread; records are chained into the
+// `threads` list until the thread is joined.
+struct thread_s {
+    pthread_t id;               // pthread identifier, filled in by pthread_create()
+    int done;                   // true if this thread has exited
+    thread *next;               // for list of all launched threads
+};
+
+// List of threads launched but not joined, count of threads exited but not
+// joined (incremented by ignition() just before exiting).
+// threads_lock is statically initialised; launch_(), join_(), join_all_()
+// and reenter() possess it while they read or modify the `threads` list.
+local lock threads_lock = {
+    PTHREAD_MUTEX_INITIALIZER,
+    PTHREAD_COND_INITIALIZER,
+    0                           // number of threads exited but not joined
+};
+local thread *threads = NULL;       // list of extant threads
+
+// Structure in which to pass the probe and its payload to ignition().
+// Allocated by launch_() and freed by reenter().
+struct capsule {
+    void (*probe)(void *);      // function the new thread runs
+    void *payload;              // argument passed to probe
+    char const *file;           // launch() call site, used in error reports
+    long line;
+};
+
+// Mark the calling thread as done and alert join_all().
+// Installed by ignition() as a pthread cleanup handler; `arg` is the
+// thread's capsule, which is freed here.
+local void reenter(void *arg) {
+    struct capsule *capsule = (struct capsule *)arg;
+
+    // find this thread in the threads list by matching the thread id
+    pthread_t me = pthread_self();
+    possess_(&(threads_lock), capsule->file, capsule->line);
+    thread **prior = &(threads);
+    thread *match;
+    while ((match = *prior) != NULL) {
+        if (pthread_equal(match->id, me))
+            break;
+        prior = &(match->next);
+    }
+    if (match == NULL)
+        fail(ESRCH, capsule->file, capsule->line, "reenter lost");
+
+    // mark this thread as done and move it to the head of the list
+    match->done = 1;
+    if (threads != match) {
+        *prior = match->next;
+        match->next = threads;
+        threads = match;
+    }
+
+    // update the count of threads to be joined and alert join_all()
+    // (twist_ also releases threads_lock, so no release_ follows)
+    twist_(&(threads_lock), BY, +1, capsule->file, capsule->line);
+
+    // free the capsule resource, even if the thread is cancelled (though yarn
+    // doesn't use pthread_cancel() -- you never know)
+    my_free(capsule);
+}
+
+// All threads go through this routine. Just before a thread exits, it marks
+// itself as done in the threads list and alerts join_all() so that the thread
+// resources can be released. Use a cleanup stack so that the marking occurs
+// even if the thread is cancelled.
+// `arg` is the capsule built by launch_(); the thread's result is always NULL.
+local void *ignition(void *arg) {
+    struct capsule *capsule = (struct capsule *)arg;
+
+    // run reenter() before leaving
+    pthread_cleanup_push(reenter, arg);
+
+    // execute the requested function with argument
+    capsule->probe(capsule->payload);
+
+    // mark this thread as done, letting join_all() know, and free capsule
+    // (the argument 1 makes the pop execute reenter())
+    pthread_cleanup_pop(1);
+
+    // exit thread
+    return NULL;
+}
+
+// Not all POSIX implementations create threads as joinable by default, so that
+// is made explicit here.
+// Starts a new thread that runs probe(payload) and returns its bookkeeping
+// record, which has been pushed onto the `threads` list. `file`/`line` are
+// the launch() call site, used when an error is reported through fail().
+thread *launch_(void (*probe)(void *), void *payload,
+                char const *file, long line) {
+    // construct the requested call and argument for the ignition() routine
+    // (allocated instead of automatic so that we're sure this will still be
+    // there when ignition() actually starts up -- ignition() will free this
+    // allocation)
+    struct capsule *capsule = (struct capsule *)my_malloc(sizeof(struct capsule), file, line);
+    capsule->probe = probe;
+    capsule->payload = payload;
+    capsule->file = file;
+    capsule->line = line;
+
+    // assure this thread is in the list before join_all() or ignition() looks
+    // for it
+    possess_(&(threads_lock), file, line);
+
+    // create the thread and call ignition() from that thread
+    thread *th = (thread *)my_malloc(sizeof(struct thread_s), file, line);
+    pthread_attr_t attr;
+    int ret = pthread_attr_init(&attr);
+    if (ret)
+        fail(ret, file, line, "attr_init");
+    ret = pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
+    if (ret)
+        fail(ret, file, line, "attr_setdetachstate");
+    ret = pthread_create(&(th->id), &attr, ignition, capsule);
+    if (ret)
+        fail(ret, file, line, "create");
+    ret = pthread_attr_destroy(&attr);
+    if (ret)
+        fail(ret, file, line, "attr_destroy");
+
+    // put the thread in the threads list for join_all()
+    th->done = 0;
+    th->next = threads;
+    threads = th;
+    release_(&(threads_lock), file, line);
+    return th;
+}
+
+// Join one specific thread: wait for it with pthread_join(), unlink its
+// record from the `threads` list and free the record. If the thread had
+// already been marked done, the exited-but-not-joined count is decremented.
+void join_(thread *ally, char const *file, long line) {
+    // wait for thread to exit and return its resources
+    int ret = pthread_join(ally->id, NULL);
+    if (ret)
+        fail(ret, file, line, "join");
+
+    // find the thread in the threads list
+    possess_(&(threads_lock), file, line);
+    thread **prior = &(threads);
+    thread *match;
+    while ((match = *prior) != NULL) {
+        if (match == ally)
+            break;
+        prior = &(match->next);
+    }
+    if (match == NULL)
+        fail(ESRCH, file, line, "join lost");
+
+    // remove thread from list and update exited count, free thread
+    if (match->done)
+        threads_lock.value--;
+    *prior = match->next;
+    release_(&(threads_lock), file, line);
+    my_free(ally);
+}
+
+// This implementation of join_all() only attempts to join threads that have
+// announced that they have exited (see ignition()). When there are many
+// threads, this is faster than waiting for some random thread to exit while a
+// bunch of other threads have already exited.
+// Returns the number of threads joined by this call.
+int join_all_(char const *file, long line) {
+    // grab the threads list and initialize the joined count
+    int count = 0;
+    possess_(&(threads_lock), file, line);
+
+    // do until threads list is empty
+    while (threads != NULL) {
+        // wait until at least one thread has reentered
+        wait_for_(&(threads_lock), NOT_TO_BE, 0, file, line);
+
+        // find the first thread marked done (should be at or near the top)
+        thread **prior = &(threads);
+        thread *match;
+        while ((match = *prior) != NULL) {
+            if (match->done)
+                break;
+            prior = &(match->next);
+        }
+        if (match == NULL)
+            fail(ESRCH, file, line, "join_all lost");
+
+        // join the thread (will be almost immediate), remove from the threads
+        // list, update the reenter count, and free the thread
+        int ret = pthread_join(match->id, NULL);
+        if (ret)
+            fail(ret, file, line, "join");
+        threads_lock.value--;
+        *prior = match->next;
+        my_free(match);
+        count++;
+    }
+
+    // let go of the threads list and return the number of threads joined
+    release_(&(threads_lock), file, line);
+    return count;
+}
--- a/src/common/utils/yarn.h
+++ b/src/common/utils/yarn.h
@ -0,0 +1,138 @@
+/* yarn.h -- generic interface for thread operations
+ * Copyright (C) 2008, 2011, 2012, 2015, 2018, 2019, 2020 Mark Adler
+ * Version 1.7  12 Apr 2020  Mark Adler
+ */
+
+/*
+  This software is provided 'as-is', without any express or implied
+  warranty.  In no event will the author be held liable for any damages
+  arising from the use of this software.
+
+  Permission is granted to anyone to use this software for any purpose,
+  including commercial applications, and to alter it and redistribute it
+  freely, subject to the following restrictions:
+
+  1. The origin of this software must not be misrepresented; you must not
+     claim that you wrote the original software. If you use this software
+     in a product, an acknowledgment in the product documentation would be
+     appreciated but is not required.
+  2. Altered source versions must be plainly marked as such, and must not be
+     misrepresented as being the original software.
+  3. This notice may not be removed or altered from any source distribution.
+
+  Mark Adler
+  madler@alumni.caltech.edu
+ */
+
+/* Basic thread operations
+
+   This interface isolates the local operating system implementation of threads
+   from the application in order to facilitate platform independent use of
+   threads.  All of the implementation details are deliberately hidden.
+
+   Assuming adequate system resources and proper use, none of these functions
+   can fail.  As a result, any errors encountered will cause an exit() to be
+   executed, or the execution of your own optionally-provided abort function.
+
+   These functions allow the simple launching and joining of threads, and the
+   locking of objects and synchronization of changes of objects.  The latter is
+   implemented with a single lock type that contains an integer value.  The
+   value can be ignored for simple exclusive access to an object, or the value
+   can be used to signal and wait for changes to an object.
+
+   -- Arguments --
+
+   thread *thread;          identifier for launched thread, used by join
+   void probe(void *);      pointer to function "probe", run when thread starts
+   void *payload;           single argument passed to the probe function
+   lock *lock;              a lock with a value -- used for exclusive access to
+                            an object and to synchronize threads waiting for
+                            changes to an object
+   long val;                value to set lock, increment lock, or wait for
+   int n;                   number of threads joined
+
+   -- Thread functions --
+
+   thread = launch(probe, payload) - launch a thread -- exit via probe() return
+   join(thread) - join a thread and by joining end it, waiting for the thread
+        to exit if it hasn't already -- will free the resources allocated by
+        launch() (don't try to join the same thread more than once)
+   n = join_all() - join all threads launched by launch() that are not joined
+        yet and free the resources allocated by the launches, usually to clean
+        up when the thread processing is done -- join_all() returns an int with
+        the count of the number of threads joined (join_all() should only be
+        called from the main thread, and should only be called after any calls
+        of join() have completed)
+
+   -- Lock functions --
+
+   lock = new_lock(val) - create a new lock with initial value val (lock is
+        created in the released state)
+   possess(lock) - acquire exclusive possession of a lock, waiting if necessary
+   twist(lock, [TO | BY], val) - set lock to or increment lock by val, signal
+        all threads waiting on this lock and then release the lock -- must
+        possess the lock before calling (twist releases, so don't do a
+        release() after a twist() on the same lock)
+   wait_for(lock, [TO_BE | NOT_TO_BE | TO_BE_MORE_THAN | TO_BE_LESS_THAN], val)
+        - wait on lock value to be, not to be, be greater than, or be less than
+        val -- must possess the lock before calling, will possess the lock on
+        return but the lock is released while waiting to permit other threads
+        to use twist() to change the value and signal the change (so make sure
+        that the object is in a usable state when waiting)
+   release(lock) - release a possessed lock (do not try to release a lock that
+        the current thread does not possess)
+   val = peek_lock(lock) - return the value of the lock (assumes that lock is
+        already possessed, no possess or release is done by peek_lock())
+   free_lock(lock) - free the resources allocated by new_lock() (application
+        must assure that the lock is released before calling free_lock())
+
+   -- Memory allocation ---
+
+   yarn_mem(better_malloc, better_free) - set the memory allocation and free
+        routines for use by the yarn routines where the supplied routines have
+        the same interface and operation as malloc() and free(), and may be
+        provided in order to supply thread-safe memory allocation routines or
+        for any other reason -- by default malloc() and free() will be used
+
+   -- Error control --
+
+   yarn_prefix - a char pointer to a string that will be the prefix for any
+        error messages that these routines generate before exiting -- if not
+        changed by the application, "yarn" will be used
+   yarn_abort - an external function that will be executed when there is an
+        internal yarn error, due to out of memory or misuse -- this function
+        may exit to abort the application, or if it returns, the yarn error
+        handler will exit (set to NULL by default for no action)
+ */
+
+/* Include guard: without it a second inclusion of this header in the same
+   translation unit redefines enum twist_op / enum wait_op and fails to
+   compile. */
+#ifndef YARN_H_
+#define YARN_H_
+
+#include <stddef.h>     /* size_t, used by yarn_mem() below */
+
+/* Error control: message prefix and optional abort hook (see above). */
+extern char *yarn_prefix;
+extern void (*yarn_abort)(int);
+
+/* Install replacement malloc()/free() style routines. */
+void yarn_mem(void *(*)(size_t), void (*)(void *));
+
+/* Thread functions. The macros pass the call site (__FILE__, __LINE__) to
+   the underscore-suffixed implementations for use in error reports. */
+typedef struct thread_s thread;
+thread *launch_(void (*)(void *), void *, char const *, long);
+#define launch(a, b) launch_(a, b, __FILE__, __LINE__)
+void join_(thread *, char const *, long);
+#define join(a) join_(a, __FILE__, __LINE__)
+int join_all_(char const *, long);
+#define join_all() join_all_(__FILE__, __LINE__)
+
+/* Lock functions, wrapped by call-site macros in the same way. */
+typedef struct lock_s lock;
+lock *new_lock_(long, char const *, long);
+#define new_lock(a) new_lock_(a, __FILE__, __LINE__)
+void possess_(lock *, char const *, long);
+#define possess(a) possess_(a, __FILE__, __LINE__)
+void release_(lock *, char const *, long);
+#define release(a) release_(a, __FILE__, __LINE__)
+enum twist_op { TO, BY };
+void twist_(lock *, enum twist_op, long, char const *, long);
+#define twist(a, b, c) twist_(a, b, c, __FILE__, __LINE__)
+enum wait_op {
+    TO_BE, /* or */ NOT_TO_BE, /* that is the question */
+    TO_BE_MORE_THAN, TO_BE_LESS_THAN };
+void wait_for_(lock *, enum wait_op, long, char const *, long);
+#define wait_for(a, b, c) wait_for_(a, b, c, __FILE__, __LINE__)
+long peek_lock(lock *);
+void free_lock_(lock *, char const *, long);
+#define free_lock(a) free_lock_(a, __FILE__, __LINE__)
+
+#endif /* YARN_H_ */
--- a/src/sam/markdups/markdups.cpp
+++ b/src/sam/markdups/markdups.cpp
@ -7,29 +7,235 @@ Author : Zhang Zhonghai
 Date : 2023/10/23
 */
 #include "markdups_arg.h"
-#include <common/global_arg.h>
+
+#include <common/utils/global_arg.h>
+#include <common/utils/thpool.h>
+#include <common/utils/timer.h>
+#include <common/utils/util.h>
+#include <common/hts/bam_buf.h>
+#include <common/hts/read_ends.h>
+#include <common/utils/yarn.h>
+
+#include <htslib/sam.h>
+#include "htslib/thread_pool.h"

 #include <iostream>
+#include <vector>
+#include <set>
+#include <queue>

 using namespace std;

+/* Block size passed to HTS_OPT_BLOCK_SIZE; parenthesised so the macro
+   expands safely inside larger expressions. */
+#define BAM_BLOCK_SIZE (2 * 1024 * 1024)
+
+/* Forward declaration (struct, matching the definition below) */
+struct ThMarkDupArg;
+/* File-local globals */
+static queue<ThMarkDupArg *> qpThMarkDupArg; // queue holding the per-task argument objects
+static lock *queueFirstLock = new_lock(-1); // signals whether the first task in the queue has finished
+
+/* Argument block handed to each duplicate-marking task.
+ * NOTE: MarkDuplicates() aggregate-initialises this struct positionally, so
+ * the member order must not change. */
+struct ThMarkDupArg
+{
+    vector<BamWrap *> *pvBam;
+    int startIdx; // closed (inclusive) end of the range
+    int endIdx; // open (exclusive) end of the range
+    long seq; // position of this task in the sequence of all tasks
+    bool more; // true if more tasks follow this one
+    volatile bool finish; // whether this task has been fully processed
+    set<int> sDupIdx; // indices of the duplicate reads
+};
 /*
- * mark duplicate 入口
+ * Worker task: find and mark duplicates for one ThMarkDupArg, then tell the
+ * writer thread when the task at the head of the queue has finished.
+ */
+void thread_markdups(void *arg)
+{
+    ThMarkDupArg *task = (ThMarkDupArg *)arg;
+
+    task->sDupIdx.insert(1);
+    /* process the data */
+
+    /* this segment is done: report it to the output thread */
+    possess(queueFirstLock);
+    task->finish = true;
+    cout << "process: " << task->seq << endl;
+    ThMarkDupArg *head = qpThMarkDupArg.front();
+    if (!head->finish)
+    {
+        // head of the queue is still running: just let go of the lock
+        release(queueFirstLock);
+        return;
+    }
+    // head of the queue is finished: publish its seq (twist also unlocks)
+    twist(queueFirstLock, TO, head->seq);
+}
+
+/*
+ * Writer thread: takes finished tasks off qpThMarkDupArg in sequence order
+ * so their results can be written to the output file (results of adjacent
+ * tasks are meant to be merged before writing). Task objects are allocated
+ * by MarkDuplicates() and deleted here once they are no longer needed,
+ * including the final one, which was previously leaked.
+ */
+void thread_write(void *)
+{
+    bool more = false;
+    long seq = 0;
+    possess(queueFirstLock);
+    wait_for(queueFirstLock, TO_BE, seq++); // wait for the first task to finish
+    auto lastP = qpThMarkDupArg.front(); // take the task at the head of the queue
+    qpThMarkDupArg.pop(); // remove the head
+    twist(queueFirstLock, TO, seq);
+    more = lastP->more;
+    while (more) // keep consuming tasks and writing their results
+    {
+        possess(queueFirstLock);
+        if (qpThMarkDupArg.empty()) // the next task may not have been queued yet
+        {
+            release(queueFirstLock);
+            continue;
+        }
+        wait_for(queueFirstLock, TO_BE, seq); // wait for the task to finish
+        auto p = qpThMarkDupArg.front();
+        if (!p->finish) // the head may be unfinished: the twist below can let us in early because this code may run fast
+        {
+            twist(queueFirstLock, TO, -1); // head not finished: -1 keeps this thread from getting past wait_for needlessly
+            continue;
+        }
+        qpThMarkDupArg.pop();
+        twist(queueFirstLock, TO, seq + 1);
+
+        /* process the result data */
+        cout << "finish: " <<  seq - 1 << endl;
+
+        /* prepare the next iteration */
+        delete lastP;
+        more = p->more;
+        lastP = p;
+        seq++;
+    }
+
+    // handle the last task
+    cout << "finish: " << seq - 1 << endl;
+    delete lastP; // the final task is owned by this thread as well
+    pthread_exit(0);
+}
+
+/*
+ * Fill *pKey with the read-ends record for the single read wrapped by bw.
+ * A reverse-strand read (BAM_FREVERSE set) uses its unclipped end and
+ * orientation R; any other read uses its unclipped start and orientation F.
+ * `index` is stored as the read's index in the file.
+ */
+static void buildReadEnds(BamWrap &bw, int64_t index, ReadEnds *pKey)
+{
+    const bool isReverse = (bw.b->core.flag & BAM_FREVERSE) != 0;
+
+    pKey->read1ReferenceIndex = bw.b->core.tid;
+    if (isReverse)
+    {
+        pKey->read1Coordinate = bw.GetUnclippedEnd();
+        pKey->orientation = ReadEnds::R;
+    }
+    else
+    {
+        pKey->read1Coordinate = bw.GetUnclippedStart();
+        pKey->orientation = ReadEnds::F;
+    }
+    pKey->read1IndexInFile = index;
+}
+
+/*
+ * Entry point of the MarkDuplicates sub-command. Assumes the input BAM is
+ * sorted by aligned coordinate; for a single sample, barcodes do not need to
+ * be considered. Returns 0 on success and -1 on failure. In the serial path
+ * every file handle, the header and both thread pools are released on all
+ * exit paths.
 */
 int MarkDuplicates(int argc, char *argv[])
 {
-    // cout << argc << endl;
-    // for (int i = 0; i < argc; ++i) {
-    //     cout << argv[i] << '\t';
-    // }
-    // cout << endl;
-
+    Timer::log_time("程序开始");
+    Timer time_all;
+    /* initialise the arguments */
    GlobalArg &gArg = GlobalArg::Instance();
    MarkDupsArg mdArg;
    vector<AuxVar> vAuxVar;
-    mdArg.parseArgument(argc, argv, &vAuxVar, &gArg);
+    mdArg.parseArgument(argc, argv, &gArg); // parse the command-line arguments
 
-    // cout << ns_md::ValidationStringency::DEFAULT_STRINGENCY << '\t' << ns_md::ValidationStringency::SILENT << endl;
+    // if (gArg.num_threads > 1) // multi-threaded path
+    if (false)
+    {
+        threadpool thpool = thpool_init(gArg.num_threads); // thread pool used for marking duplicates
+        thread *writeth = launch(thread_write, nullptr);   // start the thread that consumes the results
+        for (int i = 0; i < 40; ++i)
+        {
+            ThMarkDupArg *thArg = new ThMarkDupArg({nullptr, i, i * 10, i, true, false});
+            if (i == 39)
+                thArg->more = false;
+            possess(queueFirstLock);                                 // lock
+            qpThMarkDupArg.push(thArg);                              // queue the arguments of the new task
+            release(queueFirstLock);                                 // unlock
+            thpool_add_work(thpool, thread_markdups, (void *)thArg); // submit the new task
+        }
 
+        /* synchronise all threads */
+        thpool_wait(thpool);
+        thpool_destroy(thpool);
+        join(writeth);
+    } else { // single-threaded serial path
+        sam_hdr_t *inBamHeader = nullptr;
+        samFile *inBamFp = nullptr;
+        samFile *outBamFp = nullptr;
+        htsThreadPool htsPoolRead = {NULL, 0};  // pool attached to the input file
+        htsThreadPool htsPoolWrite = {NULL, 0}; // separate pool attached to the output file
+
+        /* Releases whatever has been acquired so far. Files are closed
+           before the thread pools attached to them are destroyed. */
+        auto closeAll = [&]() {
+            if (outBamFp)
+                sam_close(outBamFp);
+            if (inBamFp)
+                sam_close(inBamFp);
+            if (inBamHeader)
+                sam_hdr_destroy(inBamHeader);
+            if (htsPoolRead.pool)
+                hts_tpool_destroy(htsPoolRead.pool);
+            if (htsPoolWrite.pool)
+                hts_tpool_destroy(htsPoolWrite.pool);
+        };
+
+        /* open the input bam file */
+        inBamFp = sam_open_format(gArg.in_fn.c_str(), "r", nullptr);
+        if (! inBamFp) {
+            Error("[%s] load sam/bam file failed.\n", __func__);
+            return -1;
+        }
+        hts_set_opt(inBamFp, HTS_OPT_BLOCK_SIZE, BAM_BLOCK_SIZE);
+        inBamHeader = sam_hdr_read(inBamFp);
+        if (!inBamHeader) {
+            Error("[%s] read sam/bam header failed.\n", __func__);
+            closeAll();
+            return -1;
+        }
+        htsPoolRead.pool = hts_tpool_init(gArg.num_threads);
+        htsPoolWrite.pool = hts_tpool_init(gArg.num_threads);
+        if (!htsPoolRead.pool || !htsPoolWrite.pool)
+        {
+            Error("[%d] failed to set up thread pool\n", __LINE__);
+            closeAll();
+            return -1;
+        }
+        hts_set_opt(inBamFp, HTS_OPT_THREAD_POOL, &htsPoolRead);
+
+        /* create the output file */
+        htsFormat outFormat = {};
+        hts_parse_format(&outFormat, "bam");
+        outBamFp = sam_open_format(gArg.out_fn.c_str(), "wb", &outFormat);
+        if (!outBamFp) {
+            Error("[%s] failed to open \"%s\" for writing.\n", __func__, gArg.out_fn.c_str());
+            closeAll();
+            return -1;
+        }
+        hts_set_opt(outBamFp, HTS_OPT_BLOCK_SIZE, BAM_BLOCK_SIZE);
+        hts_set_opt(outBamFp, HTS_OPT_THREAD_POOL, &htsPoolWrite);
+        // NOTE(review): no header is written to outBamFp (no sam_hdr_write
+        // call) before records are written -- confirm whether that is
+        // intended while this path is still a read/write benchmark.
+
+        int status = 0;
+        {
+            /* Read buffer. Scoped so that it is destroyed before the file
+               and header passed to Init() are released by closeAll(). */
+            BamBufType inBamBuf(gArg.use_asyncio);
+            inBamBuf.Init(inBamFp, inBamHeader, gArg.max_mem);
+
+            /* read the input in batches and process each batch */
+            while (inBamBuf.ReadStat() >= 0)
+            {
+                int readNum = inBamBuf.ReadBam();
+                cout << readNum << endl;
+                // inBamBuf.ClearAll();
+                // cout << inBamBuf.Size() << endl;
+                // NOTE(review): the buffer is cleared up to Size() before the
+                // write loop below runs; presumably that leaves nothing to
+                // write -- verify against BamBufType::ClearBeforeIdx.
+                inBamBuf.ClearBeforeIdx(inBamBuf.Size());
+                // break;
+                for (int i = 0; i < inBamBuf.Size(); ++i) {
+                    if (sam_write1(outBamFp, inBamHeader, inBamBuf[i]->b) < 0)
+                    {
+                        Error("failed writing to \"%s\"\n", gArg.out_fn.c_str());
+                        status = -1;
+                        break;
+                    }
+                }
+                if (status != 0 || readNum == 0)
+                    break;
+            }
+        }
+        
+        // int res = -1;
+        // bam1_t *b = bam_init1();
+        // size_t num = 0;
+        // while ((res = sam_read1(inBamFp, inBamHeader, b)) >= 0)
+        // {
+        //     ++num;
+        // }
+        // cout << num << endl;
+
+        /* TODO: build the ReadEnds record for every read */
+
+        /* TODO: mark duplicates and write the processed result to the file */
+
+        /* close the files and clean up */
+        closeAll();
+        if (status != 0)
+            return -1;
+    }
+
+    // cout << "read ends size: " << sizeof(ReadEnds) << endl;
+
+    cout << "总时间: " << time_all.seconds_elapsed() << endl;
+    Timer::log_time("程序结束");
    return 0;
 }
--- a/src/sam/markdups/markdups_arg.cpp
+++ b/src/sam/markdups/markdups_arg.cpp
@ -8,7 +8,7 @@ Date : 2023/10/27
 */

 #include "markdups_arg.h"
-#include "common/global_arg.h"
+#include "common/utils/global_arg.h"

 #include <cstring>
 #include <vector>
@ -77,10 +77,8 @@ void setBoolArg(bool *arg) {
 // 解析参数
 void MarkDupsArg::parseArgument(int argc,
                                char **argv,
-                                vector<AuxVar> *pvAuxVar,
                                GlobalArg *pGArg) 
 {
-    auto &vAuxVar = *pvAuxVar;
    auto &gArg = *pGArg;

    struct option allOpt[MarkDupsArg::ARG_COUNT + GlobalArg::GLOBAL_ARG_CNT];
@ -258,12 +256,12 @@ void MarkDupsArg::PrintHelp()
            "\n"
            "Required Arguments:\n"
            "\n"
-            "--INPUT,-I <String>           One or more input SAM, BAM or CRAM files to analyze. Must be coordinate sorted.  This\n"
+            "--INPUT <String>              One or more input SAM, BAM or CRAM files to analyze. Must be coordinate sorted.  This\n"
            "                              argument must be specified at least once.Required.\n"
            "\n"
-            "--METRICS_FILE,-M <File>      File to write duplication metrics to  Required.\n"
+            "--METRICS_FILE <File>         File to write duplication metrics to  Required.\n"
            "\n"
-            "--OUTPUT,-O <File>            The output file to write marked records to  Required.\n"
+            "--OUTPUT <File>               The output file to write marked records to  Required.\n"
            "\n"
            "\n"
            "Optional Arguments:\n"
@ -275,13 +273,13 @@ void MarkDupsArg::PrintHelp()
            "--arguments_file <File>       read one or more arguments files and add them to the command line  This argument may be\n"
            "                              specified 0 or more times. Default value: null.\n"
            "\n"
-            "--ASSUME_SORT_ORDER,-ASO <SortOrder>\n"
+            "--ASSUME_SORT_ORDER <SortOrder>\n"
            "                              If not null, assume that the input file has this order even if the header says otherwise.\n"
            "                              Default value: null. Possible values: {unsorted, queryname, coordinate, duplicate,\n"
            "                              unknown}  Cannot be used in conjunction with argument(s) ASSUME_SORTED (AS)\n"
            "\n"
            "\n"
-            "--ASSUME_SORTED,-AS <Boolean> If true, assume that the input file is coordinate sorted even if the header says\n"
+            "--ASSUME_SORTED <Boolean>     If true, assume that the input file is coordinate sorted even if the header says\n"
            "                              otherwise. Deprecated, used ASSUME_SORT_ORDER=coordinate instead.  Default value: false.\n"
            "                              Possible values: {true, false}  Cannot be used in conjunction with argument(s)\n"
            "                              ASSUME_SORT_ORDER (ASO)\n"
@ -291,7 +289,7 @@ void MarkDupsArg::PrintHelp()
            "--CLEAR_DT <Boolean>          Clear DT tag from input SAM records. Should be set to false if input SAM doesn't have this\n"
            "                              tag.  Default true  Default value: true. Possible values: {true, false}\n"
            "\n"
-            "--COMMENT,-CO <String>        Comment(s) to include in the output file's header.  This argument may be specified 0 or\n"
+            "--COMMENT <String>            Comment(s) to include in the output file's header.  This argument may be specified 0 or\n"
            "                              more times. Default value: null.\n"
            "\n"
            "--COMPRESSION_LEVEL <Integer> Compression level for all compressed files created (e.g. BAM and VCF).  Default value: 5.\n"
@ -313,7 +311,7 @@ void MarkDupsArg::PrintHelp()
            "                              BARCODE_TAG hold non-normalized UMIs. Default false.  Default value: false. Possible\n"
            "                              values: {true, false}\n"
            "\n"
-            "--DUPLICATE_SCORING_STRATEGY,-DS <ScoringStrategy>\n"
+            "--DUPLICATE_SCORING_STRATEGY <ScoringStrategy>\n"
            "                              The scoring strategy for choosing the non-duplicate among candidates.  Default value:\n"
            "                              SUM_OF_BASE_QUALITIES. Possible values: {SUM_OF_BASE_QUALITIES,\n"
            "                              TOTAL_MAPPED_REFERENCE_LENGTH, RANDOM}\n"
@ -343,7 +341,7 @@ void MarkDupsArg::PrintHelp()
            "\n"
            "--help,-h <Boolean>           display the help message  Default value: false. Possible values: {true, false}\n"
            "\n"
-            "--MAX_FILE_HANDLES_FOR_READ_ENDS_MAP,-MAX_FILE_HANDLES <Integer>\n"
+            "--MAX_FILE_HANDLES_FOR_READ_ENDS_MAP <Integer>\n"
            "                              Maximum number of file handles to keep open when spilling read ends to disk. Set this\n"
            "                              number a little lower than the per-process maximum number of file that may be open. This\n"
            "                              number can be found by executing the 'ulimit -n' command on a Unix system.  Default value:\n"
@ -360,7 +358,7 @@ void MarkDupsArg::PrintHelp()
            "                              in RAM before spilling to disk. Increasing this number reduces the number of file handles\n"
            "                              needed to sort the file, and increases the amount of RAM needed.  Default value: 500000.\n"
            "\n"
-            "--MAX_SEQUENCES_FOR_DISK_READ_ENDS_MAP,-MAX_SEQS <Integer>\n"
+            "--MAX_SEQUENCES_FOR_DISK_READ_ENDS_MAP <Integer>\n"
            "                              This option is obsolete. ReadEnds will always be spilled to disk.  Default value: 50000.\n"
            "\n"
            "--MOLECULAR_IDENTIFIER_TAG <String>\n"
@ -374,18 +372,18 @@ void MarkDupsArg::PrintHelp()
            "                              For the patterned flowcell models, 2500 is moreappropriate. For other platforms and\n"
            "                              models, users should experiment to find what works best.  Default value: 100.\n"
            "\n"
-            "--PROGRAM_GROUP_COMMAND_LINE,-PG_COMMAND <String>\n"
+            "--PROGRAM_GROUP_COMMAND_LINE <String>\n"
            "                              Value of CL tag of PG record to be created. If not supplied the command line will be\n"
            "                              detected automatically.  Default value: null.\n"
            "\n"
-            "--PROGRAM_GROUP_NAME,-PG_NAME <String>\n"
+            "--PROGRAM_GROUP_NAME <String>\n"
            "                              Value of PN tag of PG record to be created.  Default value: MarkDuplicates.\n"
            "\n"
-            "--PROGRAM_GROUP_VERSION,-PG_VERSION <String>\n"
+            "--PROGRAM_GROUP_VERSION <String>\n"
            "                              Value of VN tag of PG record to be created. If not specified, the version will be detected\n"
            "                              automatically.  Default value: null.\n"
            "\n"
-            "--PROGRAM_RECORD_ID,-PG <String>\n"
+            "--PROGRAM_RECORD_ID <String>\n"
            "                              The program record ID for the @PG record(s) created by this program. Set to null to\n"
            "                              disable PG record creation.  This string may have a suffix appended to avoid collision\n"
            "                              with other program record IDs.  Default value: MarkDuplicates.\n"
@ -416,7 +414,7 @@ void MarkDupsArg::PrintHelp()
            "--READ_TWO_BARCODE_TAG <String>\n"
            "                              Read two barcode SAM tag (ex. BX for 10X Genomics)  Default value: null.\n"
            "\n"
-            "--REFERENCE_SEQUENCE,-R <File>Reference sequence file.  Default value: null.\n"
+            "--REFERENCE_SEQUENCE <File>   Reference sequence file.  Default value: null.\n"
            "\n"
            "--REMOVE_DUPLICATES <Boolean> If true do not write duplicates to the output file instead of writing them with\n"
            "                              appropriate flags set.  Default value: false. Possible values: {true, false}\n"
@ -460,14 +458,6 @@ void MarkDupsArg::PrintHelp()
            "                              reads to start andend on the same position to be considered duplicate) (for this argument,\n"
            "                              \" read end \" means 3' end).  Default value: false. Possible values: {true, false}\n"
            "\n"
-            "--USE_JDK_DEFLATER,-use_jdk_deflater <Boolean>\n"
-            "                              Use the JDK Deflater instead of the Intel Deflater for writing compressed output  Default\n"
-            "                              value: false. Possible values: {true, false}\n"
-            "\n"
-            "--USE_JDK_INFLATER,-use_jdk_inflater <Boolean>\n"
-            "                              Use the JDK Inflater instead of the Intel Inflater for reading compressed input  Default\n"
-            "                              value: false. Possible values: {true, false}\n"
-            "\n"
            "--USE_UNPAIRED_CLIPPED_END <Boolean>\n"
            "                              Use position of the clipping as the end position, when considering duplicates (or use the\n"
            "                              unclipped end position) (for this argument, \" read end \" means 3' end).  Default value:\n"
--- a/src/sam/markdups/markdups_arg.h
+++ b/src/sam/markdups/markdups_arg.h
@ -301,7 +301,6 @@ struct MarkDupsArg
    // 解析参数
    void parseArgument(int argc,
                       char **argv,
-                       vector<AuxVar> *pvAuxVar,
                       GlobalArg *pGArg);

    static void PrintHelp();