diff --git a/.gitignore b/.gitignore
index 09de0bd..e32c3fb 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,6 @@
+# for fast-markdup
+*.sam
+*.log
 # ---> C++
 # Prerequisites
 *.d
diff --git a/.vscode/launch.json b/.vscode/launch.json
index ce739e7..08f219a 100644
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@@ -13,10 +13,10 @@
             "program": "${workspaceRoot}/build/bin/picard_cpp",
             "args": [
                 "MarkDuplicates",
-                "--INPUT", "/mnt/d/data/100w.bam",
+                "--INPUT", "~/data/bam/100w.bam",
                 "--OUTPUT", "out.bam",
                 "--METRICS_FILE", "metrics.txt",
-                "--num_threads", "12",
+                "--num_threads", "1",
                 "--max_mem", "4G",
                 "--verbosity", "DEBUG",
                 "--asyncio", "true",
diff --git a/.vscode/settings.json b/.vscode/settings.json
index 37e59d1..90de365 100644
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -88,6 +88,13 @@
         "typeindex": "cpp",
         "typeinfo": "cpp",
         "valarray": "cpp",
-        "variant": "cpp"
+        "variant": "cpp",
+        "__split_buffer": "cpp",
+        "executor": "cpp",
+        "io_context": "cpp",
+        "netfwd": "cpp",
+        "timer": "cpp",
+        "__nullptr": "cpp",
+        "__node_handle": "c"
     }
 }
\ No newline at end of file
diff --git a/build.sh b/build.sh
index b4dd51f..a71b197 100755
--- a/build.sh
+++ b/build.sh
@@ -1,8 +1,8 @@
 #!/bin/bash
-dir="/home/zzh/work/GeneKit/picard_cpp/build"
+dir="/home/zzh/work/ngs/picard_cpp/build"
 [ -d "$dir" ] && rm -rf "$dir"
 mkdir "$dir"
 cd "$dir"
 #cmake .. -DCMAKE_BUILD_TYPE=Debug
 cmake .. -DCMAKE_BUILD_TYPE=Release
-make -j 8
+make -j 16
diff --git a/run.sh b/run.sh
index a2fc5c3..46d89c0 100755
--- a/run.sh
+++ b/run.sh
@@ -1,9 +1,14 @@
-time /home/zzh/work/GeneKit/picard_cpp/build/bin/picard_cpp \
+input=~/data/bam/zy_normal.bam
+#input=~/data/bam/zy_tumor.bam
+#input=~/data/bam/100w.bam
+
+time /home/zzh/work/ngs/picard_cpp/build/bin/picard_cpp \
     MarkDuplicates \
-    --INPUT /mnt/d/data/zy_tumor.bam \
-    --OUTPUT /mnt/d/data/out.bam \
+    --INPUT $input \
+    --OUTPUT ~/data/bam/out.bam \
+    --INDEX_FORMAT BAI \
     --num_threads 1 \
-    --max_mem 4G \
+    --max_mem 2G \
     --verbosity DEBUG \
     --asyncio true #\
     #--READ_NAME_REGEX ".*?([0-9]+):([0-9]+):([0-9]+)$"
@@ -12,4 +17,4 @@ time /home/zzh/work/GeneKit/picard_cpp/build/bin/picard_cpp \
 # --INPUT /mnt/d/data/100w.bam \
 #    --INPUT /mnt/d/data/zy_normal.bam \
 # zy_tumor
-# tumor_region
\ No newline at end of file
+# tumor_region
diff --git a/src/common/hts/bam_buf.cpp b/src/common/hts/bam_buf.cpp
index 685cea9..dfe509e 100755
--- a/src/common/hts/bam_buf.cpp
+++ b/src/common/hts/bam_buf.cpp
@@ -13,116 +13,87 @@
  * BamBuf类
  */
 // 读取数据直到读完，或者缓冲区满
-int BamBuf::ReadBam()
-{
+int BamBuf::ReadBam() {
     int read_num = 0;
-    if (handle_last)
-    { // 处理上次读入的最后一个bam
-        if (has_enough_space())
-        { // 必须调用，在边界处调整memffset
+    if (handle_last) {             // 处理上次读入的最后一个bam
+        if (has_enough_space()) {  // 必须调用，在边界处调整memffset
             ++read_num;
             append_one_bam();
-        }
-        else
-        {
-            return read_num; // 还是没空间
+        } else {
+            return read_num;  // 还是没空间
         }
     }
-    while (read_stat_ >= 0 && (read_stat_ = sam_read1(fp, hdr, bw->b)) >= 0)
-    {
+    while (read_stat_ >= 0 && (read_stat_ = sam_read1(fp, hdr, bw->b)) >= 0) {
         bw->end_pos_ = BamWrap::BamEndPos(bw->b);
-        if (has_enough_space())
-        { // 还有空间
+        if (has_enough_space()) {  // 还有空间
             append_one_bam();
-            ++read_num; // 放进缓存才算读取到
-        }
-        else
-        {
+            ++read_num;  // 放进缓存才算读取到
+        } else {
             break;
         }
     }
-    if (read_stat_ >= 0)
-    {
+    if (read_stat_ >= 0) {
         handle_last = true;
-    }
-    else
-    {
+    } else {
         handle_last = false;
     }
     return read_num;
 }
 
 // 初始化缓存
-void BamBuf::Init(samFile *fp,
-                  sam_hdr_t *hdr,
-                  int64_t mem_size)
-{
+void BamBuf::Init(samFile *fp, sam_hdr_t *hdr, int64_t mem_size) {
     this->fp = fp;
     this->hdr = hdr;
     this->mem_size = mem_size;
     this->mem = (uint8_t *)malloc(mem_size);
     this->bw = (BamWrap *)malloc(sizeof(BamWrap));
     this->bw->b = bam_init1();
-    if (bw == NULL ||
-        this->mem == NULL ||
-        this->bw->b == NULL)
-    {
+    if (bw == NULL || this->mem == NULL || this->bw->b == NULL) {
         fprintf(stderr, "allocate memory failed! Abort\n");
         exit(-1);
     }
 }
 
-void BamBuf::ClearBeforeIdx(size_t idxInBv)
-{
+void BamBuf::ClearBeforeIdx(size_t idxInBv) {
     if (idxInBv < 1)
         return;
     int i = 0, j = idxInBv;
-    for (; j < bv.size(); ++i, ++j)
-    {
+    for (; j < bv.size(); ++i, ++j) {
         bv[i] = bv[j];
     }
     bv.resize(i);
     prepare_read();
 }
 
-void BamBuf::ClearAll()
-{
-
+void BamBuf::ClearAll() {
     bv.clear();
     prepare_read();
 }
 
 // 为下一次读取做准备, 计算一些边界条件
-inline void BamBuf::prepare_read()
-{
+inline void BamBuf::prepare_read() {
     // 计算余留的下次计算可能用到的bam所占的位置
-    if (bv.size() > 0)
-    {
+    if (bv.size() > 0) {
         BamWrap *bw = bv[0];
         legacy_start = (int64_t)bw - (int64_t)mem;
         bw = bv.back();
         legacy_end = (int64_t)bw + bw->length() - (int64_t)mem;
-    }
-    else
-    {
+    } else {
         legacy_start = legacy_end = 0;
-        mem_offset = 0; // 上次没剩下，那就从头存储
+        mem_offset = 0;  // 上次没剩下，那就从头存储
     }
 }
 
 // 检查缓存是否还有空间
-inline bool BamBuf::has_enough_space()
-{
+inline bool BamBuf::has_enough_space() {
     const uint32_t bam_len = bw->length();
     int64_t potential_end = mem_offset + bam_len;
     if (legacy_start <= legacy_end)
         legacy_start += mem_size;
-    if (potential_end >= legacy_start)
-    {
+    if (potential_end >= legacy_start) {
         return false;
     }
-    if (potential_end >= mem_size)
-    {
+    if (potential_end >= mem_size) {
         mem_offset = 0;
     }
     int64_t virtual_offset = mem_offset;
@@ -133,8 +104,7 @@ inline bool BamBuf::has_enough_space()
 }
 
 // 处理一个读取后的bam
-inline void BamBuf::append_one_bam()
-{
+inline void BamBuf::append_one_bam() {
     BamWrap *bwp = (BamWrap *)(mem + mem_offset);
     *bwp = *bw;
     bwp->b = (bam1_t *)((char *)bwp + sizeof(*bwp));
@@ -148,12 +118,9 @@ inline void BamBuf::append_one_bam()
 }
 
 // 处理上次读入的最后一个read
-inline bool BamBuf::handle_last_read()
-{
-    if (handle_last)
-    { // 处理上次读入的最后一个bam
-        if (has_enough_space())
-        { // 必须调用，在边界处调整memffset
+inline bool BamBuf::handle_last_read() {
+    if (handle_last) {             // 处理上次读入的最后一个bam
+        if (has_enough_space()) {  // 必须调用，在边界处调整memffset
             append_one_bam();
             handle_last = false;
             return true;
@@ -166,49 +133,35 @@ inline bool BamBuf::handle_last_read()
  * AsyncIoBamBuf 类
  */
 // 初始化缓存
-void AsyncIoBamBuf::Init(samFile *fp,
-                         sam_hdr_t *hdr,
-                         int64_t mem_size)
-{
-    if (use_async_io_)
-    {
+void AsyncIoBamBuf::Init(samFile *fp, sam_hdr_t *hdr, int64_t mem_size) {
+    if (use_async_io_) {
         buf1_.Init(fp, hdr, mem_size >> 1);
         buf2_.Init(fp, hdr, mem_size >> 1);
         pi_ = &buf1_;
         po_ = &buf2_;
         tid_ = (pthread_t *)malloc(sizeof(pthread_t));
-    }
-    else
-    {
+    } else {
         buf1_.Init(fp, hdr, mem_size);
         pi_ = &buf1_;
     }
 }
 
 // 读取数据
-int AsyncIoBamBuf::ReadBam()
-{
-    if (use_async_io_)
-    {
+int AsyncIoBamBuf::ReadBam() {
+    if (use_async_io_) {
         hasThread = true;
         return async_read_bam();
-    }
-    else
-    {
+    } else {
         return sync_read_bam();
     }
 }
 
-int AsyncIoBamBuf::sync_read_bam()
-{
+int AsyncIoBamBuf::sync_read_bam() {
     int read_num = 0;
-    if (clear_all_)
-    {
+    if (clear_all_) {
         clear_all_ = false;
         pi_->ClearAll();
-    }
-    else if (clear_before_idx_ > 0) 
-    {
+    } else if (clear_before_idx_ > 0) {
         pi_->ClearBeforeIdx(clear_before_idx_);
         clear_before_idx_ = 0;
     }
@@ -217,23 +170,18 @@ int AsyncIoBamBuf::sync_read_bam()
     return read_num;
 }
 
-int AsyncIoBamBuf::async_read_bam()
-{
+int AsyncIoBamBuf::async_read_bam() {
     int read_num = 0;
-    if (first_read_)
-    {
+    if (first_read_) {
         read_num = pi_->ReadBam();
         first_read_ = false;
         refresh_bam_arr();
-    }
-    else
-    {
+    } else {
         // join, 交换缓冲区指针
         pthread_join(*tid_, 0);
         resize_buf();
 
-        if (need_read_)
-        { // 需要交换指针
+        if (need_read_) {  // 需要交换指针
             BamBuf *tmp = pi_;
             pi_ = po_;
             po_ = tmp;
@@ -246,72 +194,52 @@ int AsyncIoBamBuf::async_read_bam()
     return read_num;
 }
 
-void *AsyncIoBamBuf::async_read(void *data)
-{
+void *AsyncIoBamBuf::async_read(void *data) {
     AsyncIoBamBuf *ab = (AsyncIoBamBuf *)data;
-    if (ab->need_read_ && ab->ReadStat() >= 0)
-    { // 需要读取
+    if (ab->need_read_ && ab->ReadStat() >= 0) {  // 需要读取
         ab->last_read_num_ = ab->po_->ReadBam();
-    }
-    else
-    {
+    } else {
         ab->last_read_num_ = 0;
     }
     pthread_exit(0);
 }
 
-// 为下一次读取做准备, 计算一些边界条件，延迟操作，因为此时可能po_对应的buf正在读取
-void AsyncIoBamBuf::ClearBeforeIdx(size_t idxInBv)
-{
+// 为下一次读取做准备,
+// 计算一些边界条件，延迟操作，因为此时可能po_对应的buf正在读取
+void AsyncIoBamBuf::ClearBeforeIdx(size_t idxInBv) {
     clear_before_idx_ = idxInBv;
 }
 
 // 清空上一次所有读入的数据，延迟操作，因为此时可能po_对应的buf正在读取
-void AsyncIoBamBuf::ClearAll()
-{
-    clear_all_ = true;
-}
+void AsyncIoBamBuf::ClearAll() { clear_all_ = true; }
 
-inline void AsyncIoBamBuf::resize_buf()
-{
-    if (clear_all_)
-    { // 清理上一轮的数据
+inline void AsyncIoBamBuf::resize_buf() {
+    if (clear_all_) {  // 清理上一轮的数据
         clear_all_ = false;
         po_->ClearBeforeIdx(legacy_size_);
         pi_->ClearAll();
-        if (pi_->handle_last_read()) // 上次读取有一个read没放入缓存
-        {
+        if (pi_->handle_last_read()) { // 上次读取有一个read没放入缓存
             last_read_num_ += 1;
-            legacy_size_ = pi_->Size(); // 应该只有一个read
+            legacy_size_ = pi_->Size();  // 应该只有一个read
             need_read_ = true;
-        }
-        else // 没空间存放，则不交换指针，或者文件已经读取完毕
-        {
+        } else { // 没空间存放，则不交换指针，或者文件已经读取完毕
             legacy_size_ = 0;
             need_read_ = false;
         }
-    }
-    else if (clear_before_idx_ > 0)
-    {
-        if (clear_before_idx_ < legacy_size_)
-        {
+    } else if (clear_before_idx_ > 0) {
+        if (clear_before_idx_ < legacy_size_) {
             po_->ClearBeforeIdx(clear_before_idx_);
             legacy_size_ -= clear_before_idx_;
             // 不需要交换指针，不需要读取
             need_read_ = false;
-        }
-        else
-        {
+        } else {
             po_->ClearBeforeIdx(legacy_size_);
             pi_->ClearBeforeIdx(clear_before_idx_ - legacy_size_);
-            if (pi_->handle_last_read()) // 上次读取有一个read没放入缓存
-            {
+            if (pi_->handle_last_read()) {// 上次读取有一个read没放入缓存
                 last_read_num_ += 1;
-                legacy_size_ = pi_->Size(); // 应该只有一个read
+                legacy_size_ = pi_->Size();  // 应该只有一个read
                 need_read_ = true;
-            }
-            else // 没空间存放，则不交换指针，或者文件已经读取完毕
-            {
+            } else { // 没空间存放，则不交换指针，或者文件已经读取完毕
                 legacy_size_ = 0;
                 need_read_ = false;
             }
diff --git a/src/common/hts/bam_buf.h b/src/common/hts/bam_buf.h
index 61c0934..5ea0c99 100644
--- a/src/common/hts/bam_buf.h
+++ b/src/common/hts/bam_buf.h
@@ -10,19 +10,18 @@
 #ifndef BAM_BUF_H_
 #define BAM_BUF_H_
 
-#include <vector>
-#include <stdint.h>
+#include <htslib/sam.h>
+#include <pthread.h>
 #include <stddef.h>
+#include <stdint.h>
 #include <stdio.h>
 #include <string.h>
-#include <functional>
-#include <pthread.h>
 
+#include <fstream>
+#include <functional>
 #include <iostream>
 #include <sstream>
-#include <fstream>
-
-#include <htslib/sam.h>
+#include <vector>
 
 #include "bam_wrap.h"
 
@@ -32,24 +31,22 @@ using namespace std;
 /*
  * 存放读入的bam数据
  */
-struct BamBuf
-{
-    sam_hdr_t *hdr;           // sam文件的header信息
-    samFile *fp;              // sam文件指针
-    BamWrap *bw = nullptr;    // 用来循环读入bam
-    uint8_t *mem = nullptr;   // 用来存放bam的数据, 程序结束后自动释放，所以没在析构函数里释放
-    int64_t mem_offset = 0;   // 下一次要存放的位置
-    int64_t mem_size;         // 缓存大小
-    int read_stat_ = 0;       // 读取状态，是否读完
-    vector<BamWrap *> bv;     // 方便对bam数据的访问
-    int64_t legacy_start = 0; // 没处理完的bam在mem中的起始位置, 闭区间
-    int64_t legacy_end = 0;   // 没处理完的bam在mem中的结束位置, 开区间
-    bool handle_last = false; // 上次最后读入的bam是否需要处理
+struct BamBuf {
+    sam_hdr_t *hdr;          // sam文件的header信息
+    samFile *fp;             // sam文件指针
+    BamWrap *bw = nullptr;   // 用来循环读入bam
+    uint8_t *mem = nullptr;  // 用来存放bam的数据,
+                             // 程序结束后自动释放，所以没在析构函数里释放
+    int64_t mem_offset = 0;  // 下一次要存放的位置
+    int64_t mem_size;        // 缓存大小
+    int read_stat_ = 0;      // 读取状态，是否读完
+    vector<BamWrap *> bv;    // 方便对bam数据的访问
+    int64_t legacy_start = 0;  // 没处理完的bam在mem中的起始位置, 闭区间
+    int64_t legacy_end = 0;  // 没处理完的bam在mem中的结束位置, 开区间
+    bool handle_last = false;  // 上次最后读入的bam是否需要处理
 
     // 初始化缓存
-    void Init(samFile *fp,
-              sam_hdr_t *hdr,
-              int64_t mem_size);
+    void Init(samFile *fp, sam_hdr_t *hdr, int64_t mem_size);
     // 读取数据直到读完，或者缓冲区满
     int ReadBam();
     // 为下一次读取做准备, 计算一些边界条件
@@ -57,27 +54,24 @@ struct BamBuf
     // 清空上一次所有读入的数据
     void ClearAll();
     inline int64_t Size() { return bv.size(); }  // 包含多少个read
-    inline int ReadStat() { return read_stat_; } // 文件的读取状态，是否可读（读取完全）
-    ~BamBuf()
-    {
-        if (this->mem != nullptr)
-        {
+    inline int ReadStat() {
+        return read_stat_;
+    }  // 文件的读取状态，是否可读（读取完全）
+    ~BamBuf() {
+        if (this->mem != nullptr) {
             free(this->mem);
         }
-        if (this->bw != nullptr) 
-        {
+        if (this->bw != nullptr) {
             bam_destroy1(bw->b);
             free(bw);
         }
     }
-    void FreeMemory() // 释放开辟的内存
+    void FreeMemory()  // 释放开辟的内存
     {
-        if (this->mem != nullptr)
-        {
+        if (this->mem != nullptr) {
             free(this->mem);
         }
-        if (this->bw != nullptr)
-        {
+        if (this->bw != nullptr) {
             bam_destroy1(bw->b);
             free(bw);
         }
@@ -102,31 +96,31 @@ struct BamBuf
 /*
  * io异步缓冲区
  */
-struct AsyncIoBamBuf
-{
+struct AsyncIoBamBuf {
     BamBuf buf1_;
     BamBuf buf2_;
-    BamBuf *pi_; // 当前用的buf
-    BamBuf *po_; // 后台在读取的buf
+    BamBuf *pi_;  // 当前用的buf
+    BamBuf *po_;  // 后台在读取的buf
     pthread_t *tid_ = NULL;
     bool hasThread = false;
-    int64_t legacy_size_ = 0; // 上一轮运算之后，缓存中还剩余的上次读取的read数量
+    int64_t legacy_size_ =
+        0;  // 上一轮运算之后，缓存中还剩余的上次读取的read数量
     bool first_read_ = true;
-    int last_read_num_ = 0; // 上一次读取了多少reads
+    int last_read_num_ = 0;  // 上一次读取了多少reads
     bool need_read_ = true;
     bool use_async_io_ = true;
-    int64_t clear_before_idx_ = 0; // 用户异步读取，下一轮读取之前清理掉clear_before_idx_之前的所有reads
-    bool clear_all_ = false;       // 用于异步读取，下一轮读取之前清理掉之前的所有reads
+    int64_t clear_before_idx_ =
+        0;  // 用户异步读取，下一轮读取之前清理掉clear_before_idx_之前的所有reads
+    bool clear_all_ =
+        false;  // 用于异步读取，下一轮读取之前清理掉之前的所有reads
 
-    vector<BamWrap *> bam_arr_; // 用来访问buf中的bam
+    vector<BamWrap *> bam_arr_;  // 用来访问buf中的bam
 
     AsyncIoBamBuf() {}
     AsyncIoBamBuf(bool use_async) : use_async_io_(use_async) {}
     // 析构
-    ~AsyncIoBamBuf()
-    {
-        if (tid_ != NULL)
-        {
+    ~AsyncIoBamBuf() {
+        if (tid_ != NULL) {
             if (hasThread)
                 pthread_join(*tid_, 0);
             free(tid_);
@@ -136,35 +130,29 @@ struct AsyncIoBamBuf
     }
 
     // 初始化缓存
-    void Init(samFile *fp,
-              sam_hdr_t *hdr,
-              int64_t mem_size);
+    void Init(samFile *fp, sam_hdr_t *hdr, int64_t mem_size);
 
     // 读取数据
     int ReadBam();
     // 为下一次读取做准备, 计算一些边界条件
     void ClearBeforeIdx(size_t idxInBv);
-    vector<BamWrap *> &GetBamArr() { return bam_arr_; } // 获取bam array
+    vector<BamWrap *> &GetBamArr() { return bam_arr_; }  // 获取bam array
     // 清空上一次所有读入的数据
     void ClearAll();
     // 包含的read数量
     inline int64_t Size() { return legacy_size_ + pi_->Size(); }
     inline int ReadStat() { return pi_->read_stat_; }
-    inline BamWrap *operator[](int64_t pos)
-    {
-        return bam_arr_[pos];
-    }
+    inline BamWrap *operator[](int64_t pos) { return bam_arr_[pos]; }
     // 获取某一段reads
-    inline vector<BamWrap*> Slice(size_t startIdx, size_t endIdx)
-    {
+    inline vector<BamWrap *> Slice(size_t startIdx, size_t endIdx) {
         if (endIdx > startIdx) {
             auto begItr = bam_arr_.begin();
-            return std::move(vector<BamWrap *>(begItr + startIdx, begItr + endIdx));
+            return std::move(
+                vector<BamWrap *>(begItr + startIdx, begItr + endIdx));
         }
         return std::move(vector<BamWrap *>());
     }
-    void FreeMemory()
-    {
+    void FreeMemory() {
         buf1_.FreeMemory();
         buf2_.FreeMemory();
     }
@@ -176,11 +164,9 @@ struct AsyncIoBamBuf
     // 异步读取线程函数
     static void *async_read(void *data);
     void resize_buf();
-    inline void refresh_bam_arr()
-    {
+    inline void refresh_bam_arr() {
         bam_arr_.resize(this->Size());
-        for (int i = 0; i < bam_arr_.size(); ++i)
-        {
+        for (int i = 0; i < bam_arr_.size(); ++i) {
             if (i < legacy_size_)
                 bam_arr_[i] = (*po_)[i];
             else
diff --git a/src/common/hts/bam_wrap.h b/src/common/hts/bam_wrap.h
index d338256..1562514 100644
--- a/src/common/hts/bam_wrap.h
+++ b/src/common/hts/bam_wrap.h
@@ -9,15 +9,14 @@
 
 #ifndef BAM_WRAP_H_
 #define BAM_WRAP_H_
-#include <map>
-#include <string>
-#include <vector>
-#include <sstream>
-
+#include <htslib/sam.h>
 #include <limits.h>
 #include <math.h>
 
-#include <htslib/sam.h>
+#include <map>
+#include <sstream>
+#include <string>
+#include <vector>
 
 using namespace std;
 
@@ -28,52 +27,32 @@ using namespace std;
 /*
  * sam read的封装
  */
-struct BamWrap
-{
+struct BamWrap {
     // 将contig左移后加上pos作为全局位置
-    const static int MAX_CONTIG_LEN_SHIFT = 30;
+    const static int MAX_CONTIG_LEN_SHIFT = 40;  // 将染色体id左移多少位，和位点拼合在一起
     const static int READ_MAX_LENGTH = 200;
-    const static int READ_MAX_DEPTH = 1000; // 这只是用来初始化空间用的，深度大于这个值也没关系
+    const static int READ_MAX_DEPTH = 1000;  // 这只是用来初始化空间用的，深度大于这个值也没关系
 
     // 成员变量尽量少，减少占用内存空间
     bam1_t *b;
-    int64_t end_pos_; // bam的全局结束位置, 闭区间
+    int64_t end_pos_;  // bam的全局结束位置, 闭区间
 
     // 全局开始位置
-    inline int64_t start_pos()
-    {
-        return bam_global_pos(b);
-    }
+    inline int64_t start_pos() { return bam_global_pos(b); }
     // 全局结束位置
-    inline int64_t end_pos()
-    {
-        return end_pos_;
-    }
+    inline int64_t end_pos() { return end_pos_; }
     // 和reference对应的序列长度
-    inline int16_t read_len()
-    {
-        return (end_pos_ - start_pos() + 1);
-    }
+    inline int16_t read_len() { return (end_pos_ - start_pos() + 1); }
 
     // 在contig内的开始位置
-    inline int32_t contig_pos()
-    {
-        return b->core.pos;
-    }
+    inline int32_t contig_pos() { return b->core.pos; }
     // 在contig内部的结束位置
-    inline int32_t contig_end_pos()
-    {
-        return bam_pos(end_pos_);
-    }
+    inline int32_t contig_end_pos() { return bam_pos(end_pos_); }
     // 序列的长度（AGTC字母个数）
-    inline int16_t seq_len()
-    {
-        return b->core.l_qseq;
-    }
+    inline int16_t seq_len() { return b->core.l_qseq; }
 
     // 算上开头的softclip
-    inline int32_t softclip_start()
-    {
+    inline int32_t softclip_start() {
         const uint32_t *cigar = bam_get_cigar(b);
         const bam1_core_t &bc = b->core;
         const char c = bam_cigar_opchr(cigar[0]);
@@ -84,8 +63,7 @@ struct BamWrap
     }
 
     // 算上结尾的softclip
-    inline int32_t softclip_end()
-    {
+    inline int32_t softclip_end() {
         const uint32_t *cigar = bam_get_cigar(b);
         const bam1_core_t &bc = b->core;
         const char c = bam_cigar_opchr(cigar[bc.n_cigar - 1]);
@@ -96,8 +74,7 @@ struct BamWrap
     }
 
     // 算上结尾的softclip
-    inline int32_t right_softclip_len()
-    {
+    inline int32_t right_softclip_len() {
         const uint32_t *cigar = bam_get_cigar(b);
         const bam1_core_t &bc = b->core;
         const char c = bam_cigar_opchr(cigar[bc.n_cigar - 1]);
@@ -108,14 +85,12 @@ struct BamWrap
     }
 
     // 获取序列
-    inline std::string sequence()
-    {
+    inline std::string sequence() {
         ostringstream oss;
         char *seq = (char *)bam_get_seq(b);
         const bam1_core_t &bc = b->core;
         const char base_to_char[16] = {'N', 'A', 'C', 'N', 'G', 'N', 'N', 'N', 'T', 'N', 'N', 'N', 'N', 'N', 'N', 'N'};
-        for (int i = 0; i < bc.l_qseq; ++i)
-        {
+        for (int i = 0; i < bc.l_qseq; ++i) {
             char base = base_to_char[bam_seqi(seq, i)];
             oss << base;
         }
@@ -123,18 +98,13 @@ struct BamWrap
     }
 
     // 获取名字
-    inline const char *query_name()
-    {
-        return bam_get_qname(b);
-    }
+    inline const char *query_name() { return bam_get_qname(b); }
     // 获取cigar 字符串
-    inline string cigar_str()
-    {
+    inline string cigar_str() {
         ostringstream oss;
         const uint32_t *cigar = bam_get_cigar(b);
         const bam1_core_t &bc = b->core;
-        for (int i = 0; i < bc.n_cigar; ++i)
-        {
+        for (int i = 0; i < bc.n_cigar; ++i) {
             const char c = bam_cigar_opchr(cigar[i]);
             const int len = bam_cigar_oplen(cigar[i]);
             oss << len << c;
@@ -143,21 +113,14 @@ struct BamWrap
     }
 
     // 占用的内存大小
-    inline int16_t length()
-    {
-        return sizeof(*this) +
-               sizeof(bam1_t) +
-               b->l_data;
-    }
+    inline int16_t length() { return sizeof(*this) + sizeof(bam1_t) + b->l_data; }
 
     // 获取cigar中insert的总长度
-    inline int32_t insert_cigar_len()
-    {
+    inline int32_t insert_cigar_len() {
         const uint32_t *cigar = bam_get_cigar(b);
         const bam1_core_t &bc = b->core;
         int ret = 0;
-        for (int i = 0; i < bc.n_cigar; ++i)
-        {
+        for (int i = 0; i < bc.n_cigar; ++i) {
             const char c = bam_cigar_opchr(cigar[i]);
             const int len = bam_cigar_oplen(cigar[i]);
             if (c == 'I')
@@ -167,13 +130,11 @@ struct BamWrap
     }
 
     // 获取cigar中delete的总长度
-    inline int32_t del_cigar_len()
-    {
+    inline int32_t del_cigar_len() {
         const uint32_t *cigar = bam_get_cigar(b);
         const bam1_core_t &bc = b->core;
         int ret = 0;
-        for (int i = 0; i < bc.n_cigar; ++i)
-        {
+        for (int i = 0; i < bc.n_cigar; ++i) {
             const char c = bam_cigar_opchr(cigar[i]);
             const int len = bam_cigar_oplen(cigar[i]);
             if (c == 'D')
@@ -183,13 +144,11 @@ struct BamWrap
     }
 
     // 计算sam read的终点位置
-    static inline int64_t BamEndPos(const bam1_t *b)
-    {
+    static inline int64_t BamEndPos(const bam1_t *b) {
         const uint32_t *cigar = bam_get_cigar(b);
         const bam1_core_t &bc = b->core;
         int start_offset = -1;
-        for (int i = 0; i < bc.n_cigar; ++i)
-        {
+        for (int i = 0; i < bc.n_cigar; ++i) {
             const char c = bam_cigar_opchr(cigar[i]);
             const int len = bam_cigar_oplen(cigar[i]);
             if (c == 'D' || c == 'N' || c == 'M' || c == '=' || c == 'X')
@@ -198,31 +157,22 @@ struct BamWrap
         return (((int64_t)b->core.tid << MAX_CONTIG_LEN_SHIFT) | (int64_t)(b->core.pos + start_offset));
     };
 
-    bool HasWellDefinedFragmentSize()
-    {
+    bool HasWellDefinedFragmentSize() {
         const bam1_core_t &bc = b->core;
         bool hasWellDefinedFragmentSize = true;
-        if (bc.isize == 0 ||
-            !(bc.flag & BAM_FPAIRED) ||
-            ((bc.flag & BAM_FUNMAP) || (bc.flag & BAM_FMUNMAP)) ||
-            ((bool)(bc.flag & BAM_FREVERSE) == (bool)(bc.flag & BAM_FMREVERSE)))
-        {
+        if (bc.isize == 0 || !(bc.flag & BAM_FPAIRED) || ((bc.flag & BAM_FUNMAP) || (bc.flag & BAM_FMUNMAP)) ||
+            ((bool)(bc.flag & BAM_FREVERSE) == (bool)(bc.flag & BAM_FMREVERSE))) {
             hasWellDefinedFragmentSize = false;
-        }
-        else if (bc.flag & BAM_FREVERSE)
-        {
+        } else if (bc.flag & BAM_FREVERSE) {
             hasWellDefinedFragmentSize = contig_end_pos() > bc.mpos ? true : false;
-        }
-        else
-        {
+        } else {
             hasWellDefinedFragmentSize = bc.pos <= bc.mpos + bc.isize ? true : false;
         }
         return hasWellDefinedFragmentSize;
     }
 
     // 计算bam的adapterBoundary
-    int GetAdapterBoundary()
-    {
+    int GetAdapterBoundary() {
         const bam1_core_t &bc = b->core;
         int adapterBoundary;
         if (!HasWellDefinedFragmentSize())
@@ -230,39 +180,34 @@ struct BamWrap
         else if (bc.flag & BAM_FREVERSE)
             adapterBoundary = bc.mpos - 1;
         else
-            adapterBoundary = bc.pos + abs(bc.isize); // GATK4.0 和 GATK3.5不一样，3.5的这里+1
+            adapterBoundary = bc.pos + abs(bc.isize);  // GATK4.0 和 GATK3.5不一样，3.5的这里+1
         return adapterBoundary;
     }
 
     // 获取开头的I的长度
-    inline int GetHeadInsertLen()
-    {
+    inline int GetHeadInsertLen() {
         int insLen = 0;
         const uint32_t *cigar = bam_get_cigar(b);
         const bam1_core_t &bc = b->core;
-        for (int i = 0; i < bc.n_cigar; ++i)
-        {
+        for (int i = 0; i < bc.n_cigar; ++i) {
             const char c = bam_cigar_opchr(cigar[i]);
             const int len = bam_cigar_oplen(cigar[i]);
-            if (c == 'I')
-            {
+            if (c == 'I') {
                 insLen = len;
                 break;
-            }
-            else if (c != 'H' && c != 'S')
+            } else if (c != 'H' && c != 'S')
                 break;
         }
         return insLen;
     }
 
-    // 获取soft clip开始位置(能处理H和S相连的情况，有这种情况么？, 注意开头的I要当做S？)
-    inline int64_t GetSoftStart()
-    {
+    // 获取soft clip开始位置(能处理H和S相连的情况，有这种情况么？,
+    // 注意开头的I要当做S？)
+    inline int64_t GetSoftStart() {
         int64_t softStart = b->core.pos;
         const uint32_t *cigar = bam_get_cigar(b);
         const bam1_core_t &bc = b->core;
-        for (int i = 0; i < bc.n_cigar; ++i)
-        {
+        for (int i = 0; i < bc.n_cigar; ++i) {
             const char c = bam_cigar_opchr(cigar[i]);
             const int len = bam_cigar_oplen(cigar[i]);
             if (c == 'S' || c == 'I')
@@ -274,13 +219,11 @@ struct BamWrap
     }
 
     // 获取unclipped开始位置(包括hardclip)
-    inline int64_t GetUnclippedStart()
-    {
+    inline int64_t GetUnclippedStart() {
         int64_t start = b->core.pos;
         const uint32_t *cigar = bam_get_cigar(b);
         const bam1_core_t &bc = b->core;
-        for (int i = 0; i < bc.n_cigar; ++i)
-        {
+        for (int i = 0; i < bc.n_cigar; ++i) {
             const char c = bam_cigar_opchr(cigar[i]);
             const int len = bam_cigar_oplen(cigar[i]);
             if (c == 'S' || c == 'H')
@@ -292,13 +235,11 @@ struct BamWrap
     }
 
     // 获取unclipped结束位置(包括hardclip)
-    inline int64_t GetUnclippedEnd()
-    {
+    inline int64_t GetUnclippedEnd() {
         int64_t end_pos = bam_endpos(b);
         const uint32_t *cigar = bam_get_cigar(b);
         const bam1_core_t &bc = b->core;
-        for (int i = bc.n_cigar - 1; i >= 0; --i)
-        {
+        for (int i = bc.n_cigar - 1; i >= 0; --i) {
             const char c = bam_cigar_opchr(cigar[i]);
             const int len = bam_cigar_oplen(cigar[i]);
             if (c == 'S' || c == 'H')
@@ -311,12 +252,10 @@ struct BamWrap
 
     /* 获取碱基质量分数的加和 */
     /** Calculates a score for the read which is the sum of scores over Q15. */
-    inline int GetSumOfBaseQualities()
-    {
+    inline int GetSumOfBaseQualities() {
         int score = 0;
         uint8_t *qual = bam_get_qual(b);
-        for (int i = 0; i < b->core.l_qseq; ++i)
-        {
+        for (int i = 0; i < b->core.l_qseq; ++i) {
             if (qual[i] >= 15)
                 score += qual[i];
         }
@@ -327,93 +266,63 @@ struct BamWrap
     /* 与flag相关的检测 */
 
     /* 没有比对上 unmapped */
-    inline bool GetReadUnmappedFlag() 
-    {
-        return b->core.flag & BAM_FUNMAP;
-    }
+    inline bool GetReadUnmappedFlag() { return b->core.flag & BAM_FUNMAP; }
 
     /* Template having multiple segments in sequencing  */
-    inline bool GetReadPairedFlag()
-    {
-        return b->core.flag & BAM_FPAIRED;
-    }
+    inline bool GetReadPairedFlag() { return b->core.flag & BAM_FPAIRED; }
 
     /**
      * the read fails platform/vendor quality checks.
      */
-    inline bool GetReadFailsVendorQualityCheckFlag()
-    {
-        return b->core.flag & BAM_FQCFAIL;
-    }
+    inline bool GetReadFailsVendorQualityCheckFlag() { return b->core.flag & BAM_FQCFAIL; }
 
     /**
      * the mate is unmapped.
      */
-    bool GetMateUnmappedFlag()
-    {
-        return b->core.flag & BAM_FMUNMAP;
-    }
+    bool GetMateUnmappedFlag() { return b->core.flag & BAM_FMUNMAP; }
 
     /**
-     * @return whether the alignment is secondary (an alternative alignment of the read).
+     * @return whether the alignment is secondary (an alternative alignment of
+     * the read).
      */
-    bool IsSecondaryAlignment()
-    {
-        return b->core.flag & BAM_FSECONDARY;
-    }
+    bool IsSecondaryAlignment() { return b->core.flag & BAM_FSECONDARY; }
 
     /**
-     * @return whether the alignment is supplementary (a split alignment such as a chimeric alignment).
+     * @return whether the alignment is supplementary (a split alignment such as
+     * a chimeric alignment).
      */
-    bool GetSupplementaryAlignmentFlag()
-    {
-        return b->core.flag & BAM_FSUPPLEMENTARY;
-    }
+    bool GetSupplementaryAlignmentFlag() { return b->core.flag & BAM_FSUPPLEMENTARY; }
 
     /*
-     * Tests if this record is either a secondary and/or supplementary alignment;
+     * Tests if this record is either a secondary and/or supplementary
+     * alignment;
      */
-    bool IsSecondaryOrSupplementary()
-    {
-        return IsSecondaryAlignment() || GetSupplementaryAlignmentFlag();
-    }
+    bool IsSecondaryOrSupplementary() { return IsSecondaryAlignment() || GetSupplementaryAlignmentFlag(); }
 
     /**
      * the read is the first read in a pair.
      */
-    bool GetFirstOfPairFlag()
-    {
-        return b->core.flag & BAM_FREAD1;
-    }
+    bool GetFirstOfPairFlag() { return b->core.flag & BAM_FREAD1; }
 
     /**
      * strand of the query (false for forward; true for reverse strand).
      */
-    bool GetReadNegativeStrandFlag()
-    {
-        return b->core.flag & BAM_FREVERSE;
-    }
+    bool GetReadNegativeStrandFlag() { return b->core.flag & BAM_FREVERSE; }
 
     /**
      * strand of the mate (false for forward; true for reverse strand).
      */
-    bool GetMateNegativeStrandFlag()
-    {
-        return b->core.flag & BAM_FMREVERSE;
-    }
+    bool GetMateNegativeStrandFlag() { return b->core.flag & BAM_FMREVERSE; }
 
     /* 其他的一些信息 */
-    inline int GetReferenceLength()
-    {
+    inline int GetReferenceLength() {
         int length = 0;
         const uint32_t *cigar = bam_get_cigar(b);
         const bam1_core_t &bc = b->core;
-        for (int i = 0; i < bc.n_cigar; ++i)
-        {
+        for (int i = 0; i < bc.n_cigar; ++i) {
             const char c = bam_cigar_opchr(cigar[i]);
             const int len = bam_cigar_oplen(cigar[i]);
-            switch (c)
-            {
+            switch (c) {
             case 'M':
             case 'D':
             case 'N':
@@ -429,24 +338,20 @@ struct BamWrap
     }
 
     // 计算bam的全局位置，算上染色体序号和比对位置
-    static inline int64_t bam_global_pos(bam1_t *b)
-    {
+    static inline int64_t bam_global_pos(bam1_t *b) {
         return (((int64_t)b->core.tid << MAX_CONTIG_LEN_SHIFT) | (int64_t)b->core.pos);
     }
-    static inline int64_t bam_global_pos(int tid, int pos)
-    {
+    static inline int64_t bam_global_pos(int tid, int pos) {
         return (((int64_t)tid << MAX_CONTIG_LEN_SHIFT) | (int64_t)pos);
     }
     // 根据全局位置获取bam的染色体序号
-    static inline int32_t bam_tid(int64_t global_pos)
-    {
+    static inline int32_t bam_tid(int64_t global_pos) {
         const int64_t mask = ~(((int64_t)1 << MAX_CONTIG_LEN_SHIFT) - 1);
         const int64_t high_tid = global_pos & mask;
         return (int32_t)(high_tid >> MAX_CONTIG_LEN_SHIFT);
     }
     // 根据全局位置获取bam的比对位置(染色体内)
-    static inline int32_t bam_pos(int64_t global_pos)
-    {
+    static inline int32_t bam_pos(int64_t global_pos) {
         const int64_t mask = ((int64_t)1 << MAX_CONTIG_LEN_SHIFT) - 1;
         return (int32_t)(global_pos & mask);
     }
diff --git a/src/common/utils/debug.cpp b/src/common/utils/debug.cpp
new file mode 100644
index 0000000..e69de29
diff --git a/src/common/utils/debug.h b/src/common/utils/debug.h
new file mode 100644
index 0000000..e69de29
diff --git a/src/common/utils/global_arg.cpp b/src/common/utils/global_arg.cpp
index 1588147..d707dd6 100644
--- a/src/common/utils/global_arg.cpp
+++ b/src/common/utils/global_arg.cpp
@@ -23,8 +23,7 @@ using std::vector;
 struct option *GlobalArg::GLOBAL_OPT = nullptr;
 
 // 初始化参数
-void GlobalArg::initGlobalOptions()
-{
+void GlobalArg::initGlobalOptions() {
     vector<struct option> v;
     v.push_back({"INPUT", required_argument, NULL, ns_ga::GlobalOptEnum::OPT_INPUT}); // 输入文件
     v.push_back({"OUTPUT", required_argument, NULL, ns_ga::GlobalOptEnum::OPT_OUTPUT}); // 输出文件
@@ -51,59 +50,53 @@ void GlobalArg::initGlobalOptions()
 }
 
 // 解析参数
-void GlobalArg::parseArgument(int argNum)
-{
+void GlobalArg::parseArgument(int argNum) {
     using namespace ns_ga;
-    switch (argNum)
-    {
-        case OPT_INPUT:
-            in_fn = optarg;
-            break;
-        case OPT_OUTPUT:
-            out_fn = optarg;
-            break;
-        case OPT_NUM_THREADS:
-            num_threads = std::stoi(optarg);
-            break;
-        case OPT_MAX_MEM:
-        {
-            char *q;
-            size_t mem_arg = strtol(optarg, &q, 0);
-            if (*q == 'k' || *q == 'K')
-                mem_arg <<= 10;
-            else if (*q == 'm' || *q == 'M')
-                mem_arg <<= 20;
-            else if (*q == 'g' || *q == 'G')
-                mem_arg <<= 30;
-            if (mem_arg >= max_mem)
-                max_mem = mem_arg;
-            else
-            {
-                std::cerr << "[Warn] Too small mem size, use default" << std::endl;
-            }
-            break;
+    switch (argNum) {
+    case OPT_INPUT:
+        in_fn = optarg;
+        break;
+    case OPT_OUTPUT:
+        out_fn = optarg;
+        break;
+    case OPT_NUM_THREADS:
+        num_threads = std::stoi(optarg);
+        break;
+    case OPT_MAX_MEM: {
+        char *q;
+        size_t mem_arg = strtol(optarg, &q, 0);
+        if (*q == 'k' || *q == 'K')
+            mem_arg <<= 10;
+        else if (*q == 'm' || *q == 'M')
+            mem_arg <<= 20;
+        else if (*q == 'g' || *q == 'G')
+            mem_arg <<= 30;
+        if (mem_arg >= max_mem)
+            max_mem = mem_arg;
+        else {
+            std::cerr << "[Warn] Too small mem size, use default" << std::endl;
         }
-        case OPT_LOG_LEVEL:
-        {
-            if (strcmp("ERROR", optarg) == 0)
-                verbosity = ns_ga::ERROR;
-            else if (strcmp("WARNING", optarg) == 0)
-                verbosity = ns_ga::WARNING;
-            else if (strcmp("INFO", optarg) == 0)
-                verbosity = ns_ga::INFO;
-            else if (strcmp("DEBUG", optarg) == 0)
-                verbosity = ns_ga::DEBUG;
-            break;
-        }
-        case OPT_ASYNCIO:
-        {
-            if (strcmp("true", optarg) == 0)
-                use_asyncio = true;
-            else if (strcmp("false", optarg) == 0)
-                use_asyncio = false;
-            break;
-        }
-        default:
-            break;
+        break;
+    }
+    case OPT_LOG_LEVEL: {
+        if (strcmp("ERROR", optarg) == 0)
+            verbosity = ns_ga::ERROR;
+        else if (strcmp("WARNING", optarg) == 0)
+            verbosity = ns_ga::WARNING;
+        else if (strcmp("INFO", optarg) == 0)
+            verbosity = ns_ga::INFO;
+        else if (strcmp("DEBUG", optarg) == 0)
+            verbosity = ns_ga::DEBUG;
+        break;
+    }
+    case OPT_ASYNCIO: {
+        if (strcmp("true", optarg) == 0)
+            use_asyncio = true;
+        else if (strcmp("false", optarg) == 0)
+            use_asyncio = false;
+        break;
+    }
+    default:
+        break;
     }
 }
\ No newline at end of file
diff --git a/src/common/utils/global_arg.h b/src/common/utils/global_arg.h
index 8ff64be..903eb14 100644
--- a/src/common/utils/global_arg.h
+++ b/src/common/utils/global_arg.h
@@ -9,63 +9,57 @@ Date : 2023/10/23
 #ifndef GLOBAL_ARG_H_
 #define GLOBAL_ARG_H_
 
-#include <string>
-#include <map>
-#include <vector>
 #include <getopt.h>
 #include <stdio.h>
 
+#include <map>
+#include <string>
+#include <vector>
+
 using std::map;
 using std::string;
 using std::vector;
 
 namespace ns_ga {
-    enum GlobalOptEnum
-    {
-        _START_NUM = 1,
-        OPT_INPUT,
-        OPT_OUTPUT,
-        OPT_NUM_THREADS,
-        OPT_MAX_MEM,
-        OPT_LOG_LEVEL,
-        OPT_ASYNCIO,
-        OPT_VERSION,
-        OPT_HELP,
-        _END_NUM
-    };
+enum GlobalOptEnum {
+    _START_NUM = 1,
+    OPT_INPUT,
+    OPT_OUTPUT,
+    OPT_NUM_THREADS,
+    OPT_MAX_MEM,
+    OPT_LOG_LEVEL,
+    OPT_ASYNCIO,
+    OPT_VERSION,
+    OPT_HELP,
+    _END_NUM
+};
 
-    // log level
-    enum LogLevelEnum
-    {
-        ERROR,
-        WARNING,
-        INFO,
-        DEBUG
-    };
-}
+// log level
+enum LogLevelEnum { ERROR, WARNING, INFO, DEBUG };
+}  // namespace ns_ga
 
 /* 全局共享的一些参数 */
-struct GlobalArg
-{
-    const static int GLOBAL_ARG_CNT = ns_ga::GlobalOptEnum::_END_NUM - ns_ga::GlobalOptEnum::_START_NUM; // 这里不需要减1
+struct GlobalArg {
+    const static int GLOBAL_ARG_CNT =
+        ns_ga::GlobalOptEnum::_END_NUM -
+        ns_ga::GlobalOptEnum::_START_NUM;  // 这里不需要减1
     static struct option *GLOBAL_OPT;
 
-    string in_fn; // input bam filename
-    string out_fn; // output bam filename
-    int num_threads = 1; // 线程个数
-    size_t max_mem = ((size_t)2) << 30; // 最小2G
-    ns_ga::LogLevelEnum verbosity = ns_ga::INFO; // 打印信息级别
-    bool use_asyncio = true; // 是否使用异步io
+    string in_fn;                                 // input bam filename
+    string out_fn;                                // output bam filename
+    int num_threads = 1;                          // 线程个数
+    size_t max_mem = ((size_t)1) << 30;           // 最小1G
+    ns_ga::LogLevelEnum verbosity = ns_ga::INFO;  // 打印信息级别
+    bool use_asyncio = true;                      // 是否使用异步io
 
-    vector<string> vArgInfo; // 每个参数的帮助信息
+    vector<string> vArgInfo;  // 每个参数的帮助信息
 
     // 单例模式
     GlobalArg(const GlobalArg &) = delete;
     GlobalArg &operator=(const GlobalArg &) = delete;
 
     // 获取单例
-    static GlobalArg &Instance()
-    {
+    static GlobalArg &Instance() {
         static GlobalArg instance;
         return instance;
     }
@@ -76,8 +70,7 @@ struct GlobalArg
     void parseArgument(int argNum);
 
     // 获取对应参数在数组（option和help info）中的索引
-    int getArgIndx(ns_ga::GlobalOptEnum opt)
-    {
+    int getArgIndx(ns_ga::GlobalOptEnum opt) {
         return opt - ns_ga::GlobalOptEnum::OPT_INPUT;
     }
 
@@ -95,11 +88,9 @@ struct GlobalArg
         printf("--verbosity   = %d\n", verbosity);
         printf("--asyncio     = %d\n", use_asyncio);
     }
-private : 
-    GlobalArg()
-    {
-        initGlobalOptions();
-    };
+
+private:
+    GlobalArg() { initGlobalOptions(); };
 };
 
 #endif
\ No newline at end of file
diff --git a/src/common/utils/murmur3.h b/src/common/utils/murmur3.h
index 5ffadef..5dcdee3 100644
--- a/src/common/utils/murmur3.h
+++ b/src/common/utils/murmur3.h
@@ -7,36 +7,34 @@ Author : Zhang Zhonghai
 Date : 2023/11/6
 */
 
-#include <string>
 #include <random>
+#include <string>
 
 using std::string;
 
 /**
- * Provides an implementation of the Murmur3_32 hash algorithm that has desirable properties in terms of randomness
- * and uniformity of the distribution of output values that make it a useful hashing algorithm for downsampling.
+ * Provides an implementation of the Murmur3_32 hash algorithm that has
+ * desirable properties in terms of randomness and uniformity of the
+ * distribution of output values that make it a useful hashing algorithm for
+ * downsampling.
  */
 
-struct Murmur3
-{
+struct Murmur3 {
     int seed_ = 0;
     /** Hashes a character stream to an int using Murmur3. */
-    int HashUnencodedChars(const string &input)
-    {
+    int HashUnencodedChars(const string &input) {
         int h1 = this->seed_;
 
         // step through the CharSequence 2 chars at a time
         const int length = input.size();
-        for (int i = 1; i < length; i += 2)
-        {
+        for (int i = 1; i < length; i += 2) {
             int k1 = input.at(i - 1) | (input.at(i) << 16);
             k1 = mixK1(k1);
             h1 = mixH1(h1, k1);
         }
 
         // deal with any remaining characters
-        if ((length & 1) == 1)
-        {
+        if ((length & 1) == 1) {
             int k1 = input.at(length - 1);
             k1 = mixK1(k1);
             h1 ^= k1;
@@ -45,14 +43,12 @@ struct Murmur3
         return fmix(h1, 2 * length);
     }
 
-    static Murmur3 &Instance()
-    {
+    static Murmur3 &Instance() {
         static Murmur3 instance;
         return instance;
     }
 
-    static int mixK1(int k1)
-    {
+    static int mixK1(int k1) {
         const int c1 = 0xcc9e2d51;
         const int c2 = 0x1b873593;
         k1 *= c1;
@@ -61,8 +57,7 @@ struct Murmur3
         return k1;
     }
 
-    static int mixH1(int h1, int k1)
-    {
+    static int mixH1(int h1, int k1) {
         h1 ^= k1;
         h1 = h1 << 13;
         h1 = h1 * 5 + 0xe6546b64;
@@ -70,8 +65,7 @@ struct Murmur3
     }
 
     // Finalization mix - force all bits of a hash block to avalanche
-    static int fmix(int h1, int length)
-    {
+    static int fmix(int h1, int length) {
         h1 ^= length;
         h1 ^= (unsigned int)h1 >> 16;
         h1 *= 0x85ebca6b;
@@ -82,8 +76,7 @@ struct Murmur3
     }
 
 private:
-    Murmur3() 
-    {
+    Murmur3() {
         auto &&rd = std::random_device{};
         seed_ = rd();
     }
diff --git a/src/common/utils/profiling.cpp b/src/common/utils/profiling.cpp
new file mode 100644
index 0000000..e69de29
diff --git a/src/common/utils/profiling.h b/src/common/utils/profiling.h
new file mode 100644
index 0000000..e69de29
diff --git a/src/sam/markdups/dup_metrics.h b/src/sam/markdups/dup_metrics.h
new file mode 100644
index 0000000..508c4e8
--- /dev/null
+++ b/src/sam/markdups/dup_metrics.h
@@ -0,0 +1,70 @@
+#pragma once
+
+#include <string>
+#include <stdint.h>
+
+using std::string;
+
+/*
+ * 标记冗余过程中的一些数据统计
+ */
+struct DuplicationMetrics {
+    /**
+     * The library on which the duplicate marking was performed.
+     */
+    string LIBRARY = "";
+    /**
+     * The number of mapped reads examined which did not have a mapped mate pair,
+     * either because the read is unpaired, or the read is paired to an unmapped mate.
+     */
+    uint64_t UNPAIRED_READS_EXAMINED = 0;
+    /**
+     * The number of mapped read pairs examined. (Primary, non-supplemental)
+     */
+    uint64_t READ_PAIRS_EXAMINED = 0;
+    /**
+     * The number of reads that were either secondary or supplementary
+     */
+    uint64_t SECONDARY_OR_SUPPLEMENTARY_RDS = 0;
+    /**
+     * The total number of unmapped reads examined. (Primary, non-supplemental)
+     */
+    uint64_t UNMAPPED_READS = 0;
+    /**
+     * The number of fragments that were marked as duplicates.
+     */
+    uint64_t UNPAIRED_READ_DUPLICATES = 0;
+    /**
+     * The number of read pairs that were marked as duplicates.
+     */
+    uint64_t READ_PAIR_DUPLICATES = 0;
+    /**
+     * The number of read pairs duplicates that were caused by optical duplication.
+     * Value is always < READ_PAIR_DUPLICATES, which counts all duplicates regardless of source.
+     */
+    uint64_t READ_PAIR_OPTICAL_DUPLICATES = 0;
+    /**
+     * The fraction of mapped sequence that is marked as duplicate.
+     */
+    double PERCENT_DUPLICATION = 0.0;
+    /**
+     * The estimated number of unique molecules in the library based on PE duplication.
+     */
+    uint64_t ESTIMATED_LIBRARY_SIZE = 0;
+
+    // 其他的统计数据
+
+    // addSingletonToCount需要记录的数据
+    uint64_t DuplicateCountHist = 0;
+    uint64_t NonOpticalDuplicateCountHist = 0;
+
+    // track optical duplicates 需要记录的数据
+    uint64_t OpticalDuplicatesCountHist = 0;
+    uint64_t OpticalDuplicatesByLibraryId = 0;
+    
+    // 统计相关的函数
+    void AddSingletonToCount() { 
+        ++this->DuplicateCountHist;
+        ++this->NonOpticalDuplicateCountHist;
+    }
+};
\ No newline at end of file
diff --git a/src/sam/markdups/markdups.cpp b/src/sam/markdups/markdups.cpp
index 6dba1d5..7860205 100644
--- a/src/sam/markdups/markdups.cpp
+++ b/src/sam/markdups/markdups.cpp
@@ -1,100 +1,99 @@
 /*
-Description: 标记bam文件中的冗余信息，只处理按照坐标排序后的bam，且bam为单一样本数据
+Description:
+标记bam文件中的冗余信息，只处理按照坐标排序后的bam，且bam为单一样本数据
 
 Copyright : All right reserved by ICT
 
 Author : Zhang Zhonghai
 Date : 2023/10/23
 */
-#include "markdups_arg.h"
-// 有太多define冲突，放到最后include
-
-
 #include <common/hts/bam_buf.h>
 #include <common/utils/global_arg.h>
+#include <common/utils/murmur3.h>
 #include <common/utils/thpool.h>
 #include <common/utils/timer.h>
 #include <common/utils/util.h>
-#include <common/utils/murmur3.h>
 #include <common/utils/yarn.h>
+#include <htslib/sam.h>
+#include <htslib/thread_pool.h>
 #include <sam/utils/read_ends.h>
 #include <sam/utils/read_name_parser.h>
 
-#include <htslib/sam.h>
-#include <htslib/thread_pool.h>
-
 #include <iostream>
-#include <vector>
-#include <set>
 #include <queue>
+#include <set>
 #include <unordered_map>
 #include <unordered_set>
+#include <vector>
+
+#include "markdups_arg.h"
+#include "md_funcs.h"
+#include "parallel_md.h"
+#include "serial_md.h"
+#include "shared_args.h"
+#include "dup_metrics.h"
 
 using namespace std;
 using std::cout;
 
-
 #define SMA_TAG_PG "PG"
 
-#define BAM_BLOCK_SIZE 2 * 1024 * 1024
+#define BAM_BLOCK_SIZE 16L * 1024 * 1024
 #define NO_SUCH_INDEX INT64_MAX
 
-static Timer tm_arr[20]; // 用来测试性能
+Timer tm_arr[20];  // 用来测试性能
 /* 全局本地变量 */
-static vector<ReadNameParser> g_vRnParser; // 每个线程一个read name parser
-static samFile *g_inBamFp;                 // 输入的bam文件
-static sam_hdr_t *g_inBamHeader;           // 输入的bam文件头信息
-static samFile *g_outBamFp = nullptr;      // 输出文件, sam或者bam格式
-static sam_hdr_t *g_outBamHeader;          // 输出文件的header
+vector<ReadNameParser> g_vRnParser;  // 每个线程一个read name parser
+samFile *g_inBamFp;                  // 输入的bam文件
+sam_hdr_t *g_inBamHeader;            // 输入的bam文件头信息
+samFile *g_outBamFp = nullptr;       // 输出文件, sam或者bam格式
+sam_hdr_t *g_outBamHeader;           // 输出文件的header
 
 /* 参数对象作为全局对象，免得多次作为参数传入函数中 */
-static GlobalArg &g_gArg = GlobalArg::Instance();
-static MarkDupsArg g_mdArg;
-
-
-#include "md_funcs.h"
-#include "serial_md.h"
-#include "parallel_md.h"
-
-
-
+GlobalArg &g_gArg = GlobalArg::Instance();
+static MarkDupsArg g_mdArg_;
+MarkDupsArg &g_mdArg = g_mdArg_;
+static GlobalDataArg gData_;
+GlobalDataArg &gData = gData_;
+DuplicationMetrics gMetrics_;
+DuplicationMetrics &gMetrics = gMetrics_;
 /*
- * mark duplicate 入口，假定bam是按照比对后的坐标排序的，同一个样本的话不需要考虑barcode的问题
+ * mark duplicate
+ * 入口，假定bam是按照比对后的坐标排序的，同一个样本的话不需要考虑barcode的问题
  */
-int MarkDuplicates(int argc, char *argv[])
-{
+int MarkDuplicates(int argc, char *argv[]) {
     Timer::log_time("程序开始");
     Timer time_all;
-    
+
     /* 读取命令行参数 */
-    g_mdArg.parseArgument(argc, argv, &g_gArg); // 解析命令行参数
-    if (g_gArg.num_threads < 1) // 线程数不能小于1
-        g_gArg.num_threads = 1;
+    g_mdArg.parseArgument(argc, argv, &g_gArg);  // 解析命令行参数
+    if (g_gArg.num_threads < 1)
+        g_gArg.num_threads = 1;  // 线程数不能小于1
 
     /* 初始化一些参数和变量*/
     g_vRnParser.resize(g_gArg.num_threads);
     for (auto &parser : g_vRnParser)
-        parser.SetReadNameRegex(g_mdArg.READ_NAME_REGEX); // 用来解析read name中的tile，x，y信息
-    
+        parser.SetReadNameRegex(g_mdArg.READ_NAME_REGEX);  // 用来解析read name中的tile，x，y信息
+
     /* 打开输入bam文件 */
     g_inBamFp = sam_open_format(g_gArg.in_fn.c_str(), "r", nullptr);
-    if (!g_inBamFp)
-    {
+    if (!g_inBamFp) {
         Error("[%s] load sam/bam file failed.\n", __func__);
         return -1;
     }
     hts_set_opt(g_inBamFp, HTS_OPT_BLOCK_SIZE, BAM_BLOCK_SIZE);
-    g_inBamHeader = sam_hdr_read(g_inBamFp); // 读取header
+    g_inBamHeader = sam_hdr_read(g_inBamFp);  // 读取header
+    // 获取样本名称(libraryId)
+    gMetrics.LIBRARY = sam_hdr_line_name(g_inBamHeader, "RG", 0);
 
     /* 利用线程池对输入输出文件进行读写 */
-    htsThreadPool htsPoolRead = {NULL, 0};  // 多线程读取，创建线程池
-    htsThreadPool htsPoolWrite = {NULL, 0}; // 读写用不同的线程池
-    // htsPoolRead.pool = hts_tpool_init(g_gArg.num_threads);
-    htsPoolRead.pool = hts_tpool_init(16);
-    // htsPoolWrite.pool = hts_tpool_init(g_gArg.num_threads);
-    htsPoolWrite.pool = hts_tpool_init(16);
-    if (!htsPoolRead.pool || !htsPoolWrite.pool)
-    {
+    htsThreadPool htsPoolRead = {NULL, 0};   // 多线程读取，创建线程池
+    htsThreadPool htsPoolWrite = {NULL, 0};  // 读写用不同的线程池
+    //htsPoolRead.pool = hts_tpool_init(g_gArg.num_threads);
+    //htsPoolWrite.pool = hts_tpool_init(g_gArg.num_threads);
+    htsPoolRead.pool = hts_tpool_init(32);
+    htsPoolWrite.pool = hts_tpool_init(32);
+    if (!htsPoolRead.pool || !htsPoolWrite.pool) {
         Error("[%d] failed to set up thread pool", __LINE__);
         sam_close(g_inBamFp);
         return -1;
@@ -106,86 +105,94 @@ int MarkDuplicates(int argc, char *argv[])
     sam_open_mode(modeout + 1, g_gArg.out_fn.c_str(), NULL);
     g_outBamFp = sam_open(g_gArg.out_fn.c_str(), modeout);
     g_outBamHeader = sam_hdr_dup(g_inBamHeader);
+    // 用同样的线程池处理输出文件
     hts_set_opt(g_outBamFp, HTS_OPT_BLOCK_SIZE, BAM_BLOCK_SIZE);
-    hts_set_opt(g_outBamFp, HTS_OPT_THREAD_POOL, &htsPoolWrite); // 用同样的线程池处理输出文件
-
+    hts_set_opt(g_outBamFp, HTS_OPT_THREAD_POOL, &htsPoolWrite);
 
     /* 冗余检查和标记 */
-    if (g_gArg.num_threads == 1)
-    {
-        serialMarkDups(); // 串行运行
-    }
-    else
-    {
-        parallelMarkDups(); // 并行运行
+    if (g_gArg.num_threads == 1) {
+        serialMarkDups();  // 串行运行
+    } else {
+        parallelMarkDups();  // 并行运行
     }
 
     /* 标记冗余, 将处理后的结果写入文件 */
-    sam_close(g_inBamFp); // 重新打开bam文件
+    sam_close(g_inBamFp);  // 重新打开bam文件
     g_inBamFp = sam_open_format(g_gArg.in_fn.c_str(), "r", nullptr);
-    if (!g_inBamFp)
-    {
+    if (!g_inBamFp) {
         Error("[%s] load sam/bam file failed.\n", __func__);
         return -1;
     }
     hts_set_opt(g_inBamFp, HTS_OPT_BLOCK_SIZE, BAM_BLOCK_SIZE);
     hts_set_opt(g_inBamFp, HTS_OPT_THREAD_POOL, &htsPoolRead);
-    g_inBamHeader = sam_hdr_read(g_inBamFp); // 读取header
-    if (sam_hdr_write(g_outBamFp, g_outBamHeader) != 0)
-    {
+    g_inBamHeader = sam_hdr_read(g_inBamFp);  // 读取header
+    if (sam_hdr_write(g_outBamFp, g_outBamHeader) != 0) {
         Error("failed writing header to \"%s\"", g_gArg.out_fn.c_str());
         sam_close(g_outBamFp);
         sam_close(g_inBamFp);
         return -1;
     }
     // 输出index文件
-    string indexFn = g_gArg.out_fn + ".csi"; // 现在索引都是csi格式的
-    if (sam_idx_init(g_outBamFp, g_outBamHeader, 14 /*csi*/, indexFn.c_str()) < 0)
-    {
+    // string indexFn = g_gArg.out_fn + ".csi";  // 现在索引都是csi格式的
+    string indexFn = g_gArg.out_fn + ".bai";  // min_shift = 0 是bai格式
+    int index_min_shift = 0;
+    if (g_mdArg.INDEX_FORMAT == ns_md::IndexFormat::CSI) {
+        indexFn = g_gArg.out_fn + ".csi";
+        index_min_shift = 14;
+    }
+    if (sam_idx_init(g_outBamFp, g_outBamHeader, 0 /*csi 14*/, indexFn.c_str()) < 0) {
         Error("failed to open index \"%s\" for writing", indexFn.c_str());
         sam_close(g_outBamFp);
         sam_close(g_inBamFp);
         return -1;
     }
     // 读取输入文件
-    // BamBufType inBuf(false); // inBuf(g_gArg.use_asyncio);
+    // BamBufType inBuf(false);
     BamBufType inBuf(g_gArg.use_asyncio);
     inBuf.Init(g_inBamFp, g_inBamHeader, g_gArg.max_mem);
+    DupIdxQueue idxQue;
+    idxQue.Init(&gData.dupIdxArr);
     Timer tw;
-    while (inBuf.ReadStat() >= 0)
-    {
+    cout << "dupsize: " << idxQue.Size() << endl;
+    uint64_t bamIdx = 0;
+    uint64_t dupIdx = idxQue.Pop();
+    cout << "dup arr size: " << gData.dupIdxArr.size() << endl;
+    cout << "first dup: " << dupIdx << endl;
+    while (inBuf.ReadStat() >= 0) {
         Timer tw1;
         size_t readNum = inBuf.ReadBam();
         cout << "read: " << readNum << endl;
-        for (size_t i = 0; i < inBuf.Size(); ++i)
-        {
+        for (size_t i = 0; i < inBuf.Size(); ++i) {
             /* 判断是否冗余 */
-            if (sam_write1(g_outBamFp, g_outBamHeader, inBuf[i]->b) < 0)
-            {
+            if (bamIdx == dupIdx) {
+                // cout << "冗余" << bamIdx << endl;
+                dupIdx = idxQue.Pop();
+            }
+            if (sam_write1(g_outBamFp, g_outBamHeader, inBuf[i]->b) < 0) {
                 Error("failed writing header to \"%s\"", g_gArg.out_fn.c_str());
                 sam_close(g_outBamFp);
                 sam_close(g_inBamFp);
                 return -1;
             }
+            ++bamIdx;
         }
         inBuf.ClearAll();
         cout << "write round time: " << tw1.seconds_elapsed() << " s" << endl;
     }
-    if (sam_idx_save(g_outBamFp) < 0)
-    {
+    cout << "dupsize: " << idxQue.Size() << endl;
+    if (sam_idx_save(g_outBamFp) < 0) {
         Error("writing index failed");
         sam_close(g_outBamFp);
         sam_close(g_inBamFp);
         return -1;
     }
     cout << "write time: " << tw.seconds_elapsed() << " s" << endl;
-
     /* 关闭文件，收尾清理 */
     sam_close(g_outBamFp);
     sam_close(g_inBamFp);
 
     cout << "      总时间: " << time_all.seconds_elapsed() << endl;
-//    cout << "计算read end: " << tm_arr[0].acc_seconds_elapsed() << endl;
+    //    cout << "计算read end: " << tm_arr[0].acc_seconds_elapsed() << endl;
     Timer::log_time("程序结束");
     return 0;
-}
\ No newline at end of file
+    }
\ No newline at end of file
diff --git a/src/sam/markdups/markdups_arg.cpp b/src/sam/markdups/markdups_arg.cpp
index c8a9453..f00eb51 100644
--- a/src/sam/markdups/markdups_arg.cpp
+++ b/src/sam/markdups/markdups_arg.cpp
@@ -64,6 +64,7 @@ const static struct option kMdOpts[] = {
     {"COMPRESSION_LEVEL", required_argument, NULL, COMPRESSION_LEVEL},
     {"MAX_RECORDS_IN_RAM", required_argument, NULL, MAX_RECORDS_IN_RAM},
     {"CREATE_INDEX", required_argument, NULL, CREATE_INDEX},
+    {"INDEX_FORMAT", required_argument, NULL, INDEX_FORMAT},
     {"CREATE_MD5_FILE", required_argument, NULL, CREATE_MD5_FILE}};
 
 // 判断bool类型的参数
@@ -220,6 +221,11 @@ void MarkDupsArg::parseArgument(int argc,
         case ns_md::CREATE_INDEX:
             setBoolArg(&CREATE_INDEX);
             break;
+        case ns_md::INDEX_FORMAT:
+            if (strcmp("CSI", optarg) == 0)
+                INDEX_FORMAT = ns_md::IndexFormat::CSI;
+            else
+                INDEX_FORMAT = ns_md::IndexFormat::BAI;
         case ns_md::CREATE_MD5_FILE:
             setBoolArg(&CREATE_MD5_FILE);
             break;
@@ -235,7 +241,8 @@ void MarkDupsArg::parseArgument(int argc,
 /* 打印参数信息 */
 void MarkDupsArg::printArgValue()
 {
-    printf("--READ_NAME_REGEX       = %s\n", this->READ_NAME_REGEX.c_str());
+    printf("--READ_NAME_REGEX = %s\n", this->READ_NAME_REGEX.c_str());
+    printf("--INDEX_FORMAT    = %s\n", this->INDEX_FORMAT == ns_md::IndexFormat::BAI ? "bai" : "csi");
 }
 
 // 打印版本信息
@@ -272,6 +279,8 @@ void MarkDupsArg::PrintHelp()
             "\n"
             "Optional Arguments:\n"
             "\n"
+            "--INDEX_FORMAT <FORMAT>       Format for bam index file. Possible values: {BAI, CSI}\n"
+            "\n"
             "--ADD_PG_TAG_TO_READS <Boolean>\n"
             "                              Add PG tag to each read in a SAM or BAM  Default value: true. Possible values: {true,\n"
             "                              false}\n"
diff --git a/src/sam/markdups/markdups_arg.h b/src/sam/markdups/markdups_arg.h
index 1abf8a7..af5fb41 100644
--- a/src/sam/markdups/markdups_arg.h
+++ b/src/sam/markdups/markdups_arg.h
@@ -21,91 +21,92 @@ using std::vector;
 class GlobalArg;
 
 namespace ns_md {
-    /* 用于markduplicate模块的参数，这个枚举用于getoption */
-    enum MarkDupsArgEnum
-    {
-        _START_NUM = 100,
-        MAX_SEQUENCES_FOR_DISK_READ_ENDS_MAP,
-        MAX_FILE_HANDLES_FOR_READ_ENDS_MAP,
-        SORTING_COLLECTION_SIZE_RATIO,
-        BARCODE_TAG,
-        READ_ONE_BARCODE_TAG,
-        READ_TWO_BARCODE_TAG,
-        TAG_DUPLICATE_SET_MEMBERS,
-        REMOVE_SEQUENCING_DUPLICATES,
-        TAGGING_POLICY,
-        CLEAR_DT,
-        DUPLEX_UMI,
-        MOLECULAR_IDENTIFIER_TAG,
-        METRICS_FILE,
-        REMOVE_DUPLICATES,
-        ASSUME_SORTED,
-        ASSUME_SORT_ORDER,
-        DUPLICATE_SCORING_STRATEGY,
-        PROGRAM_RECORD_ID,
-        PROGRAM_GROUP_VERSION,
-        PROGRAM_GROUP_COMMAND_LINE,
-        PROGRAM_GROUP_NAME,
-        COMMENT,
-        READ_NAME_REGEX,
-        OPTICAL_DUPLICATE_PIXEL_DISTANCE,
-        MAX_OPTICAL_DUPLICATE_SET_SIZE,
-        QUIET,
-        VALIDATION_STRINGENCY,
-        COMPRESSION_LEVEL,
-        MAX_RECORDS_IN_RAM,
-        CREATE_INDEX,
-        CREATE_MD5_FILE,
-        _END_NUM
-    };
-
-    /* How strict to be when reading a SAM or BAM, beyond bare minimum validation. */
-    enum ValidationStringency
-    {
-        /**
-         * Do the right thing, throw an exception if something looks wrong.
-         */
-        STRICT,
-        /**
-         * Emit warnings but keep going if possible.
-         */
-        LENIENT,
-        /**
-         * Like LENIENT, only don't emit warning messages.
-         */
-        SILENT,
-
-        DEFAULT_STRINGENCY = SILENT
-    };
+/* 用于markduplicate模块的参数，这个枚举用于getoption */
+enum MarkDupsArgEnum {
+    _START_NUM = 100,
+    MAX_SEQUENCES_FOR_DISK_READ_ENDS_MAP,
+    MAX_FILE_HANDLES_FOR_READ_ENDS_MAP,
+    SORTING_COLLECTION_SIZE_RATIO,
+    BARCODE_TAG,
+    READ_ONE_BARCODE_TAG,
+    READ_TWO_BARCODE_TAG,
+    TAG_DUPLICATE_SET_MEMBERS,
+    REMOVE_SEQUENCING_DUPLICATES,
+    TAGGING_POLICY,
+    CLEAR_DT,
+    DUPLEX_UMI,
+    MOLECULAR_IDENTIFIER_TAG,
+    METRICS_FILE,
+    REMOVE_DUPLICATES,
+    ASSUME_SORTED,
+    ASSUME_SORT_ORDER,
+    DUPLICATE_SCORING_STRATEGY,
+    PROGRAM_RECORD_ID,
+    PROGRAM_GROUP_VERSION,
+    PROGRAM_GROUP_COMMAND_LINE,
+    PROGRAM_GROUP_NAME,
+    COMMENT,
+    READ_NAME_REGEX,
+    OPTICAL_DUPLICATE_PIXEL_DISTANCE,
+    MAX_OPTICAL_DUPLICATE_SET_SIZE,
+    QUIET,
+    VALIDATION_STRINGENCY,
+    COMPRESSION_LEVEL,
+    MAX_RECORDS_IN_RAM,
+    CREATE_INDEX,
+    INDEX_FORMAT,
+    CREATE_MD5_FILE,
+    _END_NUM
+};
 
+/* How strict to be when reading a SAM or BAM, beyond bare minimum validation.
+ */
+enum ValidationStringency {
     /**
-     * Enum used to control how duplicates are flagged in the DT optional tag on each read.
+     * Do the right thing, throw an exception if something looks wrong.
      */
-    enum DuplicateTaggingPolicy
-    {
-        DontTag,
-        OpticalOnly,
-        All
-    };
+    STRICT,
+    /**
+     * Emit warnings but keep going if possible.
+     */
+    LENIENT,
+    /**
+     * Like LENIENT, only don't emit warning messages.
+     */
+    SILENT,
 
-    /* 排序的方式 */
-    enum SortOrder
-    {
-        unsorted,
-        queryname,
-        coordinate,
-        duplicate, // NB: this is not in the SAM spec!
-        unknown
-    };
+    DEFAULT_STRINGENCY = SILENT
+};
 
-    /* 计算reads分数的方式（比那个read得分更高） */
-    enum ScoringStrategy
-    {
-        SUM_OF_BASE_QUALITIES,
-        TOTAL_MAPPED_REFERENCE_LENGTH,
-        RANDOM
-    };
-}
+/**
+ * Enum used to control how duplicates are flagged in the DT optional tag on
+ * each read.
+ */
+enum DuplicateTaggingPolicy { DontTag, OpticalOnly, All };
+
+/* 排序的方式 */
+enum SortOrder {
+    unsorted,
+    queryname,
+    coordinate,
+    duplicate,  // NB: this is not in the SAM spec!
+    unknown
+};
+
+/* 计算reads分数的方式（比那个read得分更高） */
+enum ScoringStrategy {
+    SUM_OF_BASE_QUALITIES,
+    TOTAL_MAPPED_REFERENCE_LENGTH,
+    RANDOM
+};
+
+/* 索引文件的格式 （bai或者csi） */
+enum IndexFormat {
+    BAI,
+    CSI
+};
+
+}  // namespace ns_md
 
 /* markduplicate 需要的参数*/
 struct MarkDupsArg
@@ -287,6 +288,8 @@ struct MarkDupsArg
     /* "Whether to create an index when writing VCF or coordinate sorted BAM output.", common = true */
     bool CREATE_INDEX = false;
 
+    ns_md::IndexFormat INDEX_FORMAT = ns_md::IndexFormat::BAI;
+
     /* "Whether to create an MD5 digest for any BAM or FASTQ files created.  ", common = true */
     bool CREATE_MD5_FILE = false;
 
diff --git a/src/sam/markdups/md_funcs.cpp b/src/sam/markdups/md_funcs.cpp
new file mode 100644
index 0000000..eb0eb99
--- /dev/null
+++ b/src/sam/markdups/md_funcs.cpp
@@ -0,0 +1,456 @@
+
+#include "md_funcs.h"
+
+#include <common/hts/bam_buf.h>
+#include <common/utils/debug.h>
+#include <common/utils/murmur3.h>
+#include <common/utils/profiling.h>
+#include <common/utils/timer.h>
+#include <common/utils/util.h>
+#include <sam/utils/read_ends.h>
+#include <stdint.h>
+
+#include <iostream>
+#include <map>
+#include <set>
+#include <vector>
+#include <math.h>
+#include <unordered_map>
+
+#include "dup_metrics.h"
+#include "markdups_arg.h"
+#include "shared_args.h"
+
+using std::cerr;
+using std::endl;
+using std::map;
+using std::set;
+using std::unordered_map;
+using std::vector;
+
+/* 清除key位置的数据 */
+void clearIdxAtPos(int64_t key, map<int64_t, set<int64_t>> *pmsIdx) {
+    auto &msIdx = *pmsIdx;
+    if (msIdx.find(key) != msIdx.end())
+        msIdx[key].clear();  // 清除该位点的冗余结果
+}
+
+/* 删除key位置的数据 */
+void delIdxAtPos(int64_t key, map<int64_t, set<int64_t>> *pmsIdx) {
+    auto &msIdx = *pmsIdx;
+    if (msIdx.find(key) != msIdx.end())
+        msIdx.erase(key);
+}
+
+/*
+ * 计算read的分数
+ */
+int16_t computeDuplicateScore(BamWrap &bw) {
+    int16_t score = 0;
+    switch (g_mdArg.DUPLICATE_SCORING_STRATEGY) {
+    case ns_md::SUM_OF_BASE_QUALITIES:
+        // two (very) long reads worth of high-quality bases can go over
+        // Short.MAX_VALUE/2 and risk overflow.
+        score += (int16_t)min(bw.GetSumOfBaseQualities(), INT16_MAX / 2);
+        break;
+    case ns_md::TOTAL_MAPPED_REFERENCE_LENGTH:
+        if (!bw.GetReadUnmappedFlag())
+            // no need to remember the score since this scoring mechanism is
+            // symmetric
+            score = (int16_t)min(bw.GetReferenceLength(), INT16_MAX / 2);
+        break;
+    case ns_md::RANDOM:
+        // The RANDOM score gives the same score to both reads so that they get
+        // filtered together. it's not critical do use the readName since the
+        // scores from both ends get added, but it seem to be clearer this way.
+        score += (short)(Murmur3::Instance().HashUnencodedChars(bw.query_name()) & 0b11111111111111);
+        // subtract Short.MIN_VALUE/4 from it to end up with a number between
+        // 0 and Short.MAX_VALUE/2. This number can be then discounted in case
+        // the read is not passing filters. We need to stay far from overflow so
+        // that when we add the two scores from the two read mates we do not
+        // overflow since that could cause us to chose a failing read-pair
+        // instead of a passing one.
+        score -= INT16_MIN / 4;
+    default:
+        break;
+    }
+    // make sure that filter-failing records are heavily discounted. (the
+    // discount can happen twice, once for each mate, so need to make sure we do
+    // not subtract more than Short.MIN_VALUE overall.)
+    score += bw.GetReadFailsVendorQualityCheckFlag() ? (int16_t)(INT16_MIN / 2) : 0;
+
+    return score;
+}
+
+/*
+ * Builds a read ends object that represents a single read.
+ * 用来表示一个read的特征结构
+ */
+void buildReadEnds(BamWrap &bw, int64_t index, ReadNameParser &rnParser, ReadEnds *pKey) {
+    auto &k = *pKey;
+    auto &bc = bw.b->core;
+    k.read1FirstOfPair = bw.GetFirstOfPairFlag();
+    k.read1ReferenceIndex = bc.tid;
+    k.read1Coordinate = (bc.flag & BAM_FREVERSE) ? bw.GetUnclippedEnd() : bw.GetUnclippedStart();
+    k.orientation = (bc.flag & BAM_FREVERSE) ? ReadEnds::R : ReadEnds::F;
+
+    k.read1IndexInFile = index;
+    k.score = computeDuplicateScore(bw);
+    // Doing this lets the ends object know that it's part of a pair
+    if (bw.GetReadPairedFlag() && !bw.GetMateUnmappedFlag()) {
+        k.read2ReferenceIndex = bc.mtid;
+    }
+    // Fill in the location information for optical duplicates
+    rnParser.AddLocationInformation(bw.query_name(), pKey);
+    // cout << k.tile << ' ' << k.x << ' ' << k.y << endl;
+    // 计算位置key
+    k.posKey =
+        BamWrap::bam_global_pos(k.read1ReferenceIndex, k.read1Coordinate);  // << 1 | k.orientation;
+}
+
+/**
+ * Takes a list of ReadEndsForMarkDuplicates objects and identify the
+ * representative read based on quality score. For all members of the duplicate
+ * set, add the read1 index-in-file of the representative read to the records of
+ * the first and second in a pair. This value becomes is used for the 'DI' tag.
+ */
+void addRepresentativeReadIndex(vector<const ReadEnds *> &vpRe) {}
+
+/* 处理一组pairend的readends，标记冗余 */
+void markDuplicatePairs(int64_t posKey, vector<const ReadEnds *> &vpRe,
+                        DupContainer<int64_t> *dupIdx, DupContainer<int64_t> *opticalDupIdx) {
+    if (vpRe.size() < 2) {
+        if (vpRe.size() == 1) {
+            // addSingletonToCount(libraryIdGenerator);
+        }
+        return;
+    }
+    // cout << "pos:" << posKey + 1 << ";size:" << vpRe.size() << endl;
+    auto &vDupIdx = dupIdx->AtPos(posKey);
+    auto &vOpticalDupIdx = opticalDupIdx->AtPos(posKey);
+
+    int maxScore = 0;
+    const ReadEnds *pBest = nullptr;
+    /** All read ends should have orientation FF, FR, RF, or RR **/
+    for (auto pe : vpRe)  // 找分数最高的readend
+    {
+        if (pe->score > maxScore || pBest == nullptr) {
+            maxScore = pe->score;
+            pBest = pe;
+        }
+    }
+    if (!g_mdArg.READ_NAME_REGEX.empty())  // 检查光学冗余
+    {
+        // trackOpticalDuplicates
+    }
+    for (auto pe : vpRe)  // 对非best read标记冗余
+    {
+        if (pe != pBest)  // 非best
+        {
+            vDupIdx.push_back(pe->read1IndexInFile);  // 添加read1
+            if (pe->read2IndexInFile != pe->read1IndexInFile)
+                vDupIdx.push_back(pe->read2IndexInFile);  // 添加read2
+        }
+    }
+
+    if (g_mdArg.TAG_DUPLICATE_SET_MEMBERS) {
+        addRepresentativeReadIndex(vpRe);
+    }
+}
+
+/* 处理一组非paired的readends，标记冗余 */
+void markDuplicateFragments(int64_t posKey, vector<const ReadEnds *> &vpRe, bool containsPairs,
+                            DupContainer<int64_t> *dupIdx) {
+    auto &vDupIdx = dupIdx->AtPos(posKey);
+
+    if (containsPairs) {
+        for (auto pe : vpRe) {
+            if (!pe->IsPaired()) {
+                vDupIdx.push_back(pe->read1IndexInFile);
+            }
+        }
+    } else {
+        int maxScore = 0;
+        const ReadEnds *pBest = nullptr;
+        for (auto pe : vpRe) {
+            if (pe->score > maxScore || pBest == nullptr) {
+                maxScore = pe->score;
+                pBest = pe;
+            }
+        }
+
+        for (auto pe : vpRe) {
+            if (pe != pBest) {
+                vDupIdx.push_back(pe->read1IndexInFile);
+            }
+        }
+    }
+}
+
+/* 处理位于某个坐标的pairend reads */
+void handlePairs(int64_t posKey, vector<ReadEnds> &readEnds,
+                        vector<const ReadEnds *> &vpCache, DupContainer<int64_t> *dupIdx,
+                        DupContainer<int64_t> *opticalDupIdx) {
+    if (readEnds.size() > 1) { // 有潜在的冗余
+        vpCache.clear();
+        //        std::sort(readEnds.begin(), readEnds.end());
+        const ReadEnds *pReadEnd = nullptr;
+        for (auto &re : readEnds) {
+            if (pReadEnd != nullptr && ReadEnds::AreComparableForDuplicates(*pReadEnd, re, true))  // 跟前一个一样
+                vpCache.push_back(&re);  // 处理一个潜在的冗余组
+            else {
+                markDuplicatePairs(posKey, vpCache, dupIdx,
+                                   opticalDupIdx);  // 不一样
+                vpCache.clear();
+                vpCache.push_back(&re);
+                pReadEnd = &re;
+            }
+        }
+        markDuplicatePairs(posKey, vpCache, dupIdx, opticalDupIdx);
+    }
+}
+
+/* 处理位于某个坐标的 reads */
+void handleFrags(int64_t posKey, vector<ReadEnds> &readEnds,
+                        vector<const ReadEnds *> &vpCache, DupContainer<int64_t> *dupIdx) {
+    if (readEnds.size() > 1)  // 有潜在的冗余
+    {
+        vpCache.clear();
+        //        std::sort(readEnds.begin(), readEnds.end());
+        const ReadEnds *pReadEnd = nullptr;
+        bool containsPairs = false;
+        bool containsFrags = false;
+        for (auto &re : readEnds) {
+            if (pReadEnd != nullptr && ReadEnds::AreComparableForDuplicates(*pReadEnd, re, false)) {
+                vpCache.push_back(&re);
+                containsPairs = containsPairs || re.IsPaired();
+                containsFrags = containsFrags || !re.IsPaired();
+            } else {
+                if (vpCache.size() > 1 && containsFrags) {
+                    markDuplicateFragments(posKey, vpCache, containsPairs, dupIdx);
+                }
+                vpCache.clear();
+                vpCache.push_back(&re);
+                pReadEnd = &re;
+                containsPairs = re.IsPaired();
+                containsFrags = !re.IsPaired();
+            }
+        }
+        if (vpCache.size() > 1 && containsFrags) {
+            markDuplicateFragments(posKey, vpCache, containsPairs, dupIdx);
+        }
+    }
+}
+
+/* 对找到的pairend read end添加一些信息 */
+void modifyPairedEnds(const ReadEnds &fragEnd, ReadEnds *pPairedEnds) {
+    auto &pairedEnds = *pPairedEnds;
+    int64_t bamIdx = fragEnd.read1IndexInFile;
+    const int matesRefIndex = fragEnd.read1ReferenceIndex;
+    const int matesCoordinate = fragEnd.read1Coordinate;
+    // Set orientationForOpticalDuplicates, which always goes by the first then
+    // the second end for the strands.  NB: must do this before updating the
+    // orientation later.
+    if (fragEnd.read1FirstOfPair) {
+        pairedEnds.orientationForOpticalDuplicates = ReadEnds::GetOrientationByte(
+            fragEnd.IsNegativeStrand(), pairedEnds.orientation == ReadEnds::R);
+    } else {
+        pairedEnds.orientationForOpticalDuplicates = ReadEnds::GetOrientationByte(
+            pairedEnds.orientation == ReadEnds::R, fragEnd.IsNegativeStrand());
+    }
+    // If the other read is actually later, simply add the other read's data as
+    // read2, else flip the reads
+    if (matesRefIndex > pairedEnds.read1ReferenceIndex ||
+        (matesRefIndex == pairedEnds.read1ReferenceIndex &&
+         matesCoordinate >= pairedEnds.read1Coordinate)) {
+        pairedEnds.read2ReferenceIndex = matesRefIndex;
+        pairedEnds.read2Coordinate = matesCoordinate;
+        pairedEnds.read2IndexInFile = bamIdx;
+        pairedEnds.orientation = ReadEnds::GetOrientationByte(pairedEnds.orientation == ReadEnds::R,
+                                                              fragEnd.IsNegativeStrand());
+
+        // if the two read ends are in the same position, pointing in opposite
+        // directions, the orientation is undefined and the procedure above will
+        // depend on the order of the reads in the file. To avoid this, we set
+        // it explicitly (to FR):
+        if (pairedEnds.read2ReferenceIndex == pairedEnds.read1ReferenceIndex &&
+            pairedEnds.read2Coordinate == pairedEnds.read1Coordinate &&
+            pairedEnds.orientation == ReadEnds::RF) {
+            pairedEnds.orientation = ReadEnds::FR;
+        }
+    } else {
+        pairedEnds.read2ReferenceIndex = pairedEnds.read1ReferenceIndex;
+        pairedEnds.read2Coordinate = pairedEnds.read1Coordinate;
+        pairedEnds.read2IndexInFile = pairedEnds.read1IndexInFile;
+        pairedEnds.read1ReferenceIndex = matesRefIndex;
+        pairedEnds.read1Coordinate = matesCoordinate;
+        pairedEnds.read1IndexInFile = bamIdx;
+        pairedEnds.orientation = ReadEnds::GetOrientationByte(
+            fragEnd.IsNegativeStrand(), pairedEnds.orientation == ReadEnds::R);
+        pairedEnds.posKey = fragEnd.posKey;
+    }
+    pairedEnds.score += fragEnd.score;
+}
+
+static inline bool closeEnough(const ReadEnds *lhs, const ReadEnds *rhs, const int distance) {
+    return lhs != rhs &&  // no comparing an object to itself (checked using object identity)!
+           (lhs->tile != ReadEnds::NO_VALUE) &&
+           (rhs->tile != ReadEnds::NO_VALUE) &&  // no comparing objects without locations
+           lhs->tile == rhs->tile &&             // and the same tile
+           abs(lhs->x - rhs->x) <= distance && abs(lhs->y - rhs->y) <= distance;
+}
+
+/**
+ * Finds which reads within the list of duplicates that are likely to be optical/co-localized duplicates of
+ * one another. Within each cluster of optical duplicates that is found, one read remains un-flagged for
+ * optical duplication and the rest are flagged as optical duplicates.  The set of reads that are considered
+ * optical duplicates are indicated by returning "true" at the same index in the resulting boolean[] as the
+ * read appeared in the input list of physical locations.
+ *
+ * @param list   a list of reads that are determined to be duplicates of one another
+ * @param keeper a single PhysicalLocation that is the one being kept as non-duplicate, and thus should never be
+ *               annotated as an optical duplicate. May in some cases be null, or a PhysicalLocation not
+ *               contained within the list! (always not be null!)
+ * @return a boolean[] of the same length as the incoming list marking which reads are optical duplicates
+ */
+static void findOpticalDuplicates(vector<const ReadEnds *> &readEndsArr, const ReadEnds *pBestRe,
+                           vector<bool> *pOpticalDuplicateFlags) {
+    const int DEFAULT_OPTICAL_DUPLICATE_DISTANCE = 100;
+    const int DEFAULT_MAX_DUPLICATE_SET_SIZE = 300000;
+
+    vector<bool> &opticalDuplicateFlags = *pOpticalDuplicateFlags;
+    opticalDuplicateFlags.push_back(true);
+    int len = readEndsArr.size();
+    // If there is only one or zero reads passed in (so there are obviously no optical duplicates),
+    // or if there are too many reads (so we don't want to try to run this expensive n^2 algorithm),
+    // then just return an array of all false
+    if (len < 2 || len > DEFAULT_MAX_DUPLICATE_SET_SIZE) {
+        return;
+    }
+
+    if (len >= 4) {
+        /**
+         * Compute the optical duplicates correctly in the case where the duplicate group could end up with transitive
+         * optical duplicates
+         * getOpticalDuplicatesFlagWithGraph
+         */
+        // Make a graph where the edges are reads that lie within the optical duplicate pixel distance from each other,
+        // we will then use the union-find algorithm to cluster the graph and find optical duplicate groups
+        Graph<int> opticalDistanceRelationGraph;
+        unordered_map<int, vector<int>> tileRGmap;
+        int keeperIndex = -1;
+        for (int i = 0; i < readEndsArr.size(); ++i) {
+            const ReadEnds *currentLoc = readEndsArr[i];
+            if (currentLoc == pBestRe)
+                keeperIndex = i;
+            if (currentLoc->tile != ReadEnds::NO_VALUE) {
+                int key = currentLoc->tile; // 只处理一个样本，所以只有一个read group
+                tileRGmap[key].push_back(i);
+            }
+        }
+
+    } else {
+        /**
+         * Compute optical duplicates quickly in the standard case where we know that there won't be any transitive
+         * distances to worry about. Note, this is guaranteed to be correct when there are at most 2x reads from a
+         * readgroup or 3x with the keeper present
+         * getOpticalDuplicatesFlagFast
+         */
+        // First go through and compare all the reads to the keeper
+        for (int i = 0; i < len; ++i) {
+            const ReadEnds *other = readEndsArr[i];
+            opticalDuplicateFlags[i] = closeEnough(pBestRe, other, DEFAULT_OPTICAL_DUPLICATE_DISTANCE);
+        }
+        // Now go through and do each pairwise comparison not involving the actualKeeper
+        for (int i = 0; i < len; ++i) {
+            const ReadEnds *lhs = readEndsArr[i];
+            if (lhs == pBestRe)  // no comparisons to actualKeeper since those are all handled above
+                continue;
+            for (int j = i + 1; j < len; ++j) {
+                const ReadEnds *rhs = readEndsArr[j];
+                if (rhs == pBestRe)  // no comparisons to actualKeeper since those are all handled above
+                    continue;
+                if (opticalDuplicateFlags[i] && opticalDuplicateFlags[j])
+                    continue;  // both already marked, no need to check
+                if (closeEnough(lhs, rhs, DEFAULT_OPTICAL_DUPLICATE_DISTANCE)) {
+                    // At this point we want to mark either lhs or rhs as duplicate. Either could have been marked
+                    // as a duplicate of the keeper (but not both - that's checked above), so be careful about which
+                    // one to now mark as a duplicate.
+                    int index = opticalDuplicateFlags[j] ? i : j;
+                    opticalDuplicateFlags[index] = true;
+                }
+            }
+        }
+    }
+}
+
+/**
+ * Looks through the set of reads and identifies how many of the duplicates are
+ * in fact optical duplicates, and stores the data in the instance level histogram.
+ *
+ * We expect only reads with FR or RF orientations, not a mixture of both.
+ *
+ * In PCR duplicate detection, a duplicates can be a have FR and RF when fixing the orientation order to the first end
+ * of the mate.  In optical duplicate detection, we do not consider them duplicates if one read as FR and the other RF
+ * when we order orientation by the first mate sequenced (read #1 of the pair).
+ */
+static int checkOpticalDuplicates(vector<const ReadEnds *> &readEndsArr, const ReadEnds *pBestRe) {
+    vector<bool> opticalDuplicateFlags(readEndsArr.size(), false);
+    // find OpticalDuplicates
+    findOpticalDuplicates(readEndsArr, pBestRe, &opticalDuplicateFlags);
+    int opticalDuplicates = 0;
+    for (int i = 0; i < opticalDuplicateFlags.size(); ++i) {
+        if (opticalDuplicateFlags[i]) {
+            ++opticalDuplicates;
+            ReadEnds *pRe = const_cast<ReadEnds *>(readEndsArr[i]);
+            pRe->isOpticalDuplicate = true;
+        }
+    }
+    if (opticalDuplicates > 0)
+        gMetrics.OpticalDuplicatesByLibraryId += opticalDuplicates;
+    return opticalDuplicates;
+}
+
+/**
+ * 记录光学原因造成的冗余
+ */
+void trackOpticalDuplicates(vector<const ReadEnds *> &readEndsArr, const ReadEnds *pBestRe) {
+    bool hasFR = false, hasRF = false;
+    // Check to see if we have a mixture of FR/RF
+    for (auto pRe : readEndsArr) {
+        if (ReadEnds::FR == pRe->orientationForOpticalDuplicates)
+            hasFR = true;
+        else if (ReadEnds::RF == pRe->orientationForOpticalDuplicates)
+            hasRF = true;
+    }
+
+    // Check if we need to partition since the orientations could have changed
+    int nOpticalDup;
+    if (hasFR && hasRF) { // need to track them independently
+        vector<const ReadEnds *> trackOpticalDuplicatesF;
+        vector<const ReadEnds *> trackOpticalDuplicatesR;
+        // Split into two lists: first of pairs and second of pairs, 
+        // since they must have orientation and same starting end
+        for (auto pRe: readEndsArr) {
+            if (ReadEnds::FR == pRe->orientationForOpticalDuplicates)
+                trackOpticalDuplicatesF.push_back(pRe);
+            else if (ReadEnds::RF == pRe->orientationForOpticalDuplicates)
+                trackOpticalDuplicatesR.push_back(pRe);
+            else
+                cerr << "Found an unexpected orientation: " << pRe->orientation << endl;
+        }
+        // track the duplicates
+        int nOpticalDupF = checkOpticalDuplicates(trackOpticalDuplicatesF, pBestRe);
+        int nOpticalDupR = checkOpticalDuplicates(trackOpticalDuplicatesR, pBestRe);
+        nOpticalDup = nOpticalDupF + nOpticalDupR;
+    } else {  // No need to partition
+        nOpticalDup = checkOpticalDuplicates(readEndsArr, pBestRe);
+    }
+
+    // trackDuplicateCounts
+    gMetrics.DuplicateCountHist += readEndsArr.size() - nOpticalDup;
+    if (readEndsArr.size() > nOpticalDup)
+        gMetrics.NonOpticalDuplicateCountHist += readEndsArr.size() - nOpticalDup;
+    if (nOpticalDup)
+        gMetrics.OpticalDuplicatesCountHist += nOpticalDup + 1;
+}
\ No newline at end of file
diff --git a/src/sam/markdups/md_funcs.h b/src/sam/markdups/md_funcs.h
index 89655b8..b723f65 100644
--- a/src/sam/markdups/md_funcs.h
+++ b/src/sam/markdups/md_funcs.h
@@ -1,60 +1,64 @@
+#pragma once
+
 #include <robin-map/include/tsl/robin_map.h>
+
+#include <vector>
+#include <queue>
+#include <unordered_set>
+#include <unordered_map>
+
+using std::priority_queue;
+using std::unordered_map;
+using std::unordered_set;
+using std::vector;
+
 /* 前向声明 */
+class BamWrap;
+class ReadEnds;
+class ReadNameParser;
 
 /* 存放readend或者冗余idx，避免频繁的开辟和释放内存 */
 template <class T>
-struct DupContainer
-{
-    vector<vector<T>> arr;               // 类似map<int64_t, set<ReadEnds>> 或 map<int64_t, set<int64_t>>
-    vector<int64_t> pos;                 // arr中每个元素对应的position
+struct DupContainer {
+    vector<vector<T>> arr;  // 类似map<int64_t, set<ReadEnds>> 或 map<int64_t, set<int64_t>>
+    vector<int64_t> pos;    // arr中每个元素对应的position
     // unordered_map<int64_t, int64_t> idx; // 某个位点对应在vector中的坐标
-    tsl::robin_map<int64_t, int64_t> idx;    // 某个位点对应在vector中的坐标
-    int64_t size = 0;                        // 实际使用的空间
-    int64_t capacity = 0;                    // 内存容量
-    inline void Init()
-    {
+    tsl::robin_map<int64_t, int64_t> idx;  // 某个位点对应在vector中的坐标
+    int64_t size = 0;                      // 实际使用的空间
+    int64_t capacity = 0;                  // 内存容量
+    inline void Init() {
         idx.clear();
         size = 0;
     }
-    inline void SortAtPos(int64_t p) // 这里的pos表示位点
+    inline void SortAtPos(int64_t p)  // 这里的pos表示位点
     {
-        if (idx.find(p) != idx.end())
-        {
+        if (idx.find(p) != idx.end()) {
             const int64_t i = idx.at(p);
             std::sort(arr[i].begin(), arr[i].end());
         }
     }
-    inline void SortAtIdx(int64_t i) // 这里的i表示vector的索引
+    inline void SortAtIdx(int64_t i)  // 这里的i表示vector的索引
     {
         std::sort(arr[i].begin(), arr[i].end());
     }
-    inline void RemoveAtPos(int64_t p)
-    {
-        if (idx.find(p) != idx.end())
-        {
+    inline void RemoveAtPos(int64_t p) {
+        if (idx.find(p) != idx.end()) {
             const int64_t i = idx.at(p);
             arr[i].clear();
         }
     }
-    inline void RemoveAtIdx(int64_t i) // 这里的i表示vector的索引
+    inline void RemoveAtIdx(int64_t i)  // 这里的i表示vector的索引
     {
         arr[i].clear();
     }
-    inline void AddAtPos(int64_t p, T &val)
-    {
-        AtPos(p).push_back(val);
-    }
-    
-    inline vector<T> &AtPos(int64_t p)
-    {
-        if (idx.find(p) != idx.end())
-        {
+    inline void AddAtPos(int64_t p, T &val) { AtPos(p).push_back(val); }
+
+    inline vector<T> &AtPos(int64_t p) {
+        if (idx.find(p) != idx.end()) {
             const int64_t i = idx.at(p);
             return arr[i];
         }
-
-        if (size >= capacity)
-        {
+        if (size >= capacity) {
             capacity += 1;
             arr.push_back(vector<T>());
             pos.push_back(0);
@@ -67,308 +71,131 @@ struct DupContainer
     }
 };
 
-/* 清除key位置的数据 */
-static inline void clearIdxAtPos(int64_t key, map<int64_t, set<int64_t>> *pmsIdx)
-{
-    auto &msIdx = *pmsIdx;
-    if (msIdx.find(key) != msIdx.end())
-        msIdx[key].clear(); // 清除该位点的冗余结果
-}
 
-/* 删除key位置的数据 */
-static inline void delIdxAtPos(int64_t key, map<int64_t, set<int64_t>> *pmsIdx)
-{
-    auto &msIdx = *pmsIdx;
-    if (msIdx.find(key) != msIdx.end())
-        msIdx.erase(key);
-}
+/*
+ * 优先队列，用最小堆来实现对所有冗余索引的排序
+ */
+struct PairArrIdIdx {
+    int arrId = 0;
+    uint64_t arrIdx = 0;
+    int64_t dupIdx = 0;
+};
+
+struct IdxGreaterThan {
+    bool operator()(const PairArrIdIdx &a, const PairArrIdIdx &b) { return a.dupIdx > b.dupIdx; }
+};
+
+struct DupIdxQueue {
+    // 将冗余索引和他对应的task vector对应起来
+
+    // 由于是多个task来查找冗余，所以每次找到的冗余index都放在一个独立的vector中，vector之间可能有重叠，所以需要用一个最小堆来维护
+    vector<vector<int64_t>> *dupIdx2DArr;
+    priority_queue<PairArrIdIdx, vector<PairArrIdIdx>, IdxGreaterThan> minHeap;
+    uint64_t popNum = 0;
+
+    int Init(vector<vector<int64_t>> *_dupIdx2DArr) { 
+        dupIdx2DArr = _dupIdx2DArr;
+        if (dupIdx2DArr == nullptr) {
+            return -1;
+        }
+        for (int i = 0; i < dupIdx2DArr->size(); ++i) {
+            auto &v = (*dupIdx2DArr)[i];
+            if (!v.empty()) {
+                minHeap.push({i, 1, v[0]});
+            }
+        }
+        return 0;
+    }
+
+    int64_t Pop() {
+        int64_t ret = -1;
+        if (!minHeap.empty()) {
+            auto idx = minHeap.top();
+            minHeap.pop();
+            ++popNum;
+            ret = idx.dupIdx;
+            auto &v = (*dupIdx2DArr)[idx.arrId];
+            if (v.size() > idx.arrIdx) {
+                minHeap.push({idx.arrId, idx.arrIdx + 1, v[idx.arrIdx]});
+            }
+        }
+        return ret;
+    }
+
+    uint64_t Size() {
+        uint64_t len = 0;
+        if (dupIdx2DArr != nullptr) {
+            for (auto &v : *dupIdx2DArr) {
+                len += v.size();
+            }
+        }
+        return len - popNum;
+    }
+};
+
+/*
+ * 用来检测optical duplication的graph
+ */
+template <class Node>
+struct Graph {         // 用set？
+    unordered_set<Node> nodes; // 图中的结点
+    unordered_map<Node*, unordered_set<Node*>> neighbors;  // 邻接列表
+
+    Node *addNode(const Node &singleton) {
+        if (nodes.find(singleton) == nodes.end()) {
+            Node *n = const_cast<Node *>(&(*nodes.insert(singleton).first));
+            neighbors[n].clear();
+            return n;
+        }
+        return const_cast<Node *>(&(*nodes.find(singleton)));
+    }
+};
 
 /*
  * 计算read的分数
  */
-static int16_t computeDuplicateScore(BamWrap &bw)
-{
-    int16_t score = 0;
-    switch (g_mdArg.DUPLICATE_SCORING_STRATEGY)
-    {
-    case ns_md::SUM_OF_BASE_QUALITIES:
-        // two (very) long reads worth of high-quality bases can go over Short.MAX_VALUE/2
-        // and risk overflow.
-        score += (int16_t)min(bw.GetSumOfBaseQualities(), INT16_MAX / 2);
-        break;
-    case ns_md::TOTAL_MAPPED_REFERENCE_LENGTH:
-        if (!bw.GetReadUnmappedFlag())
-            // no need to remember the score since this scoring mechanism is symmetric
-            score = (int16_t)min(bw.GetReferenceLength(), INT16_MAX / 2);
-        break;
-    case ns_md::RANDOM:
-        // The RANDOM score gives the same score to both reads so that they get filtered together.
-        // it's not critical do use the readName since the scores from both ends get added, but it seem
-        // to be clearer this way.
-        score += (short)(Murmur3::Instance().HashUnencodedChars(bw.query_name()) & 0b11111111111111);
-        // subtract Short.MIN_VALUE/4 from it to end up with a number between
-        // 0 and Short.MAX_VALUE/2. This number can be then discounted in case the read is
-        // not passing filters. We need to stay far from overflow so that when we add the two
-        // scores from the two read mates we do not overflow since that could cause us to chose a
-        // failing read-pair instead of a passing one.
-        score -= INT16_MIN / 4;
-    default:
-        break;
-    }
-    // make sure that filter-failing records are heavily discounted. (the discount can happen twice, once
-    // for each mate, so need to make sure we do not subtract more than Short.MIN_VALUE overall.)
-    score += bw.GetReadFailsVendorQualityCheckFlag() ? (int16_t)(INT16_MIN / 2) : 0;
-
-    return score;
-}
+int16_t computeDuplicateScore(BamWrap &bw);
 
 /*
- * Builds a read ends object that represents a single read. 用来表示一个read的特征结构
+ * Builds a read ends object that represents a single read.
+ * 用来表示一个read的特征结构
  */
-static void buildReadEnds(BamWrap &bw, int64_t index, ReadNameParser &rnParser, ReadEnds *pKey)
-{
-    auto &k = *pKey;
-    auto &bc = bw.b->core;
-    k.read1FirstOfPair = bw.GetFirstOfPairFlag();
-    k.read1ReferenceIndex = bc.tid;
-    k.read1Coordinate = (bc.flag & BAM_FREVERSE) ? bw.GetUnclippedEnd() : bw.GetUnclippedStart();
-    k.orientation = (bc.flag & BAM_FREVERSE) ? ReadEnds::R : ReadEnds::F;
+void buildReadEnds(BamWrap &bw, int64_t index, ReadNameParser &rnParser, ReadEnds *pKey);
 
-    k.read1IndexInFile = index;
-    k.score = computeDuplicateScore(bw);
-    // Doing this lets the ends object know that it's part of a pair
-    if (bw.GetReadPairedFlag() && !bw.GetMateUnmappedFlag())
-    {
-        k.read2ReferenceIndex = bc.mtid;
-    }
-    // Fill in the location information for optical duplicates
-    rnParser.AddLocationInformation(bw.query_name(), pKey);
-    // cout << k.tile << ' ' << k.x << ' ' << k.y << endl;
-    // 计算位置key
-    k.posKey = BamWrap::bam_global_pos(k.read1ReferenceIndex, k.read1Coordinate); // << 1 | k.orientation;
-}
+/*
+ * 处理一组pairend的readends，标记冗余
+ */
+void markDuplicatePairs(int64_t posKey, vector<const ReadEnds *> &vpRe,
+                        DupContainer<int64_t> *dupIdx, DupContainer<int64_t> *opticalDupIdx);
+
+/*
+ * 处理一组非paired的readends，标记冗余
+ */
+void markDuplicateFragments(int64_t posKey, vector<const ReadEnds *> &vpRe, bool containsPairs,
+                            DupContainer<int64_t> *dupIdx);
+
+/*
+ * 处理位于某个坐标的pairend reads
+ */
+void handlePairs(int64_t posKey, vector<ReadEnds> &readEnds, vector<const ReadEnds *> &vpCache,
+                 DupContainer<int64_t> *dupIdx, DupContainer<int64_t> *opticalDupIdx);
+
+/*
+ * 处理位于某个坐标的非配对的frag reads
+ */
+void handleFrags(int64_t posKey, vector<ReadEnds> &readEnds, vector<const ReadEnds *> &vpCache,
+                 DupContainer<int64_t> *dupIdx);
+
+/*
+ * 对找到的pairend read end添加一些信息
+ */
+void modifyPairedEnds(const ReadEnds &fragEnd, ReadEnds *pPairedEnds);
 
 /**
- * Takes a list of ReadEndsForMarkDuplicates objects and identify the representative read based on
- * quality score. For all members of the duplicate set, add the read1 index-in-file of the representative
- * read to the records of the first and second in a pair. This value becomes is used for
- * the 'DI' tag.
+ * Looks through the set of reads and identifies how many of the duplicates are
+ * in fact optical duplicates, and stores the data in the instance level histogram.
+ * Additionally sets the transient isOpticalDuplicate flag on each read end that is
+ * identified as an optical duplicate.
+ * 记录光学原因造成的冗余
  */
-static void addRepresentativeReadIndex(vector<const ReadEnds *> &vpRe)
-{
-}
-
-/* 处理一组pairend的readends，标记冗余 */
-static void markDuplicatePairs(int64_t posKey,
-                               vector<const ReadEnds *> &vpRe,
-                               DupContainer<int64_t> *dupIdx,
-                               DupContainer<int64_t> *opticalDupIdx)
-{
-    if (vpRe.size() < 2)
-    {
-        if (vpRe.size() == 1)
-        {
-            // addSingletonToCount(libraryIdGenerator);
-        }
-        return;
-    }
-    // cout << "pos:" << posKey + 1 << ";size:" << vpRe.size() << endl;
-    auto &vDupIdx = dupIdx->AtPos(posKey);
-    auto &vOpticalDupIdx = opticalDupIdx->AtPos(posKey);
-
-    int maxScore = 0;
-    const ReadEnds *pBest = nullptr;
-    /** All read ends should have orientation FF, FR, RF, or RR **/
-    for (auto pe : vpRe) // 找分数最高的readend
-    {
-        if (pe->score > maxScore || pBest == nullptr)
-        {
-            maxScore = pe->score;
-            pBest = pe;
-        }
-    }
-    if (!g_mdArg.READ_NAME_REGEX.empty()) // 检查光学冗余
-    {
-        // trackOpticalDuplicates
-    }
-    for (auto pe : vpRe) // 对非best read标记冗余
-    {
-        if (pe != pBest) // 非best
-        {
-            vDupIdx.push_back(pe->read1IndexInFile); // 添加read1
-            if (pe->read2IndexInFile != pe->read1IndexInFile)
-                vDupIdx.push_back(pe->read2IndexInFile); // 添加read2
-        }
-    }
-
-    if (g_mdArg.TAG_DUPLICATE_SET_MEMBERS)
-    {
-        addRepresentativeReadIndex(vpRe);
-    }
-}
-
-/* 处理一组非paired的readends，标记冗余 */
-static void markDuplicateFragments(int64_t posKey,
-                                   vector<const ReadEnds *> &vpRe,
-                                   bool containsPairs,
-                                   DupContainer<int64_t> *dupIdx)
-{
-    auto &vDupIdx = dupIdx->AtPos(posKey);
-
-    if (containsPairs)
-    {
-        for (auto pe : vpRe)
-        {
-            if (!pe->IsPaired())
-            {
-                vDupIdx.push_back(pe->read1IndexInFile);
-            }
-        }
-    }
-    else
-    {
-        int maxScore = 0;
-        const ReadEnds *pBest = nullptr;
-        for (auto pe : vpRe)
-        {
-            if (pe->score > maxScore || pBest == nullptr)
-            {
-                maxScore = pe->score;
-                pBest = pe;
-            }
-        }
-
-        for (auto pe : vpRe)
-        {
-            if (pe != pBest)
-            {
-                vDupIdx.push_back(pe->read1IndexInFile);
-            }
-        }
-    }
-}
-
-/* 处理位于某个坐标的pairend reads */
-static inline void handlePairs(int64_t posKey,
-                               vector<ReadEnds> &readEnds,
-                               vector<const ReadEnds *> &vpCache,
-                               DupContainer<int64_t> *dupIdx,
-                               DupContainer<int64_t> *opticalDupIdx)
-{
-    if (readEnds.size() > 1) // 有潜在的冗余
-    {
-        vpCache.clear();
-//        std::sort(readEnds.begin(), readEnds.end());
-        const ReadEnds *pReadEnd = nullptr;
-        for (auto &re : readEnds)
-        {
-            if (pReadEnd != nullptr && ReadEnds::AreComparableForDuplicates(*pReadEnd, re, true)) // 跟前一个一样
-                vpCache.push_back(&re);                                                           // 处理一个潜在的冗余组
-            else
-            {
-                markDuplicatePairs(posKey, vpCache, dupIdx, opticalDupIdx); // 不一样
-                vpCache.clear();
-                vpCache.push_back(&re);
-                pReadEnd = &re;
-            }
-        }
-        markDuplicatePairs(posKey, vpCache, dupIdx, opticalDupIdx);
-    }
-}
-
-/* 处理位于某个坐标的 reads */
-static inline void handleFrags(
-    int64_t posKey,
-    vector<ReadEnds> &readEnds,
-    vector<const ReadEnds *> &vpCache,
-    DupContainer<int64_t> *dupIdx)
-{
-    if (readEnds.size() > 1) // 有潜在的冗余
-    {
-        vpCache.clear();
-//        std::sort(readEnds.begin(), readEnds.end());
-        const ReadEnds *pReadEnd = nullptr;
-        bool containsPairs = false;
-        bool containsFrags = false;
-        for (auto &re : readEnds)
-        {
-            if (pReadEnd != nullptr && ReadEnds::AreComparableForDuplicates(*pReadEnd, re, false))
-            {
-                vpCache.push_back(&re);
-                containsPairs = containsPairs || re.IsPaired();
-                containsFrags = containsFrags || !re.IsPaired();
-            }
-            else
-            {
-                if (vpCache.size() > 1 && containsFrags)
-                {
-                    markDuplicateFragments(posKey, vpCache, containsPairs, dupIdx);
-                }
-                vpCache.clear();
-                vpCache.push_back(&re);
-                pReadEnd = &re;
-                containsPairs = re.IsPaired();
-                containsFrags = !re.IsPaired();
-            }
-        }
-        if (vpCache.size() > 1 && containsFrags)
-        {
-            markDuplicateFragments(posKey, vpCache, containsPairs, dupIdx);
-        }
-    }
-}
-
-/* 对找到的pairend read end添加一些信息 */
-static inline void modifyPairedEnds(const ReadEnds &fragEnd, ReadEnds *pPairedEnds)
-{
-    auto &pairedEnds = *pPairedEnds;
-    int64_t bamIdx = fragEnd.read1IndexInFile;
-    const int matesRefIndex = fragEnd.read1ReferenceIndex;
-    const int matesCoordinate = fragEnd.read1Coordinate;
-    // Set orientationForOpticalDuplicates, which always goes by the first then the second end for the strands.  NB: must do this
-    // before updating the orientation later.
-    if (fragEnd.read1FirstOfPair)
-    {
-        pairedEnds.orientationForOpticalDuplicates =
-            ReadEnds::GetOrientationByte(fragEnd.IsNegativeStrand(), pairedEnds.orientation == ReadEnds::R);
-    }
-    else
-    {
-        pairedEnds.orientationForOpticalDuplicates =
-            ReadEnds::GetOrientationByte(pairedEnds.orientation == ReadEnds::R, fragEnd.IsNegativeStrand());
-    }
-    // If the other read is actually later, simply add the other read's data as read2, else flip the reads
-    if (matesRefIndex > pairedEnds.read1ReferenceIndex ||
-        (matesRefIndex == pairedEnds.read1ReferenceIndex && matesCoordinate >= pairedEnds.read1Coordinate))
-    {
-        pairedEnds.read2ReferenceIndex = matesRefIndex;
-        pairedEnds.read2Coordinate = matesCoordinate;
-        pairedEnds.read2IndexInFile = bamIdx;
-        pairedEnds.orientation = ReadEnds::GetOrientationByte(pairedEnds.orientation == ReadEnds::R,
-                                                              fragEnd.IsNegativeStrand());
-
-        // if the two read ends are in the same position, pointing in opposite directions,
-        // the orientation is undefined and the procedure above
-        // will depend on the order of the reads in the file.
-        // To avoid this, we set it explicitly (to FR):
-        if (pairedEnds.read2ReferenceIndex == pairedEnds.read1ReferenceIndex &&
-            pairedEnds.read2Coordinate == pairedEnds.read1Coordinate &&
-            pairedEnds.orientation == ReadEnds::RF)
-        {
-            pairedEnds.orientation = ReadEnds::FR;
-        }
-    }
-    else
-    {
-        pairedEnds.read2ReferenceIndex = pairedEnds.read1ReferenceIndex;
-        pairedEnds.read2Coordinate = pairedEnds.read1Coordinate;
-        pairedEnds.read2IndexInFile = pairedEnds.read1IndexInFile;
-        pairedEnds.read1ReferenceIndex = matesRefIndex;
-        pairedEnds.read1Coordinate = matesCoordinate;
-        pairedEnds.read1IndexInFile = bamIdx;
-        pairedEnds.orientation = ReadEnds::GetOrientationByte(fragEnd.IsNegativeStrand(),
-                                                              pairedEnds.orientation == ReadEnds::R);
-        pairedEnds.posKey = fragEnd.posKey;
-    }
-    pairedEnds.score += fragEnd.score;
-}
\ No newline at end of file
+void trackOpticalDuplicates(vector<const ReadEnds *> &readEndsArr, const ReadEnds *pBestRe);
\ No newline at end of file
diff --git a/src/sam/markdups/parallel_md.cpp b/src/sam/markdups/parallel_md.cpp
new file mode 100644
index 0000000..e69de29
diff --git a/src/sam/markdups/serial_md.cpp b/src/sam/markdups/serial_md.cpp
new file mode 100644
index 0000000..ed1dcc7
--- /dev/null
+++ b/src/sam/markdups/serial_md.cpp
@@ -0,0 +1,783 @@
+#include "serial_md.h"
+
+#include <common/hts/bam_buf.h>
+#include <common/utils/debug.h>
+#include <common/utils/global_arg.h>
+#include <common/utils/profiling.h>
+#include <common/utils/timer.h>
+#include <common/utils/util.h>
+#include <sam/utils/read_ends.h>
+
+#include <algorithm>
+#include <iostream>
+#include <set>
+#include <vector>
+
+#include "markdups_arg.h"
+#include "md_funcs.h"
+#include "shared_args.h"
+#include "dup_metrics.h"
+
+using std::cout;
+using std::set;
+using std::vector;
+
+/* 查找 */
+// template<class Itr, class T>
+// static inline Itr binaryFind(Itr first, Itr last, const T &val)
+// {
+//     first = std::lower_bound(first, last, val);
+//     return (first != last && *first == val) ? first : last;
+// }
+
+/* 排序 */
+static inline void sortReadEndsArr(vector<ReadEnds> &arr) {
+    size_t blockSize = 64 * 1024;
+    if (arr.size() < blockSize) {
+        std::sort(arr.begin(), arr.end());
+        return;
+    }
+    size_t blockNum = (arr.size() + blockSize - 1) / blockSize;
+    size_t crossNum = 1024;
+    size_t start, end, i, left, right;
+    std::sort(arr.begin(), arr.begin() + blockSize);
+    for (i = 1; i < blockNum; ++i) {
+        start = i * blockSize;
+        end = min(start + blockSize, arr.size());
+        std::sort(arr.begin() + start, arr.begin() + end);
+        left = crossNum;
+        while (!(arr[start - left] < arr[start])) {
+            left = left << 1;
+            if (left >= blockSize) {
+                std::sort(arr.begin(), arr.end());  // 退化到普通排序
+                return;
+            }
+        }
+        right = min(crossNum, end - start - 1);
+
+        while (!(arr[start - 1] < arr[start + right])) {
+            right = min(right << 1, end - start - 1);
+            if (right == end - start - 1)
+                break;
+        }
+        std::sort(arr.begin() + start - left, arr.begin() + start + right);
+    }
+}
+
+/* 处理一组pairend的readends，标记冗余, 这个函数需要串行运行，因为需要做一些统计*/
+static void markDupsForPairs(vector<const ReadEnds *> &vpRe, set<int64_t> *dupIdx, set<int64_t> *opticalDupIdx,
+                             set<int64_t> *notDupIdx = nullptr) {
+    if (vpRe.size() < 2) {
+        if (vpRe.size() == 1) {
+            // addSingletonToCount(libraryIdGenerator);
+            // 这个统计可能会有误差，因为当前位点可能还有没匹配上的read，导致当前位点的read（paired）数量为1
+            // 可以通过后续的补充计算来解决这个问题，有必要么？
+            gMetrics.AddSingletonToCount();
+        }
+        return;
+    }
+    int maxScore = 0;
+    const ReadEnds *pBest = nullptr;
+    /** All read ends should have orientation FF, FR, RF, or RR **/
+    for (auto pe : vpRe) { // 找分数最高的readend
+        if (pe->score > maxScore || pBest == nullptr) {
+            maxScore = pe->score;
+            pBest = pe;
+        }
+    }
+    if (notDupIdx != nullptr) {
+        notDupIdx->insert(pBest->read1IndexInFile);
+        notDupIdx->insert(pBest->read2IndexInFile);
+    }
+    if (!g_mdArg.READ_NAME_REGEX.empty()) { // 检查光学冗余
+        // trackOpticalDuplicates
+        trackOpticalDuplicates(vpRe, pBest);
+    }
+    for (auto pe : vpRe)  // 对非best read标记冗余
+    {
+        if (pe != pBest)  // 非best
+        {
+            dupIdx->insert(pe->read1IndexInFile);  // 添加read1
+            if (pe->read2IndexInFile != pe->read1IndexInFile)
+                dupIdx->insert(pe->read2IndexInFile);  // 添加read2
+        }
+    }
+    // 在输出的bam文件中添加tag
+    if (g_mdArg.TAG_DUPLICATE_SET_MEMBERS) {
+        // addRepresentativeReadIndex(vpRe); // 每次都更新就行，用最新的覆盖之前的（如果之前有）
+    }
+}
+
+/* 处理一组非paired的readends，标记冗余 */
+static void markDupsForFrags(vector<const ReadEnds *> &vpRe, bool containsPairs, set<int64_t> *dupIdx,
+                             set<int64_t> *notDupIdx = nullptr) {
+    if (containsPairs) {
+        for (auto pe : vpRe) {
+            if (!pe->IsPaired()) {
+                dupIdx->insert(pe->read1IndexInFile);
+            }
+        }
+    } else {
+        int maxScore = 0;
+        const ReadEnds *pBest = nullptr;
+        for (auto pe : vpRe) {
+            if (pe->score > maxScore || pBest == nullptr) {
+                maxScore = pe->score;
+                pBest = pe;
+            }
+        }
+        if (notDupIdx != nullptr) {
+            notDupIdx->insert(pBest->read1IndexInFile);
+        }
+        for (auto pe : vpRe) {
+            if (pe != pBest) {
+                dupIdx->insert(pe->read1IndexInFile);
+            }
+        }
+    }
+}
+
+/* 找到与readend pos相等的所有readend */
+static void getEqualRE(const ReadEnds &re, vector<ReadEnds> &src, vector<ReadEnds> *dst) {
+    auto range = std::equal_range(src.begin(), src.end(), re,
+                                  ReadEnds::PairsLittleThan);  // 只比对位点
+    dst->insert(dst->end(), range.first, range.second);
+}
+
+/* 单线程生成readends (第一步)*/
+static void generateReadEnds(SerailMarkDupArg *arg) {
+    auto &p = *arg;
+    auto &rnParser = g_vRnParser[0];
+
+    p.pairs.clear();
+    p.frags.clear();
+    p.unpairedDic.clear();
+    p.unpairedPosArr.clear();
+
+    /* 处理每个read，创建ReadEnd，并放入frag和pair中 */
+    set<ReadEnds> reSet;
+
+    ReadEnds lastRe;
+
+    for (int i = 0; i < p.bams.size(); ++i)  // 循环处理每个read
+    {
+        BamWrap *bw = p.bams[i];
+        const int64_t bamIdx = p.bamStartIdx + i;
+        if (bw->GetReadUnmappedFlag()) {
+            if (bw->b->core.tid == -1)
+                // When we hit the unmapped reads with no coordinate, no reason
+                // to continue (only in coordinate sort).
+                break;
+        } else if (!bw->IsSecondaryOrSupplementary())  // 是主要比对
+        {
+            ReadEnds fragEnd;
+            tm_arr[8].acc_start();
+            buildReadEnds(*bw, bamIdx, rnParser, &fragEnd);
+            tm_arr[8].acc_end();
+            p.frags.push_back(fragEnd);                                 // 添加进frag集合
+            if (bw->GetReadPairedFlag() && !bw->GetMateUnmappedFlag())  // 是pairend而且互补的read也比对上了
+            {
+                string key = bw->query_name();
+                if (p.unpairedDic.find(key) == p.unpairedDic.end()) {
+                    p.unpairedDic[key] = {p.taskSeq, fragEnd};
+                } else  // 找到了pairend
+                {
+                    auto &pairedEnds = p.unpairedDic.at(key).unpairedRE;
+                    modifyPairedEnds(fragEnd, &pairedEnds);
+                    p.pairs.push_back(pairedEnds);
+                    p.unpairedDic.erase(key);  // 删除找到的pairend
+                }
+            }
+        }
+    }
+    tm_arr[9].acc_start();
+    sortReadEndsArr(p.frags);
+    // sort(p.frags.begin(), p.frags.end());
+    tm_arr[9].acc_end();
+    // cout << "sort pairs" << endl;
+    tm_arr[10].acc_start();
+    sort(p.pairs.begin(), p.pairs.end());
+    tm_arr[10].acc_end();
+    // 记录位点上的未匹配的read个数
+    for (auto &e : p.unpairedDic) {
+        auto posKey = e.second.unpairedRE.posKey;
+        auto &unpairArrInfo = p.unpairedPosArr[posKey];
+        unpairArrInfo.unpairedNum++;
+        unpairArrInfo.taskSeq = p.taskSeq;
+        unpairArrInfo.readNameSet.insert(e.first);
+    }
+    // cout << "依赖比例：" <<  (float)p.unpairedDic.size() / p.frags.size() <<
+    // endl;
+}
+
+/* 处理pairs */
+static void processPairs(vector<ReadEnds> &readEnds, set<int64_t> *dupIdx, set<int64_t> *opticalDupIdx,
+                         set<int64_t> *notDupIdx = nullptr) {
+    vector<const ReadEnds *> vpCache;  // 有可能是冗余的reads
+    const ReadEnds *pReadEnd = nullptr;
+    for (auto &re : readEnds) {
+        if (pReadEnd != nullptr && ReadEnds::AreComparableForDuplicates(*pReadEnd, re, true))  // 跟前一个一样
+            vpCache.push_back(&re);  // 处理一个潜在的冗余组
+        else {
+            markDupsForPairs(vpCache, dupIdx, opticalDupIdx, notDupIdx);  // 不一样
+            vpCache.clear();
+            vpCache.push_back(&re);
+            pReadEnd = &re;
+        }
+    }
+    markDupsForPairs(vpCache, dupIdx, opticalDupIdx, notDupIdx);
+}
+
+/* 处理frags */
+static void processFrags(vector<ReadEnds> &readEnds, set<int64_t> *dupIdx, set<int64_t> *notDupIdx = nullptr) {
+    bool containsPairs = false;
+    bool containsFrags = false;
+    vector<const ReadEnds *> vpCache;  // 有可能是冗余的reads
+    const ReadEnds *pReadEnd = nullptr;
+    for (auto &re : readEnds) {
+        if (pReadEnd != nullptr && ReadEnds::AreComparableForDuplicates(*pReadEnd, re, false)) {
+            vpCache.push_back(&re);
+            containsPairs = containsPairs || re.IsPaired();
+            containsFrags = containsFrags || !re.IsPaired();
+        } else {
+            if (vpCache.size() > 1 && containsFrags) {
+                markDupsForFrags(vpCache, containsPairs, dupIdx, notDupIdx);
+            }
+            vpCache.clear();
+            vpCache.push_back(&re);
+            pReadEnd = &re;
+            containsPairs = re.IsPaired();
+            containsFrags = !re.IsPaired();
+        }
+    }
+    if (vpCache.size() > 1 && containsFrags) {
+        markDupsForFrags(vpCache, containsPairs, dupIdx, notDupIdx);
+    }
+}
+
+/* 单线程markdup (第二步)*/
+static void markdups(SerailMarkDupArg *arg) {
+    auto &p = *arg;
+    p.pairDupIdx.clear();
+    p.pairOpticalDupIdx.clear();
+    p.fragDupIdx.clear();
+    /* generateDuplicateIndexes，计算冗余read在所有read中的位置索引 */
+    // 先处理 pair
+    processPairs(p.pairs, &p.pairDupIdx, &p.pairOpticalDupIdx);
+
+    // 再处理frag
+    processFrags(p.frags, &p.fragDupIdx);
+}
+
+/* 获取交叉部分的数据 */
+static inline void getIntersectData(vector<ReadEnds> &leftArr, vector<ReadEnds> &rightArr, vector<ReadEnds> *dst,
+                                    bool isPairCmp = false) {
+    if (leftArr.empty() || rightArr.empty()) {
+        cout << "bad size: " << leftArr.size() << '\t' << rightArr.size() << endl;
+        return;
+    }
+    const size_t leftEndIdx = leftArr.size() - 1;
+    const size_t rightStartIdx = 0;
+    size_t leftSpan = 0;
+    size_t rightSpan = 0;
+
+    while (!ReadEnds::ReadLittleThan(leftArr[leftEndIdx - leftSpan], rightArr[rightStartIdx], isPairCmp)) {
+        leftSpan += 1;
+        if (leftSpan > leftEndIdx) {
+            leftSpan = leftArr.size() - 1;
+            break;
+        }
+    }
+
+    while (!ReadEnds::ReadLittleThan(leftArr[leftEndIdx], rightArr[rightSpan], isPairCmp)) {
+        rightSpan += 1;
+        if (rightSpan == rightArr.size() - 1)
+            break;
+    }
+    dst->insert(dst->end(), leftArr.end() - leftSpan, leftArr.end());
+    dst->insert(dst->end(), rightArr.begin(), rightArr.begin() + rightSpan);
+    std::sort(dst->begin(), dst->end());
+}
+
+/* 将frags重叠部分的dup idx放进数据中 */
+static inline void refreshFragDupIdx(set<int64_t> &dupIdx, set<int64_t> &notDupIdx, SerailMarkDupArg *lastArg,
+                                     SerailMarkDupArg *curArg) {
+    auto &lp = *lastArg;
+    auto &p = *curArg;
+    for (auto idx : dupIdx) {
+        lp.fragDupIdx.insert(idx);
+        p.fragDupIdx.erase(idx);
+    }
+    for (auto idx : notDupIdx) {
+        lp.fragDupIdx.erase(idx);
+        p.fragDupIdx.erase(idx);
+    }
+}
+
+/* 将pairs重叠部分的dup idx放进数据中 */
+static inline void refreshPairDupIdx(set<int64_t> &dupIdx, set<int64_t> &opticalDupIdx, set<int64_t> &notDupIdx,
+                                     SerailMarkDupArg *lastArg, SerailMarkDupArg *curArg) {
+    auto &lp = *lastArg;
+    auto &p = *curArg;
+    for (auto idx : dupIdx) {
+        lp.pairDupIdx.insert(idx);
+        p.pairDupIdx.erase(idx);
+    }
+    for (auto idx : opticalDupIdx) {
+        lp.pairOpticalDupIdx.insert(idx);
+        p.pairOpticalDupIdx.erase(idx);
+    }
+    for (auto idx : notDupIdx) {
+        lp.pairDupIdx.erase(idx);
+        lp.pairOpticalDupIdx.erase(idx);
+        p.pairDupIdx.erase(idx);
+        p.pairOpticalDupIdx.erase(idx);
+    }
+}
+
+// 用来分别处理dup和optical dup
+static void refeshTaskDupInfo(set<int64_t> &dupIdx, set<int64_t> &opticalDupIdx, set<int64_t> &notDupIdx,
+                              set<int64_t> &latterDupIdx, set<int64_t> &latterOpticalDupIdx,
+                              set<int64_t> &latterNotDupIdx) {
+    for (auto idx : dupIdx) latterDupIdx.insert(idx);
+    for (auto idx : opticalDupIdx) latterOpticalDupIdx.insert(idx);
+    for (auto idx : notDupIdx) latterNotDupIdx.insert(idx);
+}
+
+/* 最后合并数据并排序 */
+static void refeshFinalTaskDupInfo(set<int64_t> &dupIdx, set<int64_t> &notDupIdx, vector<int64_t> &dupArr) {
+    vector<int64_t> midArr;
+
+    auto ai = dupArr.begin();
+    auto bi = dupIdx.begin();
+    auto ae = dupArr.end();
+    auto be = dupIdx.end();
+
+    int64_t val = 0;
+    while (ai != ae && bi != be) {
+        if (*ai < *bi) {
+            val = *ai++;
+        } else if (*bi < *ai) {
+            val = *bi++;
+        } else {
+            val = *ai++;
+            bi++;
+        }
+        if (notDupIdx.find(val) == notDupIdx.end()) {
+            midArr.push_back(val);
+        }
+    }
+    while (ai != ae) {
+        val = *ai++;
+        if (notDupIdx.find(val) == notDupIdx.end()) {
+            midArr.push_back(val);
+        }
+    }
+    while (bi != be) {
+        val = *bi++;
+        if (notDupIdx.find(val) == notDupIdx.end()) {
+            midArr.push_back(val);
+        }
+    }
+    dupArr = midArr;
+}
+
+/* 处理相邻的两个任务，有相交叉的数据  */
+static void handleIntersectData(SerailMarkDupArg *lastArg, SerailMarkDupArg *curArg, GlobalDataArg *gDataArg) {
+    auto &lp = *lastArg;
+    auto &p = *curArg;
+    auto &g = *gDataArg;
+
+    vector<ReadEnds> reArr;
+    set<int64_t> dupIdx;
+    set<int64_t> notDupIdx;
+    // 先处理重叠的frags
+    getIntersectData(lp.frags, p.frags, &reArr);
+    processFrags(reArr, &dupIdx, &notDupIdx);
+    refreshFragDupIdx(dupIdx, notDupIdx, &lp, &p);
+
+    // 再处理重叠的pairs
+    reArr.clear();
+    dupIdx.clear();
+    notDupIdx.clear();
+    set<int64_t> opticalDupIdx;
+    getIntersectData(lp.pairs, p.pairs, &reArr, true);
+    processPairs(reArr, &dupIdx, &opticalDupIdx, &notDupIdx);
+    refreshPairDupIdx(dupIdx, opticalDupIdx, notDupIdx, &lp, &p);
+
+    // 处理之前未匹配的部分
+    map<CalcKey, int64_t> recalcPos;
+    set<CalcKey> alreadyAdd;  // 与该位点相同的pair都添加到数组里了
+    set<int64_t> addToGlobal;
+    int64_t prevLastPos = 0, nextFirstPos = 0;
+    if (lp.frags.size() > 0)
+        prevLastPos = lp.frags.back().posKey;
+    if (p.frags.size() > 0)
+        nextFirstPos = p.frags[0].posKey;
+    // cout << "range: " << nextFirstPos << '\t' << prevLastPos << endl;
+    for (auto &prevUnpair : lp.unpairedDic) {  // 遍历上一个任务中的每个未匹配的read
+        auto &readName = prevUnpair.first;
+        auto &prevPosInfo = prevUnpair.second;
+        auto prevFragEnd = prevPosInfo.unpairedRE;  // 未匹配的read end
+
+        if (p.unpairedDic.find(readName) != p.unpairedDic.end()) {  // 在当前这个任务里找到了这个未匹配的read
+            auto &nextPosInfo = p.unpairedDic[readName];
+            auto &nextFragEnd = nextPosInfo.unpairedRE;
+            int64_t prevPosKey = prevFragEnd.posKey;
+            modifyPairedEnds(nextFragEnd, &prevFragEnd);  // 在某些clip情况下，poskey可能是后面的read
+            int64_t nextPosKey = max(prevPosKey, nextFragEnd.posKey);
+            CalcKey ck = {prevPosKey, nextPosKey};
+            UnpairedPosInfo *prevUnpairInfoP = nullptr;
+            UnpairedPosInfo *nextUnpairInfoP = nullptr;
+            if (lp.unpairedPosArr.find(prevPosKey) != lp.unpairedPosArr.end())
+                prevUnpairInfoP = &lp.unpairedPosArr[prevPosKey];
+            if (p.unpairedPosArr.find(prevPosKey) != p.unpairedPosArr.end())
+                nextUnpairInfoP = &p.unpairedPosArr[prevPosKey];
+
+            // pos分为两种情况，根据poskey(pair中两个read分别的pos)的位置确定
+            //  1.
+            //  prevpos在交叉部分之前，nextpos在交叉部分之后，这种情况不需要获取pairarr中的数据;
+            //  2.
+            //  prevpos在交叉部分之前，nextpos在交叉部分，需要获取lp中的相等read
+            //  pair进行重新计算
+            //     复杂情况1.
+            //     g中包含prevPosKey对应的unpair，p中有对应的pair，此时应该把这些pair考虑进去
+            //  3.
+            //  prevpos在交叉部分，nextpos在交叉部分之后，需要获取p中的相等read
+            //  pair进行重新计算
+            //     复杂情况2. p中是否包含prevPosKey对应的unpair
+            //  4.
+            //  prevpos在交叉部分，nextpos在交叉部分，需要获取lp和p中的相等read
+            //  pair进行重新计算
+
+            bool addDataToPos = true;
+            if (alreadyAdd.find(ck) != alreadyAdd.end()) {
+                addDataToPos = false;  // 之前已经添加过了，后面就不用再添加数据了
+            } else
+                alreadyAdd.insert(ck);
+
+            if (prevPosKey < nextFirstPos) {                   // prevpos在交叉部分之前
+                auto &prevPairArr = prevUnpairInfoP->pairArr;  // prevUnpairInfoP肯定不是nullptr
+                prevPairArr.push_back(prevFragEnd);
+                if (nextPosKey <= prevLastPos && addDataToPos) {  // 第二种情况
+                    getEqualRE(prevFragEnd, lp.pairs, &prevPairArr);
+                }
+                // 第一种情况，第二种情况下都会出现，复杂情况一
+                auto gPosInfo = g.unpairedPosArr.find(prevPosKey);
+                if (gPosInfo != g.unpairedPosArr.end()) {  // 可能g和p有匹配的，刚好和该位点一致
+                    auto &gUnpairInfo = gPosInfo->second;
+                    auto pPosInfo = p.unpairedPosArr.find(nextPosKey);
+                    if (pPosInfo != p.unpairedPosArr.end()) {
+                        auto &pUnpairInfo = pPosInfo->second;
+                        for (auto &rn : gUnpairInfo.readNameSet) {  // 遍历每一个readname，看是否有匹配的
+                            if (pUnpairInfo.readNameSet.find(rn) != pUnpairInfo.readNameSet.end()) {
+                                auto pe = g.unpairedDic[rn].unpairedRE;
+                                auto fe = p.unpairedDic[rn].unpairedRE;
+                                modifyPairedEnds(fe, &pe);
+                                prevPairArr.push_back(pe);
+                                g.unpairedDic.erase(rn);
+                                p.unpairedDic.erase(rn);
+                                // cout << "找到了！" << rn << endl;
+                            }
+                        }
+                    }
+                }
+                recalcPos[ck] = prevPosInfo.taskSeq;
+                std::sort(prevPairArr.begin(), prevPairArr.end());
+            } else { // prevpos在交叉部分
+                if (nextPosKey > prevLastPos) { // nextpos在交叉部分之后 第三种情况
+                    if (nextUnpairInfoP != nullptr) { // 且在pos点，next task有unpair，这样才把这些数据放到next task里
+                        auto &nextPairArr = nextUnpairInfoP->pairArr;
+                        nextPairArr.push_back(prevFragEnd);
+                        auto &prevPairArr = prevUnpairInfoP->pairArr;
+                        prevPairArr.push_back(prevFragEnd);
+                        if (addDataToPos) {
+                            getEqualRE(prevFragEnd, p.pairs, &prevPairArr);
+                        }
+                        // 将数据放到next task里,（这个位点以后会可能还会计算到，目前方案是都计算，只是把冗余剔除）
+                        recalcPos[ck] = nextPosInfo.taskSeq;
+                                                  
+                        std::sort(prevPairArr.begin(), prevPairArr.end());
+                    } else { // next task在该位点没有unpair，那就把数据放到prev task里
+                        auto &prevPairArr = prevUnpairInfoP->pairArr;  // prevUnpairInfoP肯定不是nullptr
+                        prevPairArr.push_back(prevFragEnd);
+                        if (addDataToPos)  // 第二种情况
+                            getEqualRE(prevFragEnd, p.pairs, &prevPairArr);
+                        recalcPos[ck] = prevPosInfo.taskSeq;
+                        std::sort(prevPairArr.begin(), prevPairArr.end());
+                    }
+                } else {  // 第四种情况
+                    if (prevUnpairInfoP == nullptr) {
+                        prevUnpairInfoP = &lp.unpairedPosArr[prevPosKey];
+                        prevUnpairInfoP->taskSeq = lp.taskSeq;
+                    }
+                    auto &prevPairArr = prevUnpairInfoP->pairArr;
+                    prevPairArr.push_back(prevFragEnd);
+                    if (addDataToPos) {
+                        getEqualRE(prevFragEnd, lp.pairs, &prevPairArr);
+                        getEqualRE(prevFragEnd, p.pairs, &prevPairArr);
+                    }
+                    recalcPos[ck] = prevPosInfo.taskSeq;
+                    std::sort(prevPairArr.begin(), prevPairArr.end());
+                }
+            }
+            p.unpairedDic.erase(readName); // 在next task里删除该read
+        } else if (g.unpairedDic.find(readName) != g.unpairedDic.end()) { // 在遗留数据中找到了匹配的read
+            auto &remainPosInfo = g.unpairedDic[readName];
+            auto remainFragEnd = remainPosInfo.unpairedRE;
+            int64_t remainPosKey = remainFragEnd.posKey;
+            modifyPairedEnds(prevFragEnd, &remainFragEnd);  // 在某些clip情况下，poskey可能是后面的read
+            auto &remainUnpairInfo = g.unpairedPosArr[remainPosKey];
+            auto &remainPairArr = remainUnpairInfo.pairArr;
+            remainPairArr.push_back(remainFragEnd);
+            CalcKey ck = {remainPosKey, prevFragEnd.posKey};
+            recalcPos[ck] = remainPosInfo.taskSeq;
+            std::sort(remainPairArr.begin(), remainPairArr.end());
+
+            g.unpairedDic.erase(readName);
+        } else { // 都没找到，那就保存到遗留数据里
+            int64_t prevPosKey = prevFragEnd.posKey;
+            g.unpairedDic.insert(prevUnpair);
+            addToGlobal.insert(prevPosKey);
+        }
+    }
+    // 最后再添加，以防开始赋值，后来这个位置要是又添加了新的数据
+    for (auto posKey : addToGlobal) g.unpairedPosArr[posKey] = lp.unpairedPosArr[posKey];
+
+    map<int64_t, TaskSeqDupInfo> taskChanged;
+    set<int64_t> posProcessed;
+    for (auto &e : recalcPos) {
+        auto posKey = e.first.read1Pos;
+        if (posProcessed.find(posKey) != posProcessed.end())
+            continue;
+        posProcessed.insert(posKey);
+        auto taskSeq = e.second;
+        auto &t = taskChanged[taskSeq];
+        // 在对应的任务包含的dup idx里修改结果数据
+        vector<ReadEnds> *pairArrP = nullptr;
+        if (taskSeq < lp.taskSeq)
+            pairArrP = &g.unpairedPosArr[posKey].pairArr;
+        else
+            pairArrP = &lp.unpairedPosArr[posKey].pairArr;
+        processPairs(*pairArrP, &t.dupIdx, &t.opticalDupIdx, &t.notDupIdx);
+        if (taskSeq < lp.taskSeq)
+            g.unpairedPosArr.erase(posKey);
+    }
+    // 更新结果
+
+    for (auto &e : taskChanged) {
+        auto taskSeq = e.first;
+        auto &t = e.second;
+        if (taskSeq < lp.taskSeq) {
+            refeshTaskDupInfo(t.dupIdx, t.opticalDupIdx, t.notDupIdx, g.latterDupIdxArr[taskSeq],
+                              g.latterOpticalDupIdxArr[taskSeq], g.latterNotDupIdxArr[taskSeq]);
+        } else if (taskSeq == lp.taskSeq) {
+            refreshPairDupIdx(t.dupIdx, t.opticalDupIdx, t.notDupIdx, &lp, &p);
+        } else {
+            refreshPairDupIdx(t.dupIdx, t.opticalDupIdx, t.notDupIdx, &p, &lp);  // 把结果放到p中
+        }
+    }
+
+    // cout << "remain unpaired: " << g.unpairedDic.size() << '\t' <<
+    // g.unpairedPosArr.size() << endl; cout << "calc g time: " <<
+    // t.seconds_elapsed() << " s" << endl; 将dupidx放进全局数据
+    g.latterDupIdxArr.push_back(set<int64_t>());
+    g.latterOpticalDupIdxArr.push_back(set<int64_t>());
+    g.latterNotDupIdxArr.push_back(set<int64_t>());
+
+    g.dupIdxArr.push_back(vector<int64_t>());
+    auto &vIdx = g.dupIdxArr.back();
+    lp.pairDupIdx.insert(lp.fragDupIdx.begin(), lp.fragDupIdx.end());
+    vIdx.insert(vIdx.end(), lp.pairDupIdx.begin(), lp.pairDupIdx.end());
+
+    g.opticalDupIdxArr.push_back(vector<int64_t>());
+    auto &vOpticalIdx = g.opticalDupIdxArr.back();
+    vOpticalIdx.insert(vOpticalIdx.end(), lp.pairOpticalDupIdx.begin(), lp.pairOpticalDupIdx.end());
+}
+
+/* 当所有任务结束后，global data里还有未处理的数据 */
+static void handleLastTask(SerailMarkDupArg *task, GlobalDataArg *gDataArg) {
+    auto &lp = *task;
+    auto &g = *gDataArg;
+    // 遗留的未匹配的pair
+    for (auto &prevUnpair : lp.unpairedDic) { // 遍历上一个任务中的每个未匹配的read
+        auto &readName = prevUnpair.first;
+        auto &prevPosInfo = prevUnpair.second;
+        auto prevFragEnd = prevPosInfo.unpairedRE;  // 未匹配的read end
+
+        if (g.unpairedDic.find(readName) != g.unpairedDic.end()) { // 在遗留数据中找到了匹配的read
+            auto &remainPosInfo = g.unpairedDic[readName];
+            auto remainFragEnd = remainPosInfo.unpairedRE;
+            int64_t remainPosKey = remainFragEnd.posKey;
+            modifyPairedEnds(prevFragEnd, &remainFragEnd);  // 在某些clip情况下，poskey可能是后面的read
+            auto &remainUnpairInfo = g.unpairedPosArr[remainPosKey];
+
+            remainUnpairInfo.pairArr.push_back(remainFragEnd);
+            g.unpairedDic.erase(readName);
+        }
+    }
+    map<int64_t, TaskSeqDupInfo> taskChanged;
+    for (auto &e : g.unpairedPosArr) {
+        auto posKey = e.first;
+        auto taskSeq = e.second.taskSeq;
+        auto &t = taskChanged[taskSeq];
+        auto &arr = g.unpairedPosArr[posKey].pairArr;
+
+        if (arr.size() > 1) {
+            std::sort(arr.begin(), arr.end());
+            processPairs(arr, &t.dupIdx, &t.opticalDupIdx, &t.notDupIdx);
+        }
+    }
+    // 更新结果
+    vector<int64_t> addDup;
+    map<int64_t, int64_t> ndPosVal;
+    for (auto &e : taskChanged) {
+        auto taskSeq = e.first;
+        auto &t = e.second;
+        refeshTaskDupInfo(t.dupIdx, t.opticalDupIdx, t.notDupIdx, g.latterDupIdxArr[taskSeq],
+                          g.latterOpticalDupIdxArr[taskSeq], g.latterNotDupIdxArr[taskSeq]);
+    }
+
+    cout << "last unpair info: " << g.unpairedDic.size() << '\t' << g.unpairedPosArr.size() << endl;
+    g.unpairedPosArr.clear();
+    g.unpairedDic.clear();
+
+    // 将dupidx放进全局数据
+    for (int i = 0; i < (int)g.dupIdxArr.size() - 1; ++i)
+        refeshFinalTaskDupInfo(g.latterDupIdxArr[i], g.latterNotDupIdxArr[i], g.dupIdxArr[i]);
+    for (int i = 0; i < (int)g.opticalDupIdxArr.size() - 1; ++i)
+        refeshFinalTaskDupInfo(g.latterOpticalDupIdxArr[i], g.latterNotDupIdxArr[i], g.opticalDupIdxArr[i]);
+
+    g.dupIdxArr.push_back(vector<int64_t>());
+    auto &vIdx = g.dupIdxArr.back();
+    lp.pairDupIdx.insert(lp.fragDupIdx.begin(), lp.fragDupIdx.end());
+    vIdx.insert(vIdx.end(), lp.pairDupIdx.begin(), lp.pairDupIdx.end());
+
+    g.opticalDupIdxArr.push_back(vector<int64_t>());
+    auto &vOpticalIdx = g.opticalDupIdxArr.back();
+    vOpticalIdx.insert(vOpticalIdx.end(), lp.pairOpticalDupIdx.begin(), lp.pairOpticalDupIdx.end());
+}
+
+/* 串行处理数据，标记冗余 */
+void serialMarkDups() {
+    tm_arr[5].acc_start();
+    Timer::log_time("serial start");
+    // 读取缓存初始化
+    BamBufType inBamBuf(g_gArg.use_asyncio);
+    inBamBuf.Init(g_inBamFp, g_inBamHeader, g_gArg.max_mem);
+    // BamBufType inBamBuf(false);
+    // inBamBuf.Init(g_inBamFp, g_inBamHeader, 100 * 1024 * 1024);
+    int64_t processedBamNum = 0;
+
+    SerailMarkDupArg smdArg1, smdArg2;
+    SerailMarkDupArg *lastArgP = &smdArg1;
+    SerailMarkDupArg *curArgP = &smdArg2;
+
+    bool isFirstRound = true;
+    int roundNum = 0;
+    int64_t readNumSum = 0;
+    while (inBamBuf.ReadStat() >= 0) {
+        Timer t_round;
+        // 读取bam文件中的read
+        tm_arr[4].acc_start();
+        size_t readNum = inBamBuf.ReadBam();
+        readNumSum += readNum;
+        tm_arr[4].acc_end();
+        cout << "read num: " << readNum << '\t' << roundNum << endl;
+        // lastArgP = curArgP;
+        tm_arr[6].acc_start();
+        curArgP->taskSeq = roundNum;
+        curArgP->bamStartIdx = processedBamNum;
+        curArgP->bams = inBamBuf.GetBamArr();
+        tm_arr[6].acc_end();
+
+        tm_arr[0].acc_start();
+        Timer t1;
+        generateReadEnds(curArgP);
+        // cout << "calc read end time: " << t1.seconds_elapsed() << " s" <<
+        // endl;
+        tm_arr[0].acc_end();
+
+        tm_arr[1].acc_start();
+        t1.reinit();
+        markdups(curArgP);
+        // cout << "markdups time: " << t1.seconds_elapsed() << " s" << endl;
+        tm_arr[1].acc_end();
+
+        if (!isFirstRound) {
+            tm_arr[2].acc_start();
+            t1.reinit();
+            handleIntersectData(lastArgP, curArgP, &gData);
+            // cout << "intersect time: " << t1.seconds_elapsed() << " s" <<
+            // endl;
+            //  addTaskIdxToSet(lastArgP, &gData);
+            tm_arr[2].acc_end();
+        } else {
+            isFirstRound = false;
+        }
+        inBamBuf.ClearAll();  // 清理上一轮读入的数据
+        processedBamNum += readNum;
+
+        // 交换
+        auto tmp = lastArgP;
+        lastArgP = curArgP;
+        curArgP = tmp;
+        // cout << "round time: " << t_round.seconds_elapsed() << endl;
+        roundNum++;
+        if (roundNum % 100 == 0) {
+            cout << "read sum: " << readNumSum << endl;
+            cout << "round time: " << t_round.seconds_elapsed() * 100 << " s" << endl;
+        }
+    }
+    // cout << "here" << endl;
+    tm_arr[3].acc_start();
+    // 处理剩下的全局数据
+    handleLastTask(lastArgP, &gData);
+    // cout << "here 2" << endl;
+    tm_arr[3].acc_end();
+
+    tm_arr[5].acc_end();
+    // 统计所有冗余index数量
+    int64_t dupNum = 0;
+    map<int64_t, int> dup;
+
+    int taskSeq = 0;
+    for (auto &arr : gData.dupIdxArr) {
+        for (auto idx : arr) {
+            if (dup.find(idx) != dup.end()) {
+                // cout << "dup index: " << dup[idx] << '\t' << taskSeq << '\t'
+                // << idx << endl;
+            }
+            dup[idx] = taskSeq;
+        }
+        taskSeq++;
+    }
+    // #include <fstream>
+    // ofstream out("tumor_dup.txt");
+    // for (auto idx : dup)
+    // {
+    //     out << idx << endl;
+    // }
+    // out.close();
+
+    for (auto &arr : gData.dupIdxArr) dupNum += arr.size();
+
+    cout << "dup num     : " << dupNum << '\t' << dup.size() << endl;
+
+    cout << "calc readend: " << tm_arr[0].acc_seconds_elapsed() << endl;
+    cout << "markdup     : " << tm_arr[1].acc_seconds_elapsed() << endl;
+    cout << "handle tail : " << tm_arr[2].acc_seconds_elapsed() << endl;
+    cout << "handle last : " << tm_arr[3].acc_seconds_elapsed() << endl;
+    cout << "read bam    : " << tm_arr[4].acc_seconds_elapsed() << endl;
+    cout << "new arg     : " << tm_arr[6].acc_seconds_elapsed() << endl;
+    cout << "del arg     : " << tm_arr[7].acc_seconds_elapsed() << endl;
+    cout << "build ends  : " << tm_arr[8].acc_seconds_elapsed() << endl;
+    cout << "sort frags  : " << tm_arr[9].acc_seconds_elapsed() << endl;
+    cout << "sort pairs  : " << tm_arr[10].acc_seconds_elapsed() << endl;
+    cout << "all         : " << tm_arr[5].acc_seconds_elapsed() << endl;
+
+    Timer::log_time("serial end  ");
+
+    // for (auto i : gData.dupArr)
+    //     cout << i << endl;
+}
\ No newline at end of file
diff --git a/src/sam/markdups/serial_md.h b/src/sam/markdups/serial_md.h
index 5aa7f56..4a1f392 100644
--- a/src/sam/markdups/serial_md.h
+++ b/src/sam/markdups/serial_md.h
@@ -1,20 +1,28 @@
-#include <algorithm>
+#pragma once
+
+#include <common/hts/bam_buf.h>
 #include <robin-map/include/tsl/robin_map.h>
+#include <sam/utils/read_ends.h>
+
+#include <set>
+#include <string>
+#include <vector>
+
+using std::set;
+using std::string;
+using std::vector;
 
 /* 存放未匹配readend相同位点的所有readend */
-struct UnpairedREInfo
-{
+struct UnpairedREInfo {
     int64_t taskSeq;
     ReadEnds unpairedRE;
 };
 
 /* 对于一个pair数据，一个完整的计算点，包含read1的比对位置和read2的比对位置 */
-struct CalcKey
-{
+struct CalcKey {
     int64_t read1Pos;
     int64_t read2Pos;
-    bool operator<(const CalcKey &o) const
-    {
+    bool operator<(const CalcKey &o) const {
         int comp = (int)(read1Pos - o.read1Pos);
         if (comp == 0)
             comp = (int)(read2Pos - o.read2Pos);
@@ -23,16 +31,14 @@ struct CalcKey
 };
 
 /* 当遗留数据在当前任务找到了pair read后，进行冗余计算时候存放结果的数据结构 */
-struct TaskSeqDupInfo
-{
+struct TaskSeqDupInfo {
     set<int64_t> dupIdx;
     set<int64_t> opticalDupIdx;
     set<int64_t> notDupIdx;
 };
 
 /* 保存有未匹配pair位点的信息，包括read end数组和有几个未匹配的read end */
-struct UnpairedPosInfo
-{
+struct UnpairedPosInfo {
     int unpairedNum = 0;
     int64_t taskSeq;
     vector<ReadEnds> pairArr;
@@ -41,28 +47,26 @@ struct UnpairedPosInfo
 // typedef unordered_map<string, UnpairedREInfo> UnpairedNameMap;
 // typedef unordered_map<int64_t, UnpairedPosInfo> UnpairedPositionMap;
 
-typedef tsl::robin_map<string, UnpairedREInfo> UnpairedNameMap; // 以read name为索引，保存未匹配的pair read
-typedef tsl::robin_map<int64_t, UnpairedPosInfo> UnpairedPositionMap; // 以位点为索引，保存该位点包含的对应的所有read和该位点包含的剩余未匹配的read的数量
+typedef tsl::robin_map<string, UnpairedREInfo> UnpairedNameMap;  // 以read name为索引，保存未匹配的pair read
+typedef tsl::robin_map<int64_t, UnpairedPosInfo> UnpairedPositionMap;  // 以位点为索引，保存该位点包含的对应的所有read和该位点包含的剩余未匹配的read的数量
 
 /* 单线程处理冗余参数结构体 */
-struct SerailMarkDupArg
-{
-    int64_t taskSeq;
-    int64_t bamStartIdx;                         // 当前vBam数组中第一个bam记录在整体bam中所处的位置
-    vector<BamWrap *> bams;                      // 存放待处理的bam read
-    vector<ReadEnds> pairs;
-    vector<ReadEnds> frags;
-    set<int64_t> pairDupIdx;        // pair的冗余read的索引
-    set<int64_t> pairOpticalDupIdx; // optical冗余read的索引
-    set<int64_t> fragDupIdx;        // frag的冗余read的索引
-    UnpairedNameMap unpairedDic;    // 用来寻找pair end
-    UnpairedPositionMap unpairedPosArr; // 存放未匹配的ReadEnd对应位点的所有ReadEnd，为了避免重复存储
+struct SerailMarkDupArg {
+    int64_t taskSeq; // 任务序号
+    int64_t bamStartIdx;  // 当前vBam数组中第一个bam记录在整体bam中所处的位置
+    vector<BamWrap *> bams;  // 存放待处理的bam read
+    vector<ReadEnds> pairs; // 成对的reads
+    vector<ReadEnds> frags; // 暂未找到配对的reads
+    set<int64_t> pairDupIdx;         // pair的冗余read的索引
+    set<int64_t> pairOpticalDupIdx;  // optical冗余read的索引
+    set<int64_t> fragDupIdx;         // frag的冗余read的索引
+    UnpairedNameMap unpairedDic;     // 用来寻找pair end
+    UnpairedPositionMap  unpairedPosArr;  // 存放未匹配的ReadEnd对应位点的所有ReadEnd，为了避免重复存储
 };
 
 /* 全局保留的数据，因为有些paired数据比对到了不同的染色体，相距甚远 */
-struct GlobalDataArg
-{
-    UnpairedNameMap unpairedDic; // 用来寻找pair end
+struct GlobalDataArg {
+    UnpairedNameMap unpairedDic;  // 用来寻找pair end
     UnpairedPositionMap unpairedPosArr;
 
     // 每个task对应一个vector
@@ -75,869 +79,5 @@ struct GlobalDataArg
     vector<set<int64_t>> latterNotDupIdxArr;
 };
 
-static GlobalDataArg gData;
-
-
-/* 查找 */
-// template<class Itr, class T>
-// static inline Itr binaryFind(Itr first, Itr last, const T &val)
-// {
-//     first = std::lower_bound(first, last, val);
-//     return (first != last && *first == val) ? first : last;
-// }
-
-/* 排序 */
-static inline void sortReadEndsArr(vector<ReadEnds> &arr)
-{
-    size_t blockSize = 64 * 1024;
-    blockSize = min(blockSize, arr.size());
-    size_t blockNum = (arr.size() + blockSize - 1) / blockSize;
-    size_t crossNum = 1024;
-    size_t start, end, i, left, right;
-    std::sort(arr.begin(), arr.begin() + blockSize);
-    for (i = 1; i < blockNum; ++i)
-    {
-        start = i * blockSize;
-        end = min(start + blockSize, arr.size());
-        std::sort(arr.begin() + start, arr.begin() + end);
-        left = crossNum;
-        while (!(arr[start - left] < arr[start]))
-        {
-            left = left << 1;
-            if (left >= blockSize)
-            {
-                std::sort(arr.begin(), arr.end()); // 退化到普通排序
-                return;
-            }
-        }
-        right = min(crossNum, end - start - 1);
-
-        while (!(arr[start - 1] < arr[start + right]))
-        {
-            right = min(right << 1, end - start - 1);
-            if (right == end - start - 1)
-                break;
-        }
-        std::sort(arr.begin() + start - left, arr.begin() + start + right);
-    }
-}
-
-/* 处理一组pairend的readends，标记冗余 */
-static void markDupsForPairs(vector<const ReadEnds *> &vpRe,
-                                  set<int64_t> *dupIdx,
-                                  set<int64_t> *opticalDupIdx,
-                                  set<int64_t> *notDupIdx = nullptr)
-{
-    if (vpRe.size() < 2)
-    {
-        if (vpRe.size() == 1)
-        {
-            // addSingletonToCount(libraryIdGenerator);
-        }
-        return;
-    }
-    int maxScore = 0;
-    const ReadEnds *pBest = nullptr;
-    /** All read ends should have orientation FF, FR, RF, or RR **/
-    for (auto pe : vpRe) // 找分数最高的readend
-    {
-        if (pe->score > maxScore || pBest == nullptr)
-        {
-            maxScore = pe->score;
-            pBest = pe;
-        }
-    }
-    if (notDupIdx != nullptr)
-    {
-        notDupIdx->insert(pBest->read1IndexInFile);
-        notDupIdx->insert(pBest->read2IndexInFile);
-    }
-    if (!g_mdArg.READ_NAME_REGEX.empty()) // 检查光学冗余
-    {
-        // trackOpticalDuplicates
-    }
-    for (auto pe : vpRe) // 对非best read标记冗余
-    {
-        if (pe != pBest) // 非best
-        {
-            dupIdx->insert(pe->read1IndexInFile); // 添加read1
-            if (pe->read2IndexInFile != pe->read1IndexInFile)
-                dupIdx->insert(pe->read2IndexInFile); // 添加read2
-        }
-    }
-
-    // if (g_mdArg.TAG_DUPLICATE_SET_MEMBERS)
-    // {
-    //     addRepresentativeReadIndex(vpRe);
-    // }
-}
-
-/* 处理一组非paired的readends，标记冗余 */
-static void markDupsForFrags(vector<const ReadEnds *> &vpRe,
-                             bool containsPairs,
-                             set<int64_t> *dupIdx,
-                             set<int64_t> *notDupIdx = nullptr)
-{
-    if (containsPairs)
-    {
-        for (auto pe : vpRe)
-        {
-            if (!pe->IsPaired())
-            {
-                dupIdx->insert(pe->read1IndexInFile);
-            }
-        }
-    }
-    else
-    {
-        int maxScore = 0;
-        const ReadEnds *pBest = nullptr;
-        for (auto pe : vpRe)
-        {
-            if (pe->score > maxScore || pBest == nullptr)
-            {
-                maxScore = pe->score;
-                pBest = pe;
-            }
-        }
-        if (notDupIdx != nullptr)
-        {
-            notDupIdx->insert(pBest->read1IndexInFile);
-        }
-        for (auto pe : vpRe)
-        {
-            if (pe != pBest)
-            {
-                dupIdx->insert(pe->read1IndexInFile);
-            }
-        }
-    }
-}
-
-/* 找到与readend pos相等的所有readend */
-static void getEqualRE(const ReadEnds &re, vector<ReadEnds> &src, vector<ReadEnds> *dst)
-{
-    auto range = std::equal_range(src.begin(), src.end(), re, ReadEnds::pairsLittleThan); // 只比对位点
-    dst->insert(dst->end(), range.first, range.second);
-}
-
-/* 单线程生成readends (第一步)*/
-static void generateReadEnds(SerailMarkDupArg *arg)
-{
-    auto &p = *arg;
-    auto &rnParser = g_vRnParser[0];
-
-    p.pairs.clear();
-    p.frags.clear();
-    p.unpairedDic.clear();
-    p.unpairedPosArr.clear();
-
-    /* 处理每个read，创建ReadEnd，并放入frag和pair中 */
-    set<ReadEnds> reSet;
-
-    ReadEnds lastRe;
-
-    for (int i = 0; i < p.bams.size(); ++i) // 循环处理每个read
-    {
-        BamWrap *bw = p.bams[i];
-        const int64_t bamIdx = p.bamStartIdx + i;
-        if (bw->GetReadUnmappedFlag())
-        {
-            if (bw->b->core.tid == -1)
-                // When we hit the unmapped reads with no coordinate, no reason to continue (only in coordinate sort).
-                break;
-        }
-        else if (!bw->IsSecondaryOrSupplementary()) // 是主要比对
-        {
-            ReadEnds fragEnd;
-            tm_arr[8].acc_start();
-            buildReadEnds(*bw, bamIdx, rnParser, &fragEnd);
-            tm_arr[8].acc_end();
-            p.frags.push_back(fragEnd);                                // 添加进frag集合
-            if (bw->GetReadPairedFlag() && !bw->GetMateUnmappedFlag()) // 是pairend而且互补的read也比对上了
-            {
-                string key = bw->query_name();
-                if (p.unpairedDic.find(key) == p.unpairedDic.end())
-                {
-                    p.unpairedDic[key] = {p.taskSeq, fragEnd};
-                }
-                else // 找到了pairend
-                {
-                    auto &pairedEnds = p.unpairedDic.at(key).unpairedRE;
-                    modifyPairedEnds(fragEnd, &pairedEnds);
-                    p.pairs.push_back(pairedEnds);
-                    p.unpairedDic.erase(key); // 删除找到的pairend
-                }
-            }
-        }
-    }
-    tm_arr[9].acc_start();
-    sortReadEndsArr(p.frags);
-    // sort(p.frags.begin(), p.frags.end());
-    tm_arr[9].acc_end();
-    // cout << "sort pairs" << endl;
-    tm_arr[10].acc_start();
-    sort(p.pairs.begin(), p.pairs.end());
-    tm_arr[10].acc_end();
-    // 记录位点上的未匹配的read个数
-    for (auto &e : p.unpairedDic) {
-        auto posKey = e.second.unpairedRE.posKey;
-        auto &unpairArrInfo = p.unpairedPosArr[posKey];
-        unpairArrInfo.unpairedNum++;
-        unpairArrInfo.taskSeq = p.taskSeq;
-        unpairArrInfo.readNameSet.insert(e.first);
-    }
-    cout << "依赖比例：" <<  (float)p.unpairedDic.size() / p.frags.size() << endl;
-}
-
-/* 处理pairs */
-static void processPairs(vector<ReadEnds> &readEnds,
-                         set<int64_t> *dupIdx,
-                         set<int64_t> *opticalDupIdx,
-                         set<int64_t> *notDupIdx = nullptr)
-{
-    vector<const ReadEnds *> vpCache; // 有可能是冗余的reads
-    const ReadEnds *pReadEnd = nullptr;
-    for (auto &re : readEnds)
-    {
-        if (pReadEnd != nullptr && ReadEnds::AreComparableForDuplicates(*pReadEnd, re, true)) // 跟前一个一样
-            vpCache.push_back(&re);                                                           // 处理一个潜在的冗余组
-        else
-        {
-            markDupsForPairs(vpCache, dupIdx, opticalDupIdx, notDupIdx); // 不一样
-            vpCache.clear();
-            vpCache.push_back(&re);
-            pReadEnd = &re;
-        }
-    }
-    markDupsForPairs(vpCache, dupIdx, opticalDupIdx, notDupIdx);
-}
-
-/* 处理frags */
-static void processFrags(vector<ReadEnds> &readEnds,
-                         set<int64_t> *dupIdx,
-                         set<int64_t> *notDupIdx = nullptr)
-{
-    bool containsPairs = false;
-    bool containsFrags = false;
-    vector<const ReadEnds *> vpCache; // 有可能是冗余的reads
-    const ReadEnds *pReadEnd = nullptr;
-    for (auto &re : readEnds)
-    {
-        if (pReadEnd != nullptr && ReadEnds::AreComparableForDuplicates(*pReadEnd, re, false))
-        {
-            vpCache.push_back(&re);
-            containsPairs = containsPairs || re.IsPaired();
-            containsFrags = containsFrags || !re.IsPaired();
-        }
-        else
-        {
-            if (vpCache.size() > 1 && containsFrags)
-            {
-                markDupsForFrags(vpCache, containsPairs, dupIdx, notDupIdx);
-            }
-            vpCache.clear();
-            vpCache.push_back(&re);
-            pReadEnd = &re;
-            containsPairs = re.IsPaired();
-            containsFrags = !re.IsPaired();
-        }
-    }
-    if (vpCache.size() > 1 && containsFrags)
-    {
-        markDupsForFrags(vpCache, containsPairs, dupIdx, notDupIdx);
-    }
-}
-
-/* 单线程markdup (第二步)*/
-static void markdups(SerailMarkDupArg *arg)
-{
-    auto &p = *arg;
-    p.pairDupIdx.clear();
-    p.pairOpticalDupIdx.clear();
-    p.fragDupIdx.clear();
-    /* generateDuplicateIndexes，计算冗余read在所有read中的位置索引 */
-    // 先处理 pair
-    processPairs(p.pairs, &p.pairDupIdx, &p.pairOpticalDupIdx);
-
-    // 再处理frag
-    processFrags(p.frags, &p.fragDupIdx);
-}
-
-/* 获取交叉部分的数据 */
-static inline void getIntersectData(vector<ReadEnds> &leftArr,
-                                    vector<ReadEnds> &rightArr,
-                                    vector<ReadEnds> *dst,
-                                    bool isPairCmp = false)
-{
-    const size_t leftEndIdx = leftArr.size() - 1;
-    const size_t rightStartIdx = 0;
-    size_t leftSpan = 0;
-    size_t rightSpan = 0;
-
-    while (!ReadEnds::ReadLittleThan(leftArr[leftEndIdx - leftSpan], rightArr[rightStartIdx], isPairCmp))
-    {
-        leftSpan += 1;
-        if (leftSpan > leftEndIdx)
-        {
-            leftSpan = leftArr.size() - 1;
-            break;
-        }
-    }
-
-    while (!ReadEnds::ReadLittleThan(leftArr[leftEndIdx], rightArr[rightSpan], isPairCmp))
-    {
-        rightSpan += 1;
-        if (rightSpan == rightArr.size() - 1)
-            break;
-    }
-    dst->insert(dst->end(), leftArr.end() - leftSpan, leftArr.end());
-    dst->insert(dst->end(), rightArr.begin(), rightArr.begin() + rightSpan);
-    std::sort(dst->begin(), dst->end());
-}
-
-/* 将frags重叠部分的dup idx放进数据中 */
-static inline void refreshFragDupIdx(set<int64_t> &dupIdx,
-                              set<int64_t> &notDupIdx, 
-                              SerailMarkDupArg * lastArg, 
-                              SerailMarkDupArg *curArg)
-{
-    auto &lp = *lastArg;
-    auto &p = *curArg;
-    for (auto idx : dupIdx)
-    {
-        lp.fragDupIdx.insert(idx);
-        p.fragDupIdx.erase(idx);
-    }
-    for (auto idx : notDupIdx)
-    {
-        lp.fragDupIdx.erase(idx);
-        p.fragDupIdx.erase(idx);
-    }
-}
-
-/* 将pairs重叠部分的dup idx放进数据中 */
-static inline void refreshPairDupIdx(set<int64_t> &dupIdx,
-                              set<int64_t> &opticalDupIdx,
-                              set<int64_t> &notDupIdx,
-                              SerailMarkDupArg *lastArg,
-                              SerailMarkDupArg *curArg)
-{
-    auto &lp = *lastArg;
-    auto &p = *curArg;
-    for (auto idx : dupIdx)
-    {
-        lp.pairDupIdx.insert(idx);
-        p.pairDupIdx.erase(idx);
-    }
-    for (auto idx : opticalDupIdx)
-    {
-        lp.pairOpticalDupIdx.insert(idx);
-        p.pairOpticalDupIdx.erase(idx);
-    }
-    for (auto idx : notDupIdx)
-    {
-        lp.pairDupIdx.erase(idx);
-        lp.pairOpticalDupIdx.erase(idx);
-        p.pairDupIdx.erase(idx);
-        p.pairOpticalDupIdx.erase(idx);
-    }
-}
-
-// 用来分别处理dup和optical dup
-static void refeshTaskDupInfo(set<int64_t> &dupIdx,
-                              set<int64_t> &opticalDupIdx,
-                              set<int64_t> &notDupIdx,
-                              set<int64_t> &latterDupIdx,
-                              set<int64_t> &latterOpticalDupIdx,
-                              set<int64_t> &latterNotDupIdx)
-{
-    for (auto idx : dupIdx)
-        latterDupIdx.insert(idx);
-    for (auto idx : opticalDupIdx)
-        latterOpticalDupIdx.insert(idx);
-    for (auto idx : notDupIdx)
-        latterNotDupIdx.insert(idx);
-}
-
-/* 最后合并数据并排序 */
-static void refeshFinalTaskDupInfo(set<int64_t> &dupIdx,
-                                   set<int64_t> &notDupIdx,
-                                   vector<int64_t> &dupArr)
-{
-    vector<int64_t> midArr;
-
-    auto ai = dupArr.begin();
-    auto bi = dupIdx.begin();
-    auto ae = dupArr.end();
-    auto be = dupIdx.end();
-
-    int64_t val = 0;
-    while (ai != ae && bi != be)
-    {
-        if (*ai < *bi)
-        {
-            val = *ai++;
-        }
-        else if (*bi < *ai)
-        {
-            val = *bi++;
-        }
-        else
-        {
-            val = *ai++;
-            bi++;
-        }
-        if (notDupIdx.find(val) == notDupIdx.end())
-        {
-            midArr.push_back(val);
-        }
-    }
-    while (ai != ae)
-    {
-        val = *ai++;
-        if (notDupIdx.find(val) == notDupIdx.end())
-        {
-            midArr.push_back(val);
-        }
-    }
-    while (bi != be)
-    {
-        val = *bi++;
-        if (notDupIdx.find(val) == notDupIdx.end())
-        {
-            midArr.push_back(val);
-        }
-    }
-    dupArr = midArr;
-}
-
-/* 处理相邻的两个任务，有相交叉的数据  */
-static void handleIntersectData(SerailMarkDupArg *lastArg, SerailMarkDupArg *curArg, GlobalDataArg *gDataArg)
-{
-    auto &lp = *lastArg;
-    auto &p = *curArg;
-    auto &g = *gDataArg;
-
-    vector<ReadEnds> reArr;
-    set<int64_t> dupIdx;
-    set<int64_t> notDupIdx;
-    // 先处理重叠的frags
-    getIntersectData(lp.frags, p.frags, &reArr);
-    processFrags(reArr, &dupIdx, &notDupIdx);
-    refreshFragDupIdx(dupIdx, notDupIdx, &lp, &p);
-
-    // 再处理重叠的pairs
-    reArr.clear();
-    dupIdx.clear();
-    notDupIdx.clear();
-    set<int64_t> opticalDupIdx;
-    getIntersectData(lp.pairs, p.pairs, &reArr, true);
-    processPairs(reArr, &dupIdx, &opticalDupIdx, &notDupIdx);
-    refreshPairDupIdx(dupIdx, opticalDupIdx, notDupIdx, &lp, &p);
-
-    // 处理之前未匹配的部分
-    map<CalcKey, int64_t> recalcPos;
-    set<CalcKey> alreadyAdd; // 与该位点相同的pair都添加到数组里了
-    set<int64_t> addToGlobal;
-    int64_t prevLastPos = 0, nextFirstPos = 0;
-    if (lp.frags.size() > 0)
-        prevLastPos = lp.frags.back().posKey;
-    if (p.frags.size() > 0)
-        nextFirstPos = p.frags[0].posKey;
-    // cout << "range: " << nextFirstPos << '\t' << prevLastPos << endl;
-    for (auto &prevUnpair : lp.unpairedDic) // 遍历上一个任务中的每个未匹配的read
-    {
-        auto &readName = prevUnpair.first;
-        auto &prevPosInfo = prevUnpair.second;
-        auto prevFragEnd = prevPosInfo.unpairedRE; // 未匹配的read end
-
-        if (p.unpairedDic.find(readName) != p.unpairedDic.end())           // 在当前这个任务里找到了这个未匹配的read
-        {
-            auto &nextPosInfo = p.unpairedDic[readName];
-            auto &nextFragEnd = nextPosInfo.unpairedRE;
-            int64_t prevPosKey = prevFragEnd.posKey;
-            modifyPairedEnds(nextFragEnd, &prevFragEnd); // 在某些clip情况下，poskey可能是后面的read
-            int64_t nextPosKey = max(prevPosKey, nextFragEnd.posKey);
-            CalcKey ck = {prevPosKey, nextPosKey};
-            UnpairedPosInfo *prevUnpairInfoP = nullptr;
-            UnpairedPosInfo *nextUnpairInfoP = nullptr;
-            if (lp.unpairedPosArr.find(prevPosKey) != lp.unpairedPosArr.end())
-                prevUnpairInfoP = &lp.unpairedPosArr[prevPosKey];
-            if (p.unpairedPosArr.find(prevPosKey) != p.unpairedPosArr.end())
-                nextUnpairInfoP = &p.unpairedPosArr[prevPosKey];
-
-            // pos分为两种情况，根据poskey(pair中两个read分别的pos)的位置确定
-            //  1. prevpos在交叉部分之前，nextpos在交叉部分之后，这种情况不需要获取pairarr中的数据;
-            //  2. prevpos在交叉部分之前，nextpos在交叉部分，需要获取lp中的相等read pair进行重新计算
-            //     复杂情况1. g中包含prevPosKey对应的unpair，p中有对应的pair，此时应该把这些pair考虑进去
-            //  3. prevpos在交叉部分，nextpos在交叉部分之后，需要获取p中的相等read pair进行重新计算
-            //     复杂情况2. p中是否包含prevPosKey对应的unpair
-            //  4. prevpos在交叉部分，nextpos在交叉部分，需要获取lp和p中的相等read pair进行重新计算
-            
-            bool addDataToPos = true;
-            if (alreadyAdd.find(ck) != alreadyAdd.end())
-            {
-                addDataToPos = false; // 之前已经添加过了，后面就不用再添加数据了
-            }
-            else
-                alreadyAdd.insert(ck);
-
-            if (prevPosKey < nextFirstPos) // prevpos在交叉部分之前
-            {
-                auto &prevPairArr = prevUnpairInfoP->pairArr; // prevUnpairInfoP肯定不是nullptr
-                prevPairArr.push_back(prevFragEnd);
-                if (nextPosKey <= prevLastPos && addDataToPos) // 第二种情况
-                {
-                    getEqualRE(prevFragEnd, lp.pairs, &prevPairArr);
-                }
-                // 第一种情况，第二种情况下都会出现，复杂情况一
-                auto gPosInfo = g.unpairedPosArr.find(prevPosKey);
-                if (gPosInfo != g.unpairedPosArr.end()) // 可能g和p有匹配的，刚好和该位点一致
-                {
-                    auto &gUnpairInfo = gPosInfo->second;
-                    auto pPosInfo = p.unpairedPosArr.find(nextPosKey);
-                    if (pPosInfo != p.unpairedPosArr.end())
-                    {
-                        auto &pUnpairInfo = pPosInfo->second;
-                        for (auto &rn : gUnpairInfo.readNameSet) // 遍历每一个readname，看是否有匹配的
-                        {
-                            if (pUnpairInfo.readNameSet.find(rn) != pUnpairInfo.readNameSet.end())
-                            {
-                                auto pe = g.unpairedDic[rn].unpairedRE;
-                                auto fe = p.unpairedDic[rn].unpairedRE;
-                                modifyPairedEnds(fe, &pe);
-                                prevPairArr.push_back(pe);
-                                g.unpairedDic.erase(rn);
-                                p.unpairedDic.erase(rn);
-                                // cout << "找到了！" << rn << endl;
-                            }
-                        }
-                    }
-                }
-                recalcPos[ck] = prevPosInfo.taskSeq;
-                std::sort(prevPairArr.begin(), prevPairArr.end());
-            }
-            else // prevpos在交叉部分
-            {
-                if (nextPosKey > prevLastPos) // nextpos在交叉部分之后
-                { // 第三种情况
-                    if (nextUnpairInfoP != nullptr) // 且在pos点，next task有unpair，这样才把这些数据放到next task里
-                    {
-                        auto &nextPairArr = nextUnpairInfoP->pairArr;
-                        nextPairArr.push_back(prevFragEnd);
-                        auto &prevPairArr = prevUnpairInfoP->pairArr;
-                        prevPairArr.push_back(prevFragEnd);
-                        if (addDataToPos)
-                        {
-                            getEqualRE(prevFragEnd, p.pairs, &prevPairArr);
-                        }
-                        recalcPos[ck] = nextPosInfo.taskSeq; // 将数据放到next task里, （这个位点以后会可能还会计算到，目前方案是都计算，只是把冗余剔除）
-                        std::sort(prevPairArr.begin(), prevPairArr.end());
-                    }
-                    else // next task在该位点没有unpair，那就把数据放到prev task里
-                    {
-                        auto &prevPairArr = prevUnpairInfoP->pairArr; // prevUnpairInfoP肯定不是nullptr
-                        prevPairArr.push_back(prevFragEnd);
-                        if (addDataToPos) // 第二种情况
-                            getEqualRE(prevFragEnd, p.pairs, &prevPairArr);
-                        recalcPos[ck] = prevPosInfo.taskSeq;
-                        std::sort(prevPairArr.begin(), prevPairArr.end());
-                    }
-                }
-                else
-                { // 第四种情况
-                    if (prevUnpairInfoP == nullptr) {
-                        prevUnpairInfoP = &lp.unpairedPosArr[prevPosKey];
-                        prevUnpairInfoP->taskSeq = lp.taskSeq;
-                    }
-                    auto &prevPairArr = prevUnpairInfoP->pairArr;
-                    prevPairArr.push_back(prevFragEnd);
-                    if (addDataToPos)
-                    {
-                        getEqualRE(prevFragEnd, lp.pairs, &prevPairArr);
-                        getEqualRE(prevFragEnd, p.pairs, &prevPairArr);
-                    }
-                    recalcPos[ck] = prevPosInfo.taskSeq;
-                    std::sort(prevPairArr.begin(), prevPairArr.end());
-                }
-            }
-            p.unpairedDic.erase(readName); // 在next task里删除该read
-        }
-        else if (g.unpairedDic.find(readName) != g.unpairedDic.end()) // 在遗留数据中找到了匹配的read
-        {
-            auto &remainPosInfo = g.unpairedDic[readName];
-            auto remainFragEnd = remainPosInfo.unpairedRE;
-            int64_t remainPosKey = remainFragEnd.posKey;
-            modifyPairedEnds(prevFragEnd, &remainFragEnd); // 在某些clip情况下，poskey可能是后面的read
-            auto &remainUnpairInfo = g.unpairedPosArr[remainPosKey];
-            auto &remainPairArr = remainUnpairInfo.pairArr;
-            remainPairArr.push_back(remainFragEnd);
-            CalcKey ck = {remainPosKey, prevFragEnd.posKey};
-            recalcPos[ck] = remainPosInfo.taskSeq;
-            std::sort(remainPairArr.begin(), remainPairArr.end());
-
-            g.unpairedDic.erase(readName);
-        }
-        else // 都没找到，那就保存到遗留数据里
-        {
-            int64_t prevPosKey = prevFragEnd.posKey;
-            g.unpairedDic.insert(prevUnpair);
-            addToGlobal.insert(prevPosKey);
-        }
-    }
-    for (auto posKey : addToGlobal) // 最后再添加，以防开始赋值，后来这个位置要是又添加了新的数据
-        g.unpairedPosArr[posKey] = lp.unpairedPosArr[posKey];
-
-    map<int64_t, TaskSeqDupInfo> taskChanged;
-    set<int64_t> posProcessed;
-    for (auto &e : recalcPos)
-    {
-        auto posKey = e.first.read1Pos;
-        if (posProcessed.find(posKey) != posProcessed.end())
-            continue;
-        posProcessed.insert(posKey);
-        auto taskSeq = e.second;
-        auto &t = taskChanged[taskSeq];
-        // 在对应的任务包含的dup idx里修改结果数据
-        vector<ReadEnds> *pairArrP = nullptr;
-        if (taskSeq < lp.taskSeq)
-            pairArrP = &g.unpairedPosArr[posKey].pairArr;
-        else
-            pairArrP = &lp.unpairedPosArr[posKey].pairArr;
-        processPairs(*pairArrP, &t.dupIdx, &t.opticalDupIdx, &t.notDupIdx);
-        if (taskSeq < lp.taskSeq)
-            g.unpairedPosArr.erase(posKey);
-    }
-    // 更新结果
-
-    for (auto &e : taskChanged)
-    {
-        auto taskSeq = e.first;
-        auto &t = e.second;
-        if (taskSeq < lp.taskSeq)
-        {
-            refeshTaskDupInfo(t.dupIdx, t.opticalDupIdx, t.notDupIdx,
-                              g.latterDupIdxArr[taskSeq], g.latterOpticalDupIdxArr[taskSeq], g.latterNotDupIdxArr[taskSeq]);
-        }
-        else if (taskSeq == lp.taskSeq)
-        {
-            refreshPairDupIdx(t.dupIdx, t.opticalDupIdx, t.notDupIdx, &lp, &p);
-        }
-        else
-        {
-            refreshPairDupIdx(t.dupIdx, t.opticalDupIdx, t.notDupIdx, &p, &lp); // 把结果放到p中
-        }
-    }
-
-    // cout << "remain unpaired: " << g.unpairedDic.size() << '\t' << g.unpairedPosArr.size() << endl;
-    // cout << "calc g time: " << t.seconds_elapsed() << " s" << endl;
-    // 将dupidx放进全局数据
-    g.latterDupIdxArr.push_back(set<int64_t>());
-    g.latterOpticalDupIdxArr.push_back(set<int64_t>());
-    g.latterNotDupIdxArr.push_back(set<int64_t>());
-
-    g.dupIdxArr.push_back(vector<int64_t>());
-    auto &vIdx = g.dupIdxArr.back();
-    lp.pairDupIdx.insert(lp.fragDupIdx.begin(), lp.fragDupIdx.end());
-    vIdx.insert(vIdx.end(), lp.pairDupIdx.begin(), lp.pairDupIdx.end());
-
-    g.opticalDupIdxArr.push_back(vector<int64_t>());
-    auto &vOpticalIdx = g.opticalDupIdxArr.back();
-    vOpticalIdx.insert(vOpticalIdx.end(), lp.pairOpticalDupIdx.begin(), lp.pairOpticalDupIdx.end());
-}
-
-/* 当所有任务结束后，global data里还有未处理的数据 */
-static void handleLastTask(SerailMarkDupArg *task, GlobalDataArg *gDataArg)
-{
-    auto &lp = *task;
-    auto &g = *gDataArg;
-    // 遗留的未匹配的pair
-    for (auto &prevUnpair : lp.unpairedDic) // 遍历上一个任务中的每个未匹配的read
-    {
-        auto &readName = prevUnpair.first;
-        auto &prevPosInfo = prevUnpair.second;
-        auto prevFragEnd = prevPosInfo.unpairedRE; // 未匹配的read end
-
-        if (g.unpairedDic.find(readName) != g.unpairedDic.end()) // 在遗留数据中找到了匹配的read
-        {
-            auto &remainPosInfo = g.unpairedDic[readName];
-            auto remainFragEnd = remainPosInfo.unpairedRE;
-            int64_t remainPosKey = remainFragEnd.posKey;
-            modifyPairedEnds(prevFragEnd, &remainFragEnd); // 在某些clip情况下，poskey可能是后面的read
-            auto &remainUnpairInfo = g.unpairedPosArr[remainPosKey];
-
-            remainUnpairInfo.pairArr.push_back(remainFragEnd);
-            g.unpairedDic.erase(readName);
-        }
-    }
-    map<int64_t, TaskSeqDupInfo> taskChanged;
-    for (auto &e : g.unpairedPosArr)
-    {
-        auto posKey = e.first;
-        auto taskSeq = e.second.taskSeq;
-        auto &t = taskChanged[taskSeq];
-        auto &arr = g.unpairedPosArr[posKey].pairArr;
-
-        if (arr.size() > 1)
-        {
-            std::sort(arr.begin(), arr.end());
-            processPairs(arr, &t.dupIdx, &t.opticalDupIdx, &t.notDupIdx);
-        }
-    }
-    // 更新结果
-    vector<int64_t> addDup;
-    map<int64_t, int64_t> ndPosVal;
-    for (auto &e : taskChanged)
-    {
-        auto taskSeq = e.first;
-        auto &t = e.second;
-        refeshTaskDupInfo(t.dupIdx, t.opticalDupIdx, t.notDupIdx,
-                          g.latterDupIdxArr[taskSeq], g.latterOpticalDupIdxArr[taskSeq], g.latterNotDupIdxArr[taskSeq]);
-    }
-
-    cout << "last unpair info: " << g.unpairedDic.size() << '\t' << g.unpairedPosArr.size() << endl;
-    g.unpairedPosArr.clear();
-    g.unpairedDic.clear();
-
-    // 将dupidx放进全局数据
-    for (int i = 0; i < g.dupIdxArr.size() - 1; ++i)
-        refeshFinalTaskDupInfo(g.latterDupIdxArr[i], g.latterNotDupIdxArr[i], g.dupIdxArr[i]);
-    for (int i = 0; i < g.opticalDupIdxArr.size() - 1; ++i)
-        refeshFinalTaskDupInfo(g.latterOpticalDupIdxArr[i], g.latterNotDupIdxArr[i], g.opticalDupIdxArr[i]);
-
-    g.dupIdxArr.push_back(vector<int64_t>());
-    auto &vIdx = g.dupIdxArr.back();
-    lp.pairDupIdx.insert(lp.fragDupIdx.begin(), lp.fragDupIdx.end());
-    vIdx.insert(vIdx.end(), lp.pairDupIdx.begin(), lp.pairDupIdx.end());
-
-    g.opticalDupIdxArr.push_back(vector<int64_t>());
-    auto &vOpticalIdx = g.opticalDupIdxArr.back();
-    vOpticalIdx.insert(vOpticalIdx.end(), lp.pairOpticalDupIdx.begin(), lp.pairOpticalDupIdx.end());
-}
-
-/* 串行处理数据，标记冗余 */
-static void serialMarkDups()
-{
-    tm_arr[5].acc_start();
-    Timer::log_time("serial start");
-    // 读取缓存初始化
-    BamBufType inBamBuf(g_gArg.use_asyncio);
-    inBamBuf.Init(g_inBamFp, g_inBamHeader, g_gArg.max_mem);
-    // BamBufType inBamBuf(false);
-    // inBamBuf.Init(g_inBamFp, g_inBamHeader, 100 * 1024 * 1024);
-    int64_t processedBamNum = 0;
-
-    SerailMarkDupArg smdArg1, smdArg2;
-    SerailMarkDupArg *lastArgP = &smdArg1;
-    SerailMarkDupArg *curArgP = &smdArg2;
-
-    bool isFirstRound = true;
-    int roundNum = 0;
-    while (inBamBuf.ReadStat() >= 0)
-    {
-        Timer t_round;
-        // 读取bam文件中的read
-        tm_arr[4].acc_start();
-        size_t readNum = inBamBuf.ReadBam();
-        tm_arr[4].acc_end();
-        cout << "read num: " << readNum << endl;
-        // lastArgP = curArgP;
-        tm_arr[6].acc_start();
-        curArgP->taskSeq = roundNum;
-        curArgP->bamStartIdx = processedBamNum;
-        curArgP->bams = inBamBuf.GetBamArr();
-        tm_arr[6].acc_end();
-
-        tm_arr[0].acc_start();
-        Timer t1;
-        generateReadEnds(curArgP);
-        //cout << "calc read end time: " << t1.seconds_elapsed() << " s" << endl;
-        tm_arr[0].acc_end();
-
-        tm_arr[1].acc_start();
-        t1.reinit();
-        markdups(curArgP);
-        //cout << "markdups time: " << t1.seconds_elapsed() << " s" << endl;
-        tm_arr[1].acc_end();
-
-        if (!isFirstRound)
-        {
-            tm_arr[2].acc_start();
-            t1.reinit();
-            handleIntersectData(lastArgP, curArgP, &gData);
-            //cout << "intersect time: " << t1.seconds_elapsed() << " s" << endl;
-            // addTaskIdxToSet(lastArgP, &gData);
-            tm_arr[2].acc_end();
-        }
-        else
-        {
-            isFirstRound = false;
-        }
-        inBamBuf.ClearAll(); // 清理上一轮读入的数据
-        processedBamNum += readNum;
-
-        // 交换
-        auto tmp = lastArgP;
-        lastArgP = curArgP;
-        curArgP = tmp;
-        cout << "round time: " << t_round.seconds_elapsed() << endl;
-        roundNum++;
-        if (roundNum > 9){
-//            break;
-        }
-    }
-    // cout << "here" << endl;
-    tm_arr[3].acc_start();
-    // 处理剩下的全局数据
-    handleLastTask(lastArgP, &gData);
-    // cout << "here 2" << endl;
-    tm_arr[3].acc_end();
-
-    tm_arr[5].acc_end();
-    // 统计所有冗余index数量
-    int64_t dupNum = 0;
-    set<int64_t> dup;
-
-    // int taskSeq = 0;
-    // for (auto &arr : gData.dupIdxArr)
-    // {
-    //     for (auto idx : arr) {
-    //         if (dup.find(idx) != dup.end())
-    //         {
-    //             cout << "dup index: " << taskSeq << '\t' << idx << endl;
-    //         }
-    //         dup.insert(idx);
-    //     }
-    //     taskSeq++;
-    // }
-    // #include <fstream>
-    // ofstream out("tumor_dup.txt");
-    // for (auto idx : dup)
-    // {
-    //     out << idx << endl;
-    // }
-    // out.close();
-
-    for (auto &arr : gData.dupIdxArr)
-        dupNum += arr.size();
-
-    cout << "dup num     : " << dupNum << '\t' << dup.size() << endl;
-
-    cout << "calc readend: " << tm_arr[0].acc_seconds_elapsed() << endl;
-    cout << "markdup     : " << tm_arr[1].acc_seconds_elapsed() << endl;
-    cout << "handle tail : " << tm_arr[2].acc_seconds_elapsed() << endl;
-    cout << "handle last : " << tm_arr[3].acc_seconds_elapsed() << endl;
-    cout << "read bam    : " << tm_arr[4].acc_seconds_elapsed() << endl;
-    cout << "new arg     : " << tm_arr[6].acc_seconds_elapsed() << endl;
-    cout << "del arg     : " << tm_arr[7].acc_seconds_elapsed() << endl;
-    cout << "build ends  : " << tm_arr[8].acc_seconds_elapsed() << endl;
-    cout << "sort frags  : " << tm_arr[9].acc_seconds_elapsed() << endl;
-    cout << "sort pairs  : " << tm_arr[10].acc_seconds_elapsed() << endl;
-    cout << "all         : " << tm_arr[5].acc_seconds_elapsed() << endl;
-
-    Timer::log_time("serial end  ");
-
-    //for (auto i : gData.dupArr)
-    //    cout << i << endl;
-}
\ No newline at end of file
+// 串行运行mark duplicate
+void serialMarkDups();
\ No newline at end of file
diff --git a/src/sam/markdups/shared_args.h b/src/sam/markdups/shared_args.h
new file mode 100644
index 0000000..3f739ec
--- /dev/null
+++ b/src/sam/markdups/shared_args.h
@@ -0,0 +1,26 @@
+#pragma once
+
+#include <common/utils/timer.h>
+#include <common/utils/util.h>
+#include <htslib/sam.h>
+#include <htslib/thread_pool.h>
+#include <sam/utils/read_ends.h>
+#include <sam/utils/read_name_parser.h>
+
+extern Timer tm_arr[20];  // 用来测试性能
+/* 全局本地变量 */
+extern vector<ReadNameParser> g_vRnParser;  // 每个线程一个read name parser
+extern samFile *g_inBamFp;                  // 输入的bam文件
+extern sam_hdr_t *g_inBamHeader;            // 输入的bam文件头信息
+extern samFile *g_outBamFp;       // 输出文件, sam或者bam格式
+extern sam_hdr_t *g_outBamHeader;           // 输出文件的header
+
+/* 参数对象作为全局对象，免得多次作为参数传入函数中 */
+class GlobalArg;
+extern GlobalArg &g_gArg;
+class MarkDupsArg;
+extern MarkDupsArg &g_mdArg;
+class GlobalDataArg;
+extern GlobalDataArg &gData;
+class DuplicationMetrics;
+extern DuplicationMetrics &gMetrics;
\ No newline at end of file
diff --git a/src/sam/utils/read_ends.h b/src/sam/utils/read_ends.h
index 1ec6b0e..25ff0b8 100644
--- a/src/sam/utils/read_ends.h
+++ b/src/sam/utils/read_ends.h
@@ -1,5 +1,6 @@
 /*
-Description: read ends结构体主要用来标记冗余，包含一些序列的测序过程中的物理信息等
+Description: read
+ends结构体主要用来标记冗余，包含一些序列的测序过程中的物理信息等
 
 Copyright : All right reserved by ICT
 
@@ -13,18 +14,20 @@ Date : 2023/11/3
 #include <stdint.h>
 
 /**
- * Small interface that provides access to the physical location information about a cluster.
- * All values should be defaulted to -1 if unavailable.  ReadGroup and Tile should only allow
- * non-zero positive integers, x and y coordinates may be negative.
+ * Small interface that provides access to the physical location information
+ * about a cluster. All values should be defaulted to -1 if unavailable.
+ * ReadGroup and Tile should only allow non-zero positive integers, x and y
+ * coordinates may be negative.
  */
-struct PhysicalLocation
-{
+struct PhysicalLocation {
+    static const int NO_VALUE = -1;
     /**
-     * Small class that provides access to the physical location information about a cluster.
-     * All values should be defaulted to -1 if unavailable.  Tile should only allow
-     * non-zero positive integers, x and y coordinates must be non-negative.
-     * This is different from PhysicalLocationShort in that the x and y positions are ints, not shorts
-     * thus, they do not overflow within a HiSeqX tile.
+     * Small class that provides access to the physical location information
+     * about a cluster. All values should be defaulted to -1 if unavailable.
+     * Tile should only allow non-zero positive integers, x and y coordinates
+     * must be non-negative. This is different from PhysicalLocationShort in
+     * that the x and y positions are ints, not shorts thus, they do not
+     * overflow within a HiSeqX tile.
      */
     int16_t tile = -1;
     int32_t x = -1;
@@ -32,28 +35,33 @@ struct PhysicalLocation
 };
 
 /* 包含了所有read ends信息，如picard里边的 ReadEndsForMarkDuplicates*/
-struct ReadEnds : PhysicalLocation
-{
+struct ReadEnds : PhysicalLocation {
     static const int8_t F = 0, R = 1, FF = 2, FR = 3, RR = 4, RF = 5;
     /* 保留一些bam记录中的数据 */
     bool read1FirstOfPair = true;
     /* ReadEnds中的成员变量 */
-    /** Little struct-like class to hold read pair (and fragment) end data for duplicate marking. */
+    /** Little struct-like class to hold read pair (and fragment) end data for
+     * duplicate marking. */
     // int16_t libraryId; // 没用，不考虑多样本
     int8_t orientation = -1;
     int32_t read1ReferenceIndex = -1;
     int32_t read1Coordinate = -1;
     int32_t read2ReferenceIndex = -1;
-    int32_t read2Coordinate = -1; // This field is overloaded for flow based processing as the end coordinate of read 1. (paired reads not supported)
+    // This field is overloaded for flow based processing as the end coordinate of read 1. (paired reads not supported)
+    int32_t read2Coordinate = -1;
     /* Additional information used to detect optical dupes */
-    // int16_t readGroup = -1; 一般经过比对后的bam文件只有一个read group，normal或者tumor
-    /** For optical duplicate detection the orientation matters regard to 1st or 2nd end of a mate */
+    // int16_t readGroup = -1; 一般经过比对后的bam文件只有一个read
+    // group，normal或者tumor
+    /** For optical duplicate detection the orientation matters regard to 1st or
+     * 2nd end of a mate */
     int8_t orientationForOpticalDuplicates = -1;
-    /** A *transient* flag marking this read end as being an optical duplicate. */
+    /** A *transient* flag marking this read end as being an optical duplicate.
+     */
     bool isOpticalDuplicate = false;
 
     /* ReadEndsForMarkDuplicates中的成员变量 */
-    /** Little struct-like class to hold read pair (and fragment) end data for MarkDuplicatesWithMateCigar **/
+    /** Little struct-like class to hold read pair (and fragment) end data for
+     * MarkDuplicatesWithMateCigar **/
     int16_t score = 0;
     int64_t read1IndexInFile = -1;
     int64_t read2IndexInFile = -1;
@@ -62,23 +70,22 @@ struct ReadEnds : PhysicalLocation
     /* ReadEndsForMarkDuplicatesWithBarcodes中的成员变量 (好像用不到) */
     // int32_t barcode = 0; // primary barcode for this read (and pair)
     // int32_t readOneBarcode = 0; // read one barcode, 0 if not present
-    // int32_t readTwoBarcode = 0; // read two barcode, 0 if not present or not paired
+    // int32_t readTwoBarcode = 0; // read two barcode, 0 if not present or not
+    // paired
 
     /* zzh增加的成员变量 */
-    int64_t posKey = -1; // 根据位置信息生成的关键字 return (int64_t)tid << MAX_CONTIG_LEN_SHIFT | (int64_t)pos;
+    int64_t posKey = -1;  // 根据位置信息生成的关键字 return (int64_t)tid <<
+                          // MAX_CONTIG_LEN_SHIFT | (int64_t)pos;
 
     /* 根据pairend read的比对方向，来确定整体的比对方向 */
-    static int8_t GetOrientationByte(bool read1NegativeStrand, bool read2NegativeStrand)
-    {
-        if (read1NegativeStrand)
-        {
+    static int8_t GetOrientationByte(bool read1NegativeStrand,
+                                     bool read2NegativeStrand) {
+        if (read1NegativeStrand) {
             if (read2NegativeStrand)
                 return RR;
             else
                 return RF;
-        }
-        else
-        {
+        } else {
             if (read2NegativeStrand)
                 return FR;
             else
@@ -87,47 +94,38 @@ struct ReadEnds : PhysicalLocation
     }
 
     /* 比较两个readends是否一样（有个冗余） */
-    static bool AreComparableForDuplicates(const ReadEnds &lhs, const ReadEnds &rhs, bool compareRead2)
-    {
+    static bool AreComparableForDuplicates(const ReadEnds &lhs,
+                                           const ReadEnds &rhs,
+                                           bool compareRead2) {
         bool areComparable = true;
         areComparable = lhs.read1ReferenceIndex == rhs.read1ReferenceIndex &&
                         lhs.read1Coordinate == rhs.read1Coordinate &&
                         lhs.orientation == rhs.orientation;
-        if (areComparable && compareRead2)
-        {
-            areComparable = lhs.read2ReferenceIndex == rhs.read2ReferenceIndex &&
-                            lhs.read2Coordinate == rhs.read2Coordinate;
+        if (areComparable && compareRead2) {
+            areComparable =
+                lhs.read2ReferenceIndex == rhs.read2ReferenceIndex &&
+                lhs.read2Coordinate == rhs.read2Coordinate;
         }
         return areComparable;
     }
 
     /* 比对方向是否正向 */
-    bool IsPositiveStrand() const
-    {
-        return orientation == F;
-    }
+    bool IsPositiveStrand() const { return orientation == F; }
 
     /* pairend是否合适的比对上了 */
-    bool IsPaired() const
-    {
-        return read2ReferenceIndex != -1;
-    }
+    bool IsPaired() const { return read2ReferenceIndex != -1; }
 
-    bool IsNegativeStrand() const
-    {
-        return orientation == R;
-    }
+    bool IsNegativeStrand() const { return orientation == R; }
 
     // 对于相交的数据进行比对，a是否小于b，根据AreComparableForDuplicates函数得来
-    static inline bool ReadLittleThan(const ReadEnds &a, const ReadEnds &b, bool compareRead2 = false)
-    {
+    static inline bool ReadLittleThan(const ReadEnds &a, const ReadEnds &b,
+                                      bool compareRead2 = false) {
         int comp = a.read1ReferenceIndex - b.read1ReferenceIndex;
         if (comp == 0)
             comp = a.read1Coordinate - b.read1Coordinate;
         if (comp == 0)
             comp = a.orientation - b.orientation;
-        if (compareRead2)
-        {
+        if (compareRead2) {
             if (comp == 0)
                 comp = a.read2ReferenceIndex - b.read2ReferenceIndex;
             if (comp == 0)
@@ -137,14 +135,12 @@ struct ReadEnds : PhysicalLocation
     }
 
     // 找某一个位置的所有readend时需要
-    static bool pairsLittleThan(const ReadEnds &lhs, const ReadEnds &rhs)
-    {
+    static bool PairsLittleThan(const ReadEnds &lhs, const ReadEnds &rhs) {
         return ReadLittleThan(lhs, rhs, true);
     }
 
     // 比较函数
-    bool operator < (const ReadEnds &o) const
-    {
+    bool operator<(const ReadEnds &o) const {
         int comp = read1ReferenceIndex - o.read1ReferenceIndex;
         if (comp == 0)
             comp = read1Coordinate - o.read1Coordinate;
diff --git a/src/sam/utils/read_name_parser.h b/src/sam/utils/read_name_parser.h
index 34dfcb0..a7572a7 100644
--- a/src/sam/utils/read_name_parser.h
+++ b/src/sam/utils/read_name_parser.h
@@ -9,11 +9,12 @@ Date : 2023/11/6
 #ifndef READ_NAME_PARSER_H_
 #define READ_NAME_PARSER_H_
 
-#include "read_ends.h"
 #include <common/utils/util.h>
-
 #include <stdint.h>
+
 #include <string>
+
+#include "read_ends.h"
 // #include <regex>
 #include <boost/regex.hpp>
 
@@ -24,103 +25,107 @@ using std::string;
 
 /**
  * Provides access to the physical location information about a cluster.
- * All values should be defaulted to -1 if unavailable.  ReadGroup and Tile should only allow
- * non-zero positive integers, x and y coordinates may be negative.
- * 非线程安全
+ * All values should be defaulted to -1 if unavailable.  ReadGroup and Tile
+ * should only allow non-zero positive integers, x and y coordinates may be
+ * negative. 非线程安全
  */
-struct ReadNameParser
-{
+struct ReadNameParser {
     /**
-     * The read name regular expression (regex) is used to extract three pieces of information from the read name: tile, x location,
-     * and y location.  Any read name regex should parse the read name to produce these and only these values.  An example regex is:
+     * The read name regular expression (regex) is used to extract three pieces
+     * of information from the read name: tile, x location, and y location.  Any
+     * read name regex should parse the read name to produce these and only
+     * these values.  An example regex is:
      *  (?:.*:)?([0-9]+)[^:]*:([0-9]+)[^:]*:([0-9]+)[^:]*$
-     * which assumes that fields in the read name are delimited by ':' and the last three fields correspond to the tile, x and y locations,
-     * ignoring any trailing non-digit characters.
+     * which assumes that fields in the read name are delimited by ':' and the
+     * last three fields correspond to the tile, x and y locations, ignoring any
+     * trailing non-digit characters.
      *
-     * The default regex is optimized for fast parsing (see {@link #getLastThreeFields(String, char, int[])}) by searching for the last
-     * three fields, ignoring any trailing non-digit characters, assuming the delimiter ':'.  This should consider correctly read names
-     * where we have 5 or 7 field with the last three fields being tile/x/y, as is the case for the majority of read names produced by
-     * Illumina technology.
+     * The default regex is optimized for fast parsing (see {@link
+     * #getLastThreeFields(String, char, int[])}) by searching for the last
+     * three fields, ignoring any trailing non-digit characters, assuming the
+     * delimiter ':'.  This should consider correctly read names where we have 5
+     * or 7 field with the last three fields being tile/x/y, as is the case for
+     * the majority of read names produced by Illumina technology.
      */
-    const string DEFAULT_READ_NAME_REGEX = "(?:.*:)?([0-9]+)[^:]*:([0-9]+)[^:]*:([0-9]+)[^:]*$";
+    const string DEFAULT_READ_NAME_REGEX =
+        "(?:.*:)?([0-9]+)[^:]*:([0-9]+)[^:]*:([0-9]+)[^:]*$";
 
     string readNameStored = "";
     PhysicalLocation physicalLocationStored;
-    int tmpLocationFields[3];               // for optimization of addLocationInformation
-    bool useOptimizedDefaultParsing = true; // was the regex default?
+    int tmpLocationFields[3];  // for optimization of addLocationInformation
+    bool useOptimizedDefaultParsing = true;  // was the regex default?
     string readNameRegex = DEFAULT_READ_NAME_REGEX;
     regex readNamePattern;
     bool warnedAboutRegexNotMatching = true;
 
     ReadNameParser() : ReadNameParser(DEFAULT_READ_NAME_REGEX) {}
-    ReadNameParser(const string &strReadNameRegex) : ReadNameParser(strReadNameRegex, true) {}
-    ReadNameParser(const string &strReadNameRegex, bool isWarn)
-    {
+    ReadNameParser(const string &strReadNameRegex)
+        : ReadNameParser(strReadNameRegex, true) {}
+    ReadNameParser(const string &strReadNameRegex, bool isWarn) {
         readNameRegex = strReadNameRegex;
         if (strReadNameRegex == DEFAULT_READ_NAME_REGEX)
             useOptimizedDefaultParsing = true;
         else
             useOptimizedDefaultParsing = false;
-        readNamePattern = boost::regex(strReadNameRegex, boost::regex_constants::optimize);
+        readNamePattern =
+            boost::regex(strReadNameRegex, boost::regex_constants::optimize);
         warnedAboutRegexNotMatching = isWarn;
     }
 
     /* 重新设置readNameRegex */
-    void SetReadNameRegex(const string &strReadNameRegex)
-    {
+    void SetReadNameRegex(const string &strReadNameRegex) {
         readNameRegex = strReadNameRegex;
         if (strReadNameRegex == DEFAULT_READ_NAME_REGEX)
             useOptimizedDefaultParsing = true;
         else
             useOptimizedDefaultParsing = false;
-        readNamePattern = boost::regex(strReadNameRegex, boost::regex_constants::optimize);
+        readNamePattern =
+            boost::regex(strReadNameRegex, boost::regex_constants::optimize);
         // readNamePattern = strReadNameRegex;
     }
 
     /* 添加测序时候的tile x y 信息 */
-    bool AddLocationInformation(const string &readName, PhysicalLocation *loc)
-    {
-        if (!(readName == readNameStored))
-        {
-            if (ReadLocationInformation(readName, loc))
-            {
+    bool AddLocationInformation(const string &readName, PhysicalLocation *loc) {
+        if (!(readName == readNameStored)) {
+            if (ReadLocationInformation(readName, loc)) {
                 readNameStored = readName;
                 physicalLocationStored = *loc;
                 return true;
             }
             // return false if read name cannot be parsed
             return false;
-        }
-        else
-        {
+        } else {
             *loc = physicalLocationStored;
             return true;
         }
     }
 
     /**
-     * Method used to extract tile/x/y from the read name and add it to the PhysicalLocationShort so that it
-     * can be used later to determine optical duplication
+     * Method used to extract tile/x/y from the read name and add it to the
+     * PhysicalLocationShort so that it can be used later to determine optical
+     * duplication
      *
      * @param readName the name of the read/cluster
      * @param loc the object to add tile/x/y to
-     * @return true if the read name contained the information in parsable form, false otherwise
+     * @return true if the read name contained the information in parsable form,
+     * false otherwise
      */
-    bool ReadLocationInformation(const string &readName, PhysicalLocation *loc) 
-    {
+    bool ReadLocationInformation(const string &readName,
+                                 PhysicalLocation *loc) {
         try {
-            // Optimized version if using the default read name regex (== used on purpose):
-            if (useOptimizedDefaultParsing)
-            {
+            // Optimized version if using the default read name regex (== used
+            // on purpose):
+            if (useOptimizedDefaultParsing) {
                 const int fields = getLastThreeFields(readName, ':');
-                if (!(fields == 5 || fields == 7))
-                {
-                    if (warnedAboutRegexNotMatching)
-                    {
+                if (!(fields == 5 || fields == 7)) {
+                    if (warnedAboutRegexNotMatching) {
                         Warn(
-                            "Default READ_NAME_REGEX '%s' did not match read name '%s'."
-                            "You may need to specify a READ_NAME_REGEX in order to correctly identify optical duplicates.  "
-                            "Note that this message will not be emitted again even if other read names do not match the regex.",
+                            "Default READ_NAME_REGEX '%s' did not match read "
+                            "name '%s'."
+                            "You may need to specify a READ_NAME_REGEX in "
+                            "order to correctly identify optical duplicates.  "
+                            "Note that this message will not be emitted again "
+                            "even if other read names do not match the regex.",
                             readNameRegex.c_str(), readName.c_str());
                         warnedAboutRegexNotMatching = false;
                     }
@@ -130,13 +135,9 @@ struct ReadNameParser
                 loc->x = tmpLocationFields[1];
                 loc->y = tmpLocationFields[2];
                 return true;
-            }
-            else if (readNameRegex.empty())
-            {
+            } else if (readNameRegex.empty()) {
                 return false;
-            }
-            else
-            {
+            } else {
                 // Standard version that will use the regex
                 cmatch m;
                 if (boost::regex_match(readName.c_str(), m, readNamePattern)) {
@@ -144,28 +145,25 @@ struct ReadNameParser
                     loc->x = std::stoi(m[2].str());
                     loc->y = std::stoi(m[3].str());
                     return true;
-                }
-                else
-                {
-                    if (warnedAboutRegexNotMatching)
-                    {
+                } else {
+                    if (warnedAboutRegexNotMatching) {
                         Warn(
                             "READ_NAME_REGEX '%s' did not match read name '%s'."
                             "Your regex may not be correct.  "
-                            "Note that this message will not be emitted again even if other read names do not match the regex.",
+                            "Note that this message will not be emitted again "
+                            "even if other read names do not match the regex.",
                             readNameRegex.c_str(), readName.c_str());
                         warnedAboutRegexNotMatching = false;
                     }
                     return false;
                 }
             }
-        }
-        catch (const std::runtime_error &e)
-        {
-            if (warnedAboutRegexNotMatching) 
-            {
+        } catch (const std::runtime_error &e) {
+            if (warnedAboutRegexNotMatching) {
                 Warn(
-                    "A field parsed out of a read name was expected to contain an integer and did not. READ_NAME_REGEX: %s; Read name: %s; Error Msg: %s",
+                    "A field parsed out of a read name was expected to contain "
+                    "an integer and did not. READ_NAME_REGEX: %s; Read name: "
+                    "%s; Error Msg: %s",
                     readNameRegex.c_str(), readName.c_str(), e.what());
                 warnedAboutRegexNotMatching = false;
             }
@@ -175,39 +173,39 @@ struct ReadNameParser
     }
 
     /**
-     * Given a string, splits the string by the delimiter, and returns the the last three fields parsed as integers.  Parsing a field
-     * considers only a sequence of digits up until the first non-digit character.  The three values are stored in the passed-in array.
+     * Given a string, splits the string by the delimiter, and returns the the
+     * last three fields parsed as integers.  Parsing a field considers only a
+     * sequence of digits up until the first non-digit character.  The three
+     * values are stored in the passed-in array.
      *
-     * @throws NumberFormatException if any of the tokens that should contain numbers do not start with parsable numbers
+     * @throws NumberFormatException if any of the tokens that should contain
+     * numbers do not start with parsable numbers
      */
-    int getLastThreeFields(const string &readName, char delim)
-    {
-        int tokensIdx = 2; // start at the last token
+    int getLastThreeFields(const string &readName, char delim) {
+        int tokensIdx = 2;  // start at the last token
         int numFields = 0;
         int i, endIdx;
         endIdx = readName.size();
         // find the last three tokens only
-        for (i = readName.size() - 1; 0 <= i && 0 <= tokensIdx; i--)
-        {
-            if (readName.at(i) == delim || 0 == i)
-            {
+        for (i = (int)readName.size() - 1; 0 <= i && 0 <= tokensIdx; i--) {
+            if (readName.at(i) == delim || 0 == i) {
                 numFields++;
                 const int startIdx = (0 == i) ? 0 : (i + 1);
-                tmpLocationFields[tokensIdx] = std::stoi(readName.substr(startIdx, endIdx - startIdx));
+                tmpLocationFields[tokensIdx] =
+                    std::stoi(readName.substr(startIdx, endIdx - startIdx));
                 tokensIdx--;
                 endIdx = i;
             }
         }
         // continue to find the # of fields
-        while (0 <= i) 
-        {
+        while (0 <= i) {
             if (readName.at(i) == delim || 0 == i)
                 numFields++;
             i--;
         }
-        if (numFields < 3)
-        {
-            tmpLocationFields[0] = tmpLocationFields[1] = tmpLocationFields[2] = -1;
+        if (numFields < 3) {
+            tmpLocationFields[0] = tmpLocationFields[1] = tmpLocationFields[2] =
+                -1;
             numFields = -1;
         }