From ab7da8a7d711db2654ca6e658ae74b02565ae9ea Mon Sep 17 00:00:00 2001
From: zzh <zzhofhust@gmail.com>
Date: Wed, 23 Apr 2025 14:59:40 +0800
Subject: [PATCH] Change some annotations

---
 src/markdup/dup_metrics.h      |   8 +-
 src/markdup/markdup.cpp        |  76 ++++++-------
 src/markdup/md_args.h          |  24 ++--
 src/markdup/md_funcs.cpp       |  14 +--
 src/markdup/md_funcs.h         |  18 +--
 src/markdup/md_pipeline.cpp    | 198 ++++++++++++++++-----------------
 src/markdup/md_pipeline.h      |  48 ++++----
 src/markdup/md_types.h         |  62 +++++------
 src/markdup/read_ends.h        |  46 ++++----
 src/markdup/read_name_parser.h |   8 +-
 src/util/bam_buf.cpp           |  74 ++++++------
 src/util/bam_buf.h             |  92 +++++++--------
 src/util/bam_wrap.h            |  80 ++++++-------
 src/util/murmur3.h             |   2 +-
 14 files changed, 375 insertions(+), 375 deletions(-)
diff --git a/src/markdup/dup_metrics.h b/src/markdup/dup_metrics.h
index 1fba5de..d072524 100644
--- a/src/markdup/dup_metrics.h
+++ b/src/markdup/dup_metrics.h
@@ -11,7 +11,7 @@ using std::string;
 using std::vector;
 
 /*
- * 标记冗余过程中的一些数据统计
+ * 
  */
 struct DuplicationMetrics {
     /**
@@ -57,15 +57,15 @@ struct DuplicationMetrics {
      */
     uint64_t ESTIMATED_LIBRARY_SIZE = 0;
 
-    // 其他的统计数据
+    // 
     vector<double> CoverageMult;
 
-    // 比如在该位置，有4个冗余，那么所有4个冗余的位置数量
+    // ，4，4
     MDMap DuplicateCountHist;
     MDMap NonOpticalDuplicateCountHist;
     MDMap OpticalDuplicatesCountHist;
 
-    // 没有冗余的位置总数量 for test
+    //  for test
     MDSet<int64_t> singletonReads;
     MDSet<int64_t> dupReads;  // for test
     MDSet<int64_t> bestReads;
diff --git a/src/markdup/markdup.cpp b/src/markdup/markdup.cpp
index 9dec53a..f7d4986 100644
--- a/src/markdup/markdup.cpp
+++ b/src/markdup/markdup.cpp
@@ -1,6 +1,6 @@
 /*
 Description:
-标记bam文件中的冗余信息，只处理按照坐标排序后的bam，且bam为单一样本数据
+bam，bam，bam
 
 Copyright : All right reserved by ICT
 
@@ -26,26 +26,26 @@ Date : 2023/10/23
 
 namespace nsgv {
 
-MarkDupsArg gMdArg;                   // 参数
-std::vector<ReadNameParser> gNameParsers;  // 每个线程一个read name parser
-samFile *gInBamFp;                    // 输入的bam文件
-sam_hdr_t *gInBamHeader;              // 输入的bam文件头信息
-samFile *gOutBamFp;                   // 输出文件, sam或者bam格式
-sam_hdr_t *gOutBamHeader;             // 输出文件的header
-DuplicationMetrics gMetrics;          // 统计信息
+MarkDupsArg gMdArg;                   // 
+std::vector<ReadNameParser> gNameParsers;  // read name parser
+samFile *gInBamFp;                    // bam
+sam_hdr_t *gInBamHeader;              // bam
+samFile *gOutBamFp;                   // , sambam
+sam_hdr_t *gOutBamHeader;             // header
+DuplicationMetrics gMetrics;          // 
 DupResult gDupRes;
 PipelineArg gPipe(&gDupRes);
 };  // namespace nsgv
 
-// 字节缓冲区
+// 
 struct ByteBuf {
     uint8_t *buf = nullptr;
-    int size = 0;      // 当前使用量
-    int capacity = 0;  // 最大容量
+    int size = 0;      // 
+    int capacity = 0;  // 
 };
 
 /*
- * 获取文件名后缀
+ * 
  */
 static string getFileExtension(const string &filename) {
     auto last_dot = filename.find_last_of('.');
@@ -58,25 +58,25 @@ static string getFileExtension(const string &filename) {
 // entrance of mark duplicates
 int MarkDuplicates() {
     PROF_START(whole_process);
-    /* 初始化一些参数和变量*/
+    /* */
     nsgv::gNameParsers.resize(nsgv::gMdArg.NUM_THREADS);
     for (auto &parser : nsgv::gNameParsers)
-        parser.SetReadNameRegex(nsgv::gMdArg.READ_NAME_REGEX);  // 用来解析read name中的tile，x，y信息
+        parser.SetReadNameRegex(nsgv::gMdArg.READ_NAME_REGEX);  // read nametile，x，y
 
-    /* 打开输入bam文件 */
+    /* bam */
     nsgv::gInBamFp = sam_open_format(nsgv::gMdArg.INPUT_FILE.c_str(), "r", nullptr);
     if (!nsgv::gInBamFp) {
         spdlog::error("[{}] load sam/bam file failed.\n", __func__);
         return -1;
     }
     hts_set_opt(nsgv::gInBamFp, HTS_OPT_BLOCK_SIZE, BAM_BLOCK_SIZE);
-    nsgv::gInBamHeader = sam_hdr_read(nsgv::gInBamFp);  // 读取header
-    // 获取样本名称(libraryId)
+    nsgv::gInBamHeader = sam_hdr_read(nsgv::gInBamFp);  // header
+    // (libraryId)
     nsgv::gMetrics.LIBRARY = sam_hdr_line_name(nsgv::gInBamHeader, "RG", 0);
 
-    /* 利用线程池对输入输出文件进行读写 */
-    htsThreadPool htsPoolRead = {NULL, 0};   // 多线程读取，创建线程池
-    htsThreadPool htsPoolWrite = {NULL, 0};  // 读写用不同的线程池
+    /*  */
+    htsThreadPool htsPoolRead = {NULL, 0};   // ，
+    htsThreadPool htsPoolWrite = {NULL, 0};  // 
     htsPoolRead.pool = hts_tpool_init(nsgv::gMdArg.NUM_THREADS);
     htsPoolWrite.pool = hts_tpool_init(nsgv::gMdArg.NUM_THREADS);
     if (!htsPoolRead.pool || !htsPoolWrite.pool) {
@@ -86,12 +86,12 @@ int MarkDuplicates() {
     }
     hts_set_opt(nsgv::gInBamFp, HTS_OPT_THREAD_POOL, &htsPoolRead);
 
-    /* 冗余检查和标记 */
+    /*  */
     PROF_START(markdup_all);
     PipelineMarkDups();
     PROF_END(gprof[GP_markdup_all], markdup_all);
 
-    /* 标记冗余, 将处理后的结果写入文件 */
+    /* ,  */
     char modeout[12] = "wb";
     sam_open_mode(modeout + 1, nsgv::gMdArg.OUTPUT_FILE.c_str(), NULL);
     nsgv::gOutBamFp = sam_open(nsgv::gMdArg.OUTPUT_FILE.c_str(), modeout);
@@ -100,14 +100,14 @@ int MarkDuplicates() {
         return -1;
     }
     nsgv::gOutBamHeader = sam_hdr_dup(nsgv::gInBamHeader);
-    // 修改输出文件的header
+    // header
     sam_hdr_add_line(nsgv::gOutBamHeader, "PG", "ID", nsgv::gMdArg.PROGRAM_RECORD_ID.c_str(), "VN", FASTDUP_VERSION,
                      "CL", nsgv::gMdArg.CLI_STR.c_str(), NULL);
 
-    // 用同样的线程数量处理输出文件
+    // 
     hts_set_opt(nsgv::gOutBamFp, HTS_OPT_BLOCK_SIZE, BAM_BLOCK_SIZE);
     hts_set_opt(nsgv::gOutBamFp, HTS_OPT_THREAD_POOL, &htsPoolWrite);
-    sam_close(nsgv::gInBamFp);  // 重新打开bam文件
+    sam_close(nsgv::gInBamFp);  // bam
     nsgv::gInBamFp = sam_open_format(nsgv::gMdArg.INPUT_FILE.c_str(), "r", nullptr);
     if (!nsgv::gInBamFp) {
         spdlog::error("[{}] load sam/bam file failed.\n", __func__);
@@ -122,8 +122,8 @@ int MarkDuplicates() {
         sam_close(nsgv::gInBamFp);
         return -1;
     }
-    // 输出index文件
-    string indexFn = nsgv::gMdArg.OUTPUT_FILE + ".bai";  // min_shift = 0 是bai格式
+    // index
+    string indexFn = nsgv::gMdArg.OUTPUT_FILE + ".bai";  // min_shift = 0 bai
     if ("sam" == getFileExtension(nsgv::gMdArg.OUTPUT_FILE) || !nsgv::gMdArg.CREATE_INDEX) {
         indexFn = "";
     }
@@ -141,7 +141,7 @@ int MarkDuplicates() {
         }
     }
 
-    // 读取输入文件，标记冗余并输出
+    // ，
     BamBufType inBuf(nsgv::gMdArg.DUPLEX_IO);
     inBuf.Init(nsgv::gInBamFp, nsgv::gInBamHeader, nsgv::gMdArg.MAX_MEM);
 
@@ -190,13 +190,13 @@ int MarkDuplicates() {
             isOpticalDup = false;
             isInDuplicateSet = false;
 
-            // 删除原来的duplicate tag
+            // duplicate tag
             if (nsgv::gMdArg.CLEAR_DT) {
                 uint8_t *oldTagVal = bam_aux_get(bw->b, nsgv::gMdArg.DUPLICATE_TYPE_TAG.c_str());
                 if (oldTagVal != NULL) bam_aux_del(bw->b, oldTagVal);
             }
 
-            // 统计信息
+            // 
             if (bw->GetReadUnmappedFlag()) {
                 ++nsgv::gMetrics.UNMAPPED_READS;
             } else if (bw->IsSecondaryOrSupplementary()) {
@@ -207,7 +207,7 @@ int MarkDuplicates() {
                 ++nsgv::gMetrics.READ_PAIRS_EXAMINED;  // will need to be divided by 2 at the end
             }
 
-            /* 判断是否冗余 */
+            /*  */
             if (bamIdx == dupIdx) {
                 ++realDupSize;  // for test
                 isDup = true;
@@ -217,7 +217,7 @@ int MarkDuplicates() {
                     duplicateSetSize = dupIdx.dupSet;
                 }
 
-                // 为了防止小内存运行的时候，有重复的dupidx，这时候dup的repIdx和dupSetSize可能会有不同
+                // ，dupidx，duprepIdxdupSetSize
                 while ((dupIdx = dupIdxQue.Pop()) == bamIdx);
                 if (opticalIdx == bamIdx)
                     isOpticalDup = true;
@@ -227,7 +227,7 @@ int MarkDuplicates() {
                         isOpticalDup = true;
                 }
 
-                // 添加冗余标识
+                // 
                 bw->SetDuplicateReadFlag(true);
 
                 uint8_t *oldTagVal = bam_aux_get(bw->b, nsgv::gMdArg.DUPLICATE_TYPE_TAG.c_str());
@@ -242,7 +242,7 @@ int MarkDuplicates() {
                                    nsgv::gMdArg.DUPLICATE_TYPE_LIBRARY.size() + 1,
                                    (const uint8_t *)nsgv::gMdArg.DUPLICATE_TYPE_LIBRARY.c_str());
 
-                // 计算统计信息
+                // 
                 if (!bw->IsSecondaryOrSupplementary() && !bw->GetReadUnmappedFlag()) {
                     // Update the duplication metrics
                     if (!bw->GetReadPairedFlag() || bw->GetMateUnmappedFlag()) {
@@ -282,7 +282,7 @@ int MarkDuplicates() {
                                    (const uint8_t *)&duplicateSetSize);
                 }
             }
-            // 每个read都要写到output，只是添加标识，或者删掉这些冗余record
+            // readoutput，，record
             ++bamIdx;
             if (isDup && nsgv::gMdArg.REMOVE_DUPLICATES) {
                 continue;
@@ -312,7 +312,7 @@ int MarkDuplicates() {
     }
     bam_destroy1(bp);
 
-    // 计算统计信息
+    // 
     nsgv::gMetrics.READ_PAIRS_EXAMINED /= 2;
     nsgv::gMetrics.READ_PAIR_DUPLICATES /= 2;
     for (auto &arr : nsgv::gDupRes.opticalDupIdxArr) nsgv::gMetrics.READ_PAIR_OPTICAL_DUPLICATES += arr.size();
@@ -329,7 +329,7 @@ int MarkDuplicates() {
     }
     calculateRoiHistogram(nsgv::gMetrics);
 
-    // 将统计信息写到文件里
+    // 
     if (!nsgv::gMdArg.METRICS_FILE.empty()) {
         ofstream ofsM(nsgv::gMdArg.METRICS_FILE);
         ofsM << "## StringHeader" << endl;
@@ -364,7 +364,7 @@ int MarkDuplicates() {
         return -1;
     }
 
-    /* 关闭文件，收尾清理 */
+    /* ， */
     sam_close(nsgv::gOutBamFp);
     sam_close(nsgv::gInBamFp);
 
diff --git a/src/markdup/md_args.h b/src/markdup/md_args.h
index 5ce449b..299fde3 100644
--- a/src/markdup/md_args.h
+++ b/src/markdup/md_args.h
@@ -1,5 +1,5 @@
 /*
-Description: Markduplicate需要用到的一些参数
+Description: Markduplicate
 
 Copyright : All right reserved by ICT
 
@@ -40,7 +40,7 @@ enum ValidationStringency {
  */
 enum DuplicateTaggingPolicy { DontTag, OpticalOnly, All };
 
-/* 排序的方式 */
+/*  */
 enum SortOrder {
     unsorted,
     queryname,
@@ -49,14 +49,14 @@ enum SortOrder {
     unknown
 };
 
-/* 计算reads分数的方式（比那个read得分更高） */
+/* reads（read） */
 enum ScoringStrategy { SUM_OF_BASE_QUALITIES, TOTAL_MAPPED_REFERENCE_LENGTH, RANDOM };
 
-/* 索引文件的格式 （bai或者csi） */
+/*  （baicsi） */
 enum IndexFormat { BAI, CSI };
 }  // namespace nsmd
 
-/* markduplicate 需要的参数*/
+/* markduplicate */
 struct MarkDupsArg {
     string INPUT_FILE;  // input bam filename
 
@@ -64,9 +64,9 @@ struct MarkDupsArg {
 
     int NUM_THREADS = 1;
 
-    size_t MAX_MEM = ((size_t)1) << 30; //  // 最小1G
+    size_t MAX_MEM = ((size_t)1) << 30; //  // 1G
 
-    bool DUPLEX_IO = true; // 同时读写
+    bool DUPLEX_IO = true; // 
 
     /**
      * The optional attribute in SAM/BAM/CRAM files used to store the duplicate type.
@@ -156,7 +156,7 @@ struct MarkDupsArg {
               optional = true) */
     string MOLECULAR_IDENTIFIER_TAG = "";
 
-    /* 继承自 AbstractMarkDuplicatesCommandLineProgram 的参数*/
+    /*  AbstractMarkDuplicatesCommandLineProgram */
     /* "File to write duplication metrics to" */
     string METRICS_FILE;
 
@@ -196,7 +196,7 @@ struct MarkDupsArg {
               optional = true */
     vector<string> COMMENT;
 
-    /* 继承自 AbstractOpticalDuplicateFinderCommandLineProgram 的参数 */
+    /*  AbstractOpticalDuplicateFinderCommandLineProgram  */
 
     /* "MarkDuplicates can use the tile and cluster positions to estimate the rate of optical duplication " +
             "in addition to the dominant source of duplication, PCR, to provide a more accurate estimation of library
@@ -227,7 +227,7 @@ struct MarkDupsArg {
        completely disable this check, " + "set the value to -1." */
     long MAX_OPTICAL_DUPLICATE_SET_SIZE = DEFAULT_MAX_DUPLICATE_SET_SIZE;
 
-    /* 继承自 CommandLineProgram 的参数*/
+    /*  CommandLineProgram */
 
     /* "Whether to suppress job-summary info on System.err.", common = true */
     bool QUIET = false;
@@ -256,9 +256,9 @@ struct MarkDupsArg {
     /* Add PG tag to each read in a SAM or BAM (PGTagArgumentCollection)*/
     bool ADD_PG_TAG_TO_READS = true;
 
-    // 命令行字符串
+    // 
     string CLI_STR;
 
-    // 开始运行时间
+    // 
     string START_TIME;
 };
\ No newline at end of file
diff --git a/src/markdup/md_funcs.cpp b/src/markdup/md_funcs.cpp
index b62e2fe..cad85f4 100644
--- a/src/markdup/md_funcs.cpp
+++ b/src/markdup/md_funcs.cpp
@@ -30,7 +30,7 @@ extern DuplicationMetrics gMetrics;
 };
 
 /*
- * 计算read的分数
+ * read
  */
 int16_t computeDuplicateScore(BamWrap &bw) {
     int16_t score = 0;
@@ -71,7 +71,7 @@ int16_t computeDuplicateScore(BamWrap &bw) {
 
 /*
  * Builds a read ends object that represents a single read.
- * 用来表示一个read的特征结构
+ * read
  */
 void buildReadEnds(BamWrap &bw, int64_t index, ReadNameParser &rnParser, ReadEnds *pKey) {
     auto &k = *pKey;
@@ -93,11 +93,11 @@ void buildReadEnds(BamWrap &bw, int64_t index, ReadNameParser &rnParser, ReadEnd
     else
         nsgv::gMdArg.CHECK_OPTICAL_DUP = false;
     // cout << k.tile << ' ' << k.x << ' ' << k.y << endl;
-    // 计算位置key
+    // key
     k.posKey = BamWrap::bam_global_pos(k.read1ReferenceIndex, k.read1Coordinate);  // << 1 | k.orientation;
 }
 
-/* 对找到的pairend read end添加一些信息 */
+/* pairend read end */
 void modifyPairedEnds(const ReadEnds &fragEnd, ReadEnds *pPairedEnds) {
     auto &pairedEnds = *pPairedEnds;
 
@@ -202,7 +202,7 @@ static void findOpticalDuplicates(vector<const ReadEnds *> &readEndsArr, const R
             if (currentLoc == pBestRe)
                 keeperIndex = i;
             if (currentLoc->tile != ReadEnds::NO_VALUE) {
-                int key = currentLoc->tile;  // 只处理一个样本，所以只有一个read group
+                int key = currentLoc->tile;  // ，read group
                 tileRGmap[key].push_back(i);
             }
             opticalDistanceRelationGraph.addNode(i);
@@ -323,7 +323,7 @@ static int checkOpticalDuplicates(vector<const ReadEnds *> &readEndsArr, const R
 }
 
 /**
- * 记录光学原因造成的冗余
+ * 
  */
 void trackOpticalDuplicates(vector<const ReadEnds *> &readEndsArr, const ReadEnds *pBestRe) {
     bool hasFR = false, hasRF = false;
@@ -358,7 +358,7 @@ void trackOpticalDuplicates(vector<const ReadEnds *> &readEndsArr, const ReadEnd
         nOpticalDup = checkOpticalDuplicates(readEndsArr, pBestRe);
     }
 
-    // 统计信息，trackDuplicateCounts
+    // ，trackDuplicateCounts
     ++nsgv::gMetrics.DuplicateCountHist[readEndsArr.size()];
     if (readEndsArr.size() > nOpticalDup)
         ++nsgv::gMetrics.NonOpticalDuplicateCountHist[readEndsArr.size() - nOpticalDup];
diff --git a/src/markdup/md_funcs.h b/src/markdup/md_funcs.h
index c8a70cd..b3ff9c5 100644
--- a/src/markdup/md_funcs.h
+++ b/src/markdup/md_funcs.h
@@ -14,19 +14,19 @@ using std::unordered_map;
 using std::unordered_set;
 using std::vector;
 
-/* 前向声明 */
+/*  */
 class BamWrap;
 class ReadEnds;
 class ReadNameParser;
 
 /*
- * 用来检测optical duplication的graph
+ * optical duplicationgraph
  */
 template <class Node>
-struct Graph {           // 用set？
-    vector<Node> nodes;  // 图中的结点
+struct Graph {           // set？
+    vector<Node> nodes;  // 
     unordered_map<Node, int> nodeIdxMap;
-    unordered_map<int, unordered_set<int>> neighbors;  // 邻接列表
+    unordered_map<int, unordered_set<int>> neighbors;  // 
 
     int addNode(const Node &singleton) {
         int idx = -1;
@@ -98,18 +98,18 @@ struct Graph {           // 用set？
 };
 
 /*
- * 计算read的分数
+ * read
  */
 int16_t computeDuplicateScore(BamWrap &bw);
 
 /*
  * Builds a read ends object that represents a single read.
- * 用来表示一个read的特征结构
+ * read
  */
 void buildReadEnds(BamWrap &bw, int64_t index, ReadNameParser &rnParser, ReadEnds *pKey);
 
 /*
- * 对找到的pairend read end添加一些信息
+ * pairend read end
  */
 void modifyPairedEnds(const ReadEnds &fragEnd, ReadEnds *pPairedEnds);
 
@@ -118,7 +118,7 @@ void modifyPairedEnds(const ReadEnds &fragEnd, ReadEnds *pPairedEnds);
  * in fact optical duplicates, and stores the data in the instance level histogram.
  * Additionally sets the transient isOpticalDuplicate flag on each read end that is
  * identified as an optical duplicate.
- * 记录光学原因造成的冗余
+ * 
  */
 void trackOpticalDuplicates(vector<const ReadEnds *> &readEndsArr, const ReadEnds *pBestRe);
 
diff --git a/src/markdup/md_pipeline.cpp b/src/markdup/md_pipeline.cpp
index 8096d5e..5f51905 100644
--- a/src/markdup/md_pipeline.cpp
+++ b/src/markdup/md_pipeline.cpp
@@ -22,16 +22,16 @@ using namespace std;
 
 namespace nsgv {
 
-extern MarkDupsArg gMdArg;           // 用来测试性能
-extern samFile *gInBamFp;            // 输入的bam文件
-extern sam_hdr_t *gInBamHeader;      // 输入的bam文件头信息
-extern DuplicationMetrics gMetrics;  // 统计信息
+extern MarkDupsArg gMdArg;           // 
+extern samFile *gInBamFp;            // bam
+extern sam_hdr_t *gInBamHeader;      // bam
+extern DuplicationMetrics gMetrics;  // 
 extern vector<ReadNameParser> gNameParsers;
 extern DupResult gDupRes;
 extern PipelineArg gPipe;
 };  // namespace nsgv
 
-/* 处理一组pairend的readends，标记冗余, 这个函数需要串行运行，因为需要做一些统计*/
+/* pairendreadends，, ，*/
 static void markDupsForPairs(vector<const ReadEnds *> &vpRe, DPSet<DupInfo> *dupIdx, MDSet<int64_t> *opticalDupIdx,
                              DPSet<DupInfo> *repIdx, MDSet<int64_t> *notDupIdx = nullptr,
                              MDSet<int64_t> *notOpticalDupIdx = nullptr, MDSet<int64_t> *notRepIdx = nullptr) {
@@ -41,7 +41,7 @@ static void markDupsForPairs(vector<const ReadEnds *> &vpRe, DPSet<DupInfo> *dup
     int maxScore = 0;
     const ReadEnds *pBest = nullptr;
     /** All read ends should have orientation FF, FR, RF, or RR **/
-    for (auto pe : vpRe) {  // 找分数最高的readend
+    for (auto pe : vpRe) {  // readend
         if (pe->score > maxScore || pBest == nullptr) {
             maxScore = pe->score;
             pBest = pe;
@@ -53,7 +53,7 @@ static void markDupsForPairs(vector<const ReadEnds *> &vpRe, DPSet<DupInfo> *dup
         notDupIdx->insert(pBest->read2IndexInFile);
     }
 
-    if (nsgv::gMdArg.CHECK_OPTICAL_DUP) {  // 检查光学冗余
+    if (nsgv::gMdArg.CHECK_OPTICAL_DUP) {  // 
         // trackOpticalDuplicates
         vector<const ReadEnds *> prevOpticalRe;
         if (notOpticalDupIdx != nullptr) {
@@ -64,7 +64,7 @@ static void markDupsForPairs(vector<const ReadEnds *> &vpRe, DPSet<DupInfo> *dup
             }
         }
         trackOpticalDuplicates(vpRe, pBest);
-        // 由于重叠问题，可能会更新数据
+        // ，
         if (notOpticalDupIdx != nullptr) {
             for (auto pe : prevOpticalRe) {
                 if (!pe->isOpticalDuplicate) {
@@ -74,13 +74,13 @@ static void markDupsForPairs(vector<const ReadEnds *> &vpRe, DPSet<DupInfo> *dup
             }
         }
     }
-    for (auto pe : vpRe) {  // 对非best read标记冗余
-        if (pe != pBest) {  // 非best
-            dupIdx->insert(DupInfo(pe->read1IndexInFile, pBest->read1IndexInFile, (int16_t)vpRe.size()));  // 添加read1
+    for (auto pe : vpRe) {  // best read
+        if (pe != pBest) {  // best
+            dupIdx->insert(DupInfo(pe->read1IndexInFile, pBest->read1IndexInFile, (int16_t)vpRe.size()));  // read1
             if (pe->read2IndexInFile != pe->read1IndexInFile)
                 dupIdx->insert(DupInfo(pe->read2IndexInFile, pBest->read1IndexInFile, (int16_t)vpRe.size()));  // read2,
-            //  注意这里代表是read1的索引
-            //  检查是否optical dup
+            //  read1
+            //  optical dup
             if (pe->isOpticalDuplicate && opticalDupIdx != nullptr) {
                 opticalDupIdx->insert(pe->read1IndexInFile);
                 if (pe->read2IndexInFile != pe->read1IndexInFile)
@@ -88,7 +88,7 @@ static void markDupsForPairs(vector<const ReadEnds *> &vpRe, DPSet<DupInfo> *dup
             }
         }
     }
-    // 在输出的bam文件中添加tag, best作为dupset的代表
+    // bamtag, bestdupset
     if (nsgv::gMdArg.TAG_DUPLICATE_SET_MEMBERS) {
         repIdx->insert(DupInfo(pBest->read1IndexInFile, pBest->read1IndexInFile, (int16_t)vpRe.size()));
         repIdx->insert(DupInfo(pBest->read2IndexInFile, pBest->read1IndexInFile, (int16_t)vpRe.size()));
@@ -104,7 +104,7 @@ static void markDupsForPairs(vector<const ReadEnds *> &vpRe, DPSet<DupInfo> *dup
     }
 }
 
-/* 处理一组非paired的readends，标记冗余 */
+/* pairedreadends， */
 static void markDupsForFrags(vector<const ReadEnds *> &vpRe, bool containsPairs, DPSet<DupInfo> *dupIdx,
                              MDSet<int64_t> *notDupIdx = nullptr) {
     if (containsPairs) {
@@ -133,28 +133,28 @@ static void markDupsForFrags(vector<const ReadEnds *> &vpRe, bool containsPairs,
     }
 }
 
-/* 找到与readend pos相等的所有readend */
+/* readend posreadend */
 static void getEqualRE(const ReadEnds &re, vector<ReadEnds> &src, vector<ReadEnds> *dst) {
-    auto range = std::equal_range(src.begin(), src.end(), re, ReadEnds::CorLittleThan);  // 只比对位点
+    auto range = std::equal_range(src.begin(), src.end(), re, ReadEnds::CorLittleThan);  // 
     dst->insert(dst->end(), range.first, range.second);
 }
 
 #define LOWER_BOUND(tid, nthread, ndata) ((tid) * (ndata) / (nthread))
 #define UPPER_BOUND(tid, nthread, ndata) ((tid + 1) * (ndata) / (nthread))
 
-/* 处理pairs */
+/* pairs */
 static void processPairs(vector<ReadEnds> &readEnds, DPSet<DupInfo> *dupIdx, MDSet<int64_t> *opticalDupIdx,
                          DPSet<DupInfo> *repIdx, MDSet<int64_t> *notDupIdx = nullptr,
                          MDSet<int64_t> *notOpticalDupIdx = nullptr, MDSet<int64_t> *notRepIdx = nullptr) {
     // return;
-    vector<const ReadEnds *> vpCache;  // 有可能是冗余的reads
+    vector<const ReadEnds *> vpCache;  // reads
     const ReadEnds *pReadEnd = nullptr;
     for (auto &re : readEnds) {
-        if (pReadEnd != nullptr && ReadEnds::AreComparableForDuplicates(*pReadEnd, re, true))  // 跟前一个一样
-            vpCache.push_back(&re);                                                            // 处理一个潜在的冗余组
+        if (pReadEnd != nullptr && ReadEnds::AreComparableForDuplicates(*pReadEnd, re, true))  // 
+            vpCache.push_back(&re);                                                            // 
         else {
             markDupsForPairs(vpCache, dupIdx, opticalDupIdx, repIdx, notDupIdx, notOpticalDupIdx,
-                             notRepIdx);  // 不一样
+                             notRepIdx);  // 
             vpCache.clear();
             vpCache.push_back(&re);
             pReadEnd = &re;
@@ -163,11 +163,11 @@ static void processPairs(vector<ReadEnds> &readEnds, DPSet<DupInfo> *dupIdx, MDS
     markDupsForPairs(vpCache, dupIdx, opticalDupIdx, repIdx, notDupIdx, notOpticalDupIdx, notRepIdx);
 }
 
-/* 处理frags */
+/* frags */
 static void processFrags(vector<ReadEnds> &readEnds, DPSet<DupInfo> *dupIdx, MDSet<int64_t> *notDupIdx = nullptr) {
     bool containsPairs = false;
     bool containsFrags = false;
-    vector<const ReadEnds *> vpCache;  // 有可能是冗余的reads
+    vector<const ReadEnds *> vpCache;  // reads
     const ReadEnds *pReadEnd = nullptr;
     for (auto &re : readEnds) {
         if (pReadEnd != nullptr && ReadEnds::AreComparableForDuplicates(*pReadEnd, re, false)) {
@@ -191,7 +191,7 @@ static void processFrags(vector<ReadEnds> &readEnds, DPSet<DupInfo> *dupIdx, MDS
 }
 
 
-/* 获取交叉部分的数据 */
+/*  */
 static inline void getIntersectData(vector<ReadEnds> &leftArr, vector<ReadEnds> &rightArr, vector<ReadEnds> *dst,
                                     bool isPairCmp = false) {
     if (leftArr.empty() || rightArr.empty()) {
@@ -204,7 +204,7 @@ static inline void getIntersectData(vector<ReadEnds> &leftArr, vector<ReadEnds>
 
     while (!ReadEnds::ReadLittleThan(leftArr[leftEndIdx - leftSpan], rightArr[rightStartIdx], isPairCmp)) {
         leftSpan += 1;
-        if (leftSpan > leftEndIdx) {  // 上一个的范围被下一个全部包围了（可能会有bug，上上个也与下一个有交集呢？）
+        if (leftSpan > leftEndIdx) {  // （bug，？）
             leftSpan = leftArr.size() - 1;
             break;
         }
@@ -212,7 +212,7 @@ static inline void getIntersectData(vector<ReadEnds> &leftArr, vector<ReadEnds>
 
     while (!ReadEnds::ReadLittleThan(leftArr[leftEndIdx], rightArr[rightSpan], isPairCmp)) {
         rightSpan += 1;
-        if (rightSpan == rightArr.size() - 1)  // 同上，可能会有bug
+        if (rightSpan == rightArr.size() - 1)  // ，bug
             break;
     }
     dst->insert(dst->end(), leftArr.end() - leftSpan, leftArr.end());
@@ -223,7 +223,7 @@ static inline void getIntersectData(vector<ReadEnds> &leftArr, vector<ReadEnds>
         std::sort(dst->begin(), dst->end(), ReadEnds::FragLittleThan);
 }
 
-/* 将frags重叠部分的dup idx放进数据中 */
+/* fragsdup idx */
 static inline void refreshFragDupIdx(DPSet<DupInfo> &dupIdx, MDSet<int64_t> &notDupIdx, MarkDupDataArg *lastArg,
                                      MarkDupDataArg *curArg) {
     auto &lp = *lastArg;
@@ -259,26 +259,26 @@ static void mtGenerateReadEnds(void *data, long idx, int tid) {
     PROF_START(gen);
     size_t start_id = LOWER_BOUND(idx, nThread, bams.size());
     size_t end_id = UPPER_BOUND(idx, nThread, bams.size());
-    for (size_t i = start_id; i < end_id; ++i) {  // 循环处理每个read
+    for (size_t i = start_id; i < end_id; ++i) {  // read
         BamWrap *bw = bams[i];
         const int64_t bamIdx = bamStartIdx + i;
         if (bw->GetReadUnmappedFlag()) {
             if (bw->b->core.tid == -1)
                 // When we hit the unmapped reads with no coordinate, no reason to continue (only in coordinate sort).
                 break;
-        } else if (!bw->IsSecondaryOrSupplementary()) {  // 是主要比对
+        } else if (!bw->IsSecondaryOrSupplementary()) {  // 
             ReadEnds fragEnd;
             buildReadEnds(*bw, bamIdx, rnParser, &fragEnd);
-            frags.push_back(fragEnd);                                     // 添加进frag集合
-            if (bw->GetReadPairedFlag() && !bw->GetMateUnmappedFlag()) {  // 是pairend而且互补的read也比对上了
+            frags.push_back(fragEnd);                                     // frag
+            if (bw->GetReadPairedFlag() && !bw->GetMateUnmappedFlag()) {  // pairendread
                 string key = bw->query_name();
                 if (unpairedDic.find(key) == unpairedDic.end()) {
                     unpairedDic[key] = {taskSeq, fragEnd};
-                } else {  // 找到了pairend
+                } else {  // pairend
                     auto &pairedEnds = unpairedDic.at(key).unpairedRE;
                     modifyPairedEnds(fragEnd, &pairedEnds);
                     pairs.push_back(pairedEnds);
-                    unpairedDic.erase(key);  // 删除找到的pairend
+                    unpairedDic.erase(key);  // pairend
                 }
             }
         }
@@ -302,8 +302,8 @@ static void doGenRE(PipelineArg &pipeArg) {
     const int numThread = pipeArg.numThread;
 
     kt_for(numThread, mtGenerateReadEnds, &pipeArg, numThread);
-    // 用来在genRE阶段计算一些Sort阶段应该计算的数据，保持每个step用时更平衡
-    // 轮询每个线程中未找到匹配的read，找到匹配的
+    // genRESort，step
+    // read，
     genREData.unpairedDic.clear();
     vector<ReadEnds> &pairs = genREData.pairsArr[numThread];
     pairs.clear();
@@ -316,11 +316,11 @@ static void doGenRE(PipelineArg &pipeArg) {
             const ReadEnds &fragEnd = val.second.unpairedRE;
             if (genREData.unpairedDic.find(key) == genREData.unpairedDic.end()) {
                 genREData.unpairedDic[key] = {readData.taskSeq, fragEnd};
-            } else {  // 找到了pairend
+            } else {  // pairend
                 auto &pairedEnds = genREData.unpairedDic.at(key).unpairedRE;
                 modifyPairedEnds(fragEnd, &pairedEnds);
                 pairs.push_back(pairedEnds);
-                genREData.unpairedDic.erase(key);  // 删除找到的pairend
+                genREData.unpairedDic.erase(key);  // pairend
             }
         }
     }
@@ -367,11 +367,11 @@ static void doMarkDup(PipelineArg &pipeArg) {
     sortData.dataPtr = tmpPtr;
 
     SortMarkData &smd = *(SortMarkData *)mdData.dataPtr;
-    //  先处理 pair
+    //   pair
     PROF_START(markdup_pair);
     processPairs(smd.pairs, &mdData.pairDupIdx, &mdData.pairOpticalDupIdx, &mdData.pairRepIdx);
     PROF_END(gprof[GP_markdup_pair], markdup_pair);
-    // 再处理frag
+    // frag
     PROF_START(markdup_frag);
     processFrags(smd.frags, &mdData.fragDupIdx);
     PROF_END(gprof[GP_markdup_frag], markdup_frag);
@@ -418,7 +418,7 @@ static void refreshMarkDupData(DPSet<DupInfo> &dupIdx, MDSet<int64_t> &opticalDu
     refreshNotDupIdx(repIdx, p.pairRepIdx);
 }
 
-// 处理相邻数据块之间重叠的部分
+// 
 static void processIntersectFragPairs(MarkDupData &lp, MarkDupData &cp) {
     SortMarkData &lsm = *(SortMarkData *)lp.dataPtr;
     SortMarkData &csm = *(SortMarkData *)cp.dataPtr;
@@ -430,7 +430,7 @@ static void processIntersectFragPairs(MarkDupData &lp, MarkDupData &cp) {
     MDSet<int64_t> notOpticalDupIdx;
     MDSet<int64_t> notDupIdx;
     MDSet<int64_t> notRepIdx;
-    // 先处理重叠的frags
+    // frags
     getIntersectData(lsm.frags, csm.frags, &reArr);
     processFrags(reArr, &dupIdx, &notDupIdx);
     refreshDupIdx(dupIdx, lp.fragDupIdx);
@@ -438,34 +438,34 @@ static void processIntersectFragPairs(MarkDupData &lp, MarkDupData &cp) {
     refreshNotDupIdx(notDupIdx, lp.fragDupIdx);
     refreshNotDupIdx(notDupIdx, cp.fragDupIdx);
 
-    // 再处理重叠的pairs
+    // pairs
     reArr.clear();
     dupIdx.clear();
     notDupIdx.clear();
 
     getIntersectData(lsm.pairs, csm.pairs, &reArr, true);
     processPairs(reArr, &dupIdx, &opticalDupIdx, &repIdx, &notDupIdx, &notOpticalDupIdx, &notRepIdx);
-    refreshMarkDupData(dupIdx, opticalDupIdx, repIdx, notDupIdx, notOpticalDupIdx, notRepIdx, cp, lp); // 放在cp里，因为后面global里可能有相同的dup，防止多次出现
+    refreshMarkDupData(dupIdx, opticalDupIdx, repIdx, notDupIdx, notOpticalDupIdx, notRepIdx, cp, lp); // cp，globaldup，
 }
 
-// 在相邻的数据块之间寻找未匹配的readends, 找到匹配的放到lp里
+// readends, lp
 static void findUnpairedInDatas(MarkDupData &lp, MarkDupData &cp) {
     auto &interPairedData = lp.ckeyReadEndsMap;
     SortMarkData &lsm = *(SortMarkData *)lp.dataPtr;
     SortMarkData &csm = *(SortMarkData *)cp.dataPtr;
-    for (auto itr = lsm.unpairedDic.begin(); itr != lsm.unpairedDic.end(); ) {  // 遍历上一个任务中的每个未匹配的read
+    for (auto itr = lsm.unpairedDic.begin(); itr != lsm.unpairedDic.end(); ) {  // read
         auto &lastUnpair = *itr;
         auto &readName = lastUnpair.first;
         auto &lastUnpairInfo = lastUnpair.second;
-        auto lastRE = lastUnpairInfo.unpairedRE;                        // 未匹配的read end
-        if (csm.unpairedDic.find(readName) != csm.unpairedDic.end()) {  // 在当前这个任务里找到了这个未匹配的read
+        auto lastRE = lastUnpairInfo.unpairedRE;                        // read end
+        if (csm.unpairedDic.find(readName) != csm.unpairedDic.end()) {  // read
             auto &curUnpairInfo = csm.unpairedDic[readName];
             auto &curRE = curUnpairInfo.unpairedRE;
-            modifyPairedEnds(curRE, &lastRE); // lastRE当做找到匹配后，完整的ReadEnds
+            modifyPairedEnds(curRE, &lastRE); // lastRE，ReadEnds
             CalcKey ck(lastRE);
             auto &pairArr = interPairedData[ck];
             pairArr.push_back(lastRE);
-            // 从dict中清除配对后的readends
+            // dictreadends
             csm.unpairedDic.erase(readName);
             itr = lsm.unpairedDic.erase(itr);
         } else {
@@ -474,23 +474,23 @@ static void findUnpairedInDatas(MarkDupData &lp, MarkDupData &cp) {
     }
 }
 
-// 在global和lp中寻找未匹配的readends, 找到匹配的放到global里
+// globallpreadends, global
 static void findUnpairedInGlobal(IntersectData &g, MarkDupData &lp) {
     auto &interPairedData = g.ckeyReadEndsMap;
     SortMarkData &lsm = *(SortMarkData *)lp.dataPtr;
-    for (auto itr = lsm.unpairedDic.begin(); itr != lsm.unpairedDic.end();) {  // 遍历上一个任务中的每个未匹配的read
+    for (auto itr = lsm.unpairedDic.begin(); itr != lsm.unpairedDic.end();) {  // read
         auto &lastUnpair = *itr;
         auto &readName = lastUnpair.first;
         auto &lastUnpairInfo = lastUnpair.second;
-        auto lastRE = lastUnpairInfo.unpairedRE;                    // 未匹配的read end
-        if (g.unpairedDic.find(readName) != g.unpairedDic.end()) {  // 在global里找到了这个未匹配的read
+        auto lastRE = lastUnpairInfo.unpairedRE;                    // read end
+        if (g.unpairedDic.find(readName) != g.unpairedDic.end()) {  // globalread
             auto &gUnpairInfo = g.unpairedDic[readName];
             auto &gRE = gUnpairInfo.unpairedRE;
-            modifyPairedEnds(lastRE, &gRE);  // gRE当做找到匹配后，完整的ReadEnds
+            modifyPairedEnds(lastRE, &gRE);  // gRE，ReadEnds
             CalcKey ck(gRE);
             auto &pairArr = interPairedData[ck];
             pairArr.push_back(gRE);
-            // 从dict中清除配对后的readends
+            // dictreadends
             g.unpairedDic.erase(readName);
             itr = lsm.unpairedDic.erase(itr);
         } else {
@@ -528,10 +528,10 @@ static void doIntersect(PipelineArg &pipeArg) {
     SortMarkData &lsm = *(SortMarkData *)lp.dataPtr;
     SortMarkData &csm = *(SortMarkData *)cp.dataPtr;
 
-    // 处理相邻数据块之间重叠的部分
+    // 
     processIntersectFragPairs(lp, cp);
 
-    // 检查确保lp和np之间没有数据交叉
+    // lpnp
     int64_t lastLeft = INT64_MIN, lastRight = INT64_MAX, curLeft = INT64_MAX, curRight = INT64_MAX;
     if (lsm.pairs.size() > 0) {
         lastLeft = lsm.frags[0].Left();
@@ -549,21 +549,21 @@ static void doIntersect(PipelineArg &pipeArg) {
     }
     g.rightPos = lastRight;
 
-    findUnpairedInDatas(lp, cp);  // 找到的匹配放到lp里
-    findUnpairedInGlobal(g, cp);  // 把cp中未匹配的放到global里保存
+    findUnpairedInDatas(lp, cp);  // lp
+    findUnpairedInGlobal(g, cp);  // cpglobal
 
-    // 处理lp中的新找到的匹配
+    // lp
     TaskSeqDupInfo t;
     for (auto itr = lp.ckeyReadEndsMap.begin(); itr != lp.ckeyReadEndsMap.end();) {
         auto &ckVal = *itr;
         auto &ck = ckVal.first;
         auto &pairArr = ckVal.second;
-        getEqualRE(pairArr[0], lsm.pairs, &pairArr); // 如果不在计算范围内，会放在global里
-        if (ck.Right() <= lastRight) { // 必须在当前数据块的范围内, 才进行处理
-            if (ck.Left() >= curLeft) {  // 在交叉的范围内才去加上这些在cp中的数据
+        getEqualRE(pairArr[0], lsm.pairs, &pairArr); // ，global
+        if (ck.Right() <= lastRight) { // , 
+            if (ck.Left() >= curLeft) {  // cp
                 getEqualRE(pairArr[0], csm.pairs, &pairArr);
             }
-            // 在global里找一找ck
+            // globalck
             auto gitr = g.ckeyReadEndsMap.find(ck);
             if (gitr != g.ckeyReadEndsMap.end()) {
                 auto &gPairArr = gitr->second;
@@ -578,7 +578,7 @@ static void doIntersect(PipelineArg &pipeArg) {
         }
     }
 
-    // 处理找到匹配的global数据
+    // global
     for (auto itr = g.ckeyReadEndsMap.begin(); itr != g.ckeyReadEndsMap.end();) {
         auto &ckVal = *itr;
         auto &ck = ckVal.first;
@@ -586,7 +586,7 @@ static void doIntersect(PipelineArg &pipeArg) {
         if (ck.Left() >= lastLeft) {
             getEqualRE(pairArr[0], lsm.pairs, &pairArr);
         }
-        if (ck.Right() <= lastRight) {  // 只有在这个范围内，对应位点的所有reads才完全都包含了
+        if (ck.Right() <= lastRight) {  // ，reads
             sort(pairArr.begin(), pairArr.end(), ReadEnds::PairLittleThan);
             processPairs(pairArr, &t.dupIdx, &t.opticalDupIdx, &t.repIdx, &t.notDupIdx, &t.notOpticalDupIdx, &t.notRepIdx);
             itr = g.ckeyReadEndsMap.erase(itr);
@@ -595,16 +595,16 @@ static void doIntersect(PipelineArg &pipeArg) {
         }
     }
 
-    // 剩余的在lp中没处理的放到global里
+    // lpglobal
     for (auto &ckVal : lp.ckeyReadEndsMap) {
         auto &pairArr = g.ckeyReadEndsMap[ckVal.first];
         pairArr.insert(pairArr.end(), ckVal.second.begin(), ckVal.second.end());
 
     }
     lp.ckeyReadEndsMap.clear();
-    // 更新一下冗余结果
+    // 
     refreshMarkDupData(t.dupIdx, t.opticalDupIdx, t.repIdx, t.notDupIdx, t.notOpticalDupIdx, t.notRepIdx, lp, cp);
-    // 处理g中新找到的匹配
+    // g
     putDupinfoToGlobal(g, lp);
 
     for (auto &unPair : lsm.unpairedDic) {
@@ -620,16 +620,16 @@ static void *pipeRead(void *data) {
     while (1) {
         PROF_START(read_wait);
         yarn::POSSESS(pipeArg.readSig);
-        yarn::WAIT_FOR(pipeArg.readSig, yarn::NOT_TO_BE, 1);  // 只有一个坑，因为在bambuf内部支持异步读取
+        yarn::WAIT_FOR(pipeArg.readSig, yarn::NOT_TO_BE, 1);  // ，bambuf
         PROF_END(gprof[GP_read_wait], read_wait);
         size_t readNum = 0;
         PROF_START(read);
         if (inBamBuf.ReadStat() >= 0)
-            readNum = inBamBuf.ReadBam();  // 读入新一轮的数据
+            readNum = inBamBuf.ReadBam();  // 
         PROF_END(gprof[GP_read], read);
         if (readNum < 1) {
             pipeArg.readFinish = 1;
-            yarn::TWIST(pipeArg.readSig, yarn::BY, 1);  // 读入了一轮数据
+            yarn::TWIST(pipeArg.readSig, yarn::BY, 1);  // 
             break;
         }
         spdlog::info("{} reads processed in {} round", readNum, pipeArg.readOrder);
@@ -640,9 +640,9 @@ static void *pipeRead(void *data) {
         pipeArg.readData.taskSeq = pipeArg.readOrder;
 
         readNumSum += readNum;
-        inBamBuf.ClearAll();                        // 清理上一轮读入的数据
+        inBamBuf.ClearAll();                        // 
         pipeArg.readOrder += 1;                     // for next
-        yarn::TWIST(pipeArg.readSig, yarn::BY, 1);  // 读入了一轮数据
+        yarn::TWIST(pipeArg.readSig, yarn::BY, 1);  // 
     }
     spdlog::info("{} total reads processed", readNumSum);
     return 0;
@@ -659,21 +659,21 @@ static void *pipeGenRE(void *data) {
     while (1) {
         PROF_START(gen_wait);
         yarn::POSSESS(pipeArg.readSig);
-        yarn::WAIT_FOR(pipeArg.readSig, yarn::NOT_TO_BE, 0);  // 等待有数据
+        yarn::WAIT_FOR(pipeArg.readSig, yarn::NOT_TO_BE, 0);  // 
         yarn::POSSESS(pipeArg.genRESig);
         PROF_END(gprof[GP_gen_wait], gen_wait);
 
-        yarn::WAIT_FOR(pipeArg.genRESig, yarn::NOT_TO_BE, pipeArg.GENBUFNUM);  // 有BUFNUM个坑
-        yarn::RELEASE(pipeArg.genRESig);                                       // 因为有不止一个位置，所以要释放
+        yarn::WAIT_FOR(pipeArg.genRESig, yarn::NOT_TO_BE, pipeArg.GENBUFNUM);  // BUFNUM
+        yarn::RELEASE(pipeArg.genRESig);                                       // ，
 
-        if (pipeArg.readFinish) { // 读取结束的时候，没有新的数据需要处理了
+        if (pipeArg.readFinish) { // ，
             yarn::POSSESS(pipeArg.genRESig);
             pipeArg.genREFinish = 1;
             yarn::TWIST(pipeArg.genRESig, yarn::BY, 1);
             yarn::TWIST(pipeArg.readSig, yarn::BY, -1);
             break;
         }
-        /* 处理bam，生成readends */
+        /* bam，readends */
         PROF_START(gen);
         doGenRE(pipeArg);
         PROF_END(gprof[GP_gen], gen);
@@ -681,7 +681,7 @@ static void *pipeGenRE(void *data) {
         yarn::POSSESS(pipeArg.genRESig);
         pipeArg.genREOrder += 1;
         yarn::TWIST(pipeArg.genRESig, yarn::BY, 1);
-        yarn::TWIST(pipeArg.readSig, yarn::BY, -1);  // 使用了一次读入的数据
+        yarn::TWIST(pipeArg.readSig, yarn::BY, -1);  // 
     }
     return 0;
 }
@@ -691,19 +691,19 @@ static void *pipeSort(void *data) {
     while (1) {
         PROF_START(sort_wait);
         yarn::POSSESS(pipeArg.genRESig);
-        yarn::WAIT_FOR(pipeArg.genRESig, yarn::NOT_TO_BE, 0);  // 等待有数据
+        yarn::WAIT_FOR(pipeArg.genRESig, yarn::NOT_TO_BE, 0);  // 
         yarn::RELEASE(pipeArg.genRESig);
         PROF_END(gprof[GP_sort_wait], sort_wait);
 
         yarn::POSSESS(pipeArg.sortSig);
-        yarn::WAIT_FOR(pipeArg.sortSig, yarn::NOT_TO_BE, pipeArg.SORTBUFNUM);  // 有BUFNUM个位置
+        yarn::WAIT_FOR(pipeArg.sortSig, yarn::NOT_TO_BE, pipeArg.SORTBUFNUM);  // BUFNUM
         yarn::RELEASE(pipeArg.sortSig);
 
         if (pipeArg.genREFinish) {
-            // 处理完剩余数据
+            // 
             while (pipeArg.sortOrder < pipeArg.genREOrder) {
                 yarn::POSSESS(pipeArg.sortSig);
-                yarn::WAIT_FOR(pipeArg.sortSig, yarn::NOT_TO_BE, pipeArg.SORTBUFNUM);  // 有BUFNUM个位置
+                yarn::WAIT_FOR(pipeArg.sortSig, yarn::NOT_TO_BE, pipeArg.SORTBUFNUM);  // BUFNUM
                 yarn::RELEASE(pipeArg.sortSig);
 
                 PROF_START(sort);
@@ -719,7 +719,7 @@ static void *pipeSort(void *data) {
             yarn::TWIST(pipeArg.sortSig, yarn::BY, 1);
             break;
         }
-        /* 排序 readends */
+        /*  readends */
         PROF_START(sort);
         doSort(pipeArg);
         PROF_END(gprof[GP_sort], sort);
@@ -739,7 +739,7 @@ static void *pipeMarkDup(void *data) {
     while (1) {
         PROF_START(markdup_wait);
         yarn::POSSESS(pipeArg.sortSig);
-        yarn::WAIT_FOR(pipeArg.sortSig, yarn::NOT_TO_BE, 0);  // 等待有数据
+        yarn::WAIT_FOR(pipeArg.sortSig, yarn::NOT_TO_BE, 0);  // 
         yarn::RELEASE(pipeArg.sortSig);
         PROF_END(gprof[GP_markdup_wait], markdup_wait);
 
@@ -748,7 +748,7 @@ static void *pipeMarkDup(void *data) {
         yarn::RELEASE(pipeArg.markDupSig);
 
         if (pipeArg.sortFinish) {
-            // 应该还得处理剩余的数据
+            // 
             while (pipeArg.markDupOrder < pipeArg.sortOrder) {
                 yarn::POSSESS(pipeArg.markDupSig);
                 yarn::WAIT_FOR(pipeArg.markDupSig, yarn::NOT_TO_BE, pipeArg.MARKBUFNUM);
@@ -767,7 +767,7 @@ static void *pipeMarkDup(void *data) {
             yarn::TWIST(pipeArg.markDupSig, yarn::TO, 2 + pipeArg.MARKBUFNUM);
             break;
         }
-        /* 冗余检测 readends */
+        /*  readends */
         PROF_START(markdup);
         doMarkDup(pipeArg);
         PROF_END(gprof[GP_markdup], markdup);
@@ -787,7 +787,7 @@ static void *pipeIntersect(void *data) {
     while (1) {
         PROF_START(intersect_wait);
         yarn::POSSESS(pipeArg.markDupSig);
-        yarn::WAIT_FOR(pipeArg.markDupSig, yarn::TO_BE_MORE_THAN, kInitIntersectOrder);  // 等待有数据
+        yarn::WAIT_FOR(pipeArg.markDupSig, yarn::TO_BE_MORE_THAN, kInitIntersectOrder);  // 
         yarn::RELEASE(pipeArg.markDupSig);
         PROF_END(gprof[GP_intersect_wait], intersect_wait);
 
@@ -801,7 +801,7 @@ static void *pipeIntersect(void *data) {
 
             break;
         }
-        /* 交叉数据处理 readends */
+        /*  readends */
         PROF_START(intersect);
         doIntersect(pipeArg);
         PROF_END(gprof[GP_intersect], intersect);
@@ -814,7 +814,7 @@ static void *pipeIntersect(void *data) {
     return 0;
 }
 
-/* 当所有任务结束后，global data里还有未处理的数据 */
+/* ，global data */
 static void processLastData(PipelineArg &pipeArg) {
     IntersectData &g = pipeArg.intersectData;
     MarkDupData &lp = pipeArg.markDupData[(pipeArg.intersectOrder - 1) % pipeArg.MARKBUFNUM];
@@ -823,7 +823,7 @@ static void processLastData(PipelineArg &pipeArg) {
     if (lsm.pairs.size() > 0) {
         lastLeft = lsm.frags[0].Left();
     }
-    // 处理找到匹配的global数据
+    // global
     TaskSeqDupInfo t;
     for (auto itr = g.ckeyReadEndsMap.begin(); itr != g.ckeyReadEndsMap.end();) {
         auto &ckVal = *itr;
@@ -836,9 +836,9 @@ static void processLastData(PipelineArg &pipeArg) {
         processPairs(pairArr, &t.dupIdx, &t.opticalDupIdx, &t.repIdx);
         itr = g.ckeyReadEndsMap.erase(itr);
     }
-    // 更新一下冗余结果
+    // 
     refreshMarkDupData(t.dupIdx, t.opticalDupIdx, t.repIdx, t.notDupIdx, t.notOpticalDupIdx, t.notRepIdx, lp);
-    // 处理g中新找到的匹配
+    // g
     putDupinfoToGlobal(g, lp);
 }
 
@@ -866,7 +866,7 @@ static void parallelPipeline() {
     // spdlog::info("result size : {} GB", nsgv::gDupRes.byteSize() / 1024.0 / 1024 / 1024);
 }
 
-/* 并行流水线方式处理数据，标记冗余 */
+/* ， */
 void PipelineMarkDups() {
     if (nsgv::gMdArg.NUM_THREADS > 1)
         return parallelPipeline();
@@ -879,11 +879,11 @@ void PipelineMarkDups() {
     for (int i = 0; i < pipeArg.GENBUFNUM; ++i) {
         pipeArg.genREData[i].Init(pipeArg.numThread);
     }
-    pipeArg.intersectOrder = 1;  // do intersect 从1开始
+    pipeArg.intersectOrder = 1;  // do intersect 1
     while (1) {
         size_t readNum = 0;
         if (inBamBuf.ReadStat() >= 0)
-            readNum = inBamBuf.ReadBam();  // 读入新一轮的数据
+            readNum = inBamBuf.ReadBam();  // 
         if (readNum < 1) {
             break;
         }
@@ -908,7 +908,7 @@ void PipelineMarkDups() {
         }
 
         readNumSum += readNum;
-        inBamBuf.ClearAll();     // 清理上一轮读入的数据
+        inBamBuf.ClearAll();     // 
         pipeArg.readOrder += 1;  // for next
     }
     processLastData(pipeArg);
diff --git a/src/markdup/md_pipeline.h b/src/markdup/md_pipeline.h
index 4cfd189..489e1b4 100644
--- a/src/markdup/md_pipeline.h
+++ b/src/markdup/md_pipeline.h
@@ -8,22 +8,22 @@
 
 struct ReadData {
     vector<BamWrap *> bams;   // read step output
-    int64_t bamStartIdx = 0;  // 每轮读入的bam数组，起始位置在全局bam中的索引
-    int64_t taskSeq = 0;      // 任务序号
+    int64_t bamStartIdx = 0;  // bam，bam
+    int64_t taskSeq = 0;      // 
 };
 
 struct GenREData {
-    vector<vector<ReadEnds>> pairsArr;       // 成对的reads
-    vector<vector<ReadEnds>> fragsArr;       // 暂未找到配对的reads
-    vector<UnpairedNameMap> unpairedDicArr;  // 用来寻找pair end
+    vector<vector<ReadEnds>> pairsArr;       // reads
+    vector<vector<ReadEnds>> fragsArr;       // reads
+    vector<UnpairedNameMap> unpairedDicArr;  // pair end
     void Init(int nThread) {
-        for (int i = 0; i <= nThread; ++i) {  // 比线程多一个，主要是pairs多一个，其他没用
+        for (int i = 0; i <= nThread; ++i) {  // ，pairs，
             pairsArr.push_back(vector<ReadEnds>());
             fragsArr.push_back(vector<ReadEnds>());
             unpairedDicArr.push_back(UnpairedNameMap());
         }
     }
-    UnpairedNameMap unpairedDic;         // 代替sort step中一部分计算
+    UnpairedNameMap unpairedDic;         // sort step
     size_t byteSize() {
         size_t bytes = 0;
         for (auto &v : pairsArr)
@@ -38,9 +38,9 @@ struct GenREData {
 };
 
 struct SortMarkData {
-    vector<ReadEnds> pairs;              // 成对的reads
-    vector<ReadEnds> frags;              // 暂未找到配对的reads
-    UnpairedNameMap unpairedDic;         // 用来寻找pair end
+    vector<ReadEnds> pairs;              // reads
+    vector<ReadEnds> frags;              // reads
+    UnpairedNameMap unpairedDic;         // pair end
     size_t byteSize() {
         size_t bytes = 0;
         for (auto &r : pairs) bytes += sizeof(r);
@@ -56,11 +56,11 @@ struct SortData {
 };
 
 struct MarkDupData {
-    int64_t taskSeq = 0;               // 任务序号
-    DPSet<DupInfo> pairDupIdx;         // pair的冗余read的索引
-    MDSet<int64_t> pairOpticalDupIdx;  // optical冗余read的索引
-    DPSet<DupInfo> fragDupIdx;         // frag的冗余read的索引
-    DPSet<DupInfo> pairRepIdx;         // pair的dupset代表read的索引
+    int64_t taskSeq = 0;               // 
+    DPSet<DupInfo> pairDupIdx;         // pairread
+    MDSet<int64_t> pairOpticalDupIdx;  // opticalread
+    DPSet<DupInfo> fragDupIdx;         // fragread
+    DPSet<DupInfo> pairRepIdx;         // pairdupsetread
     CkeyReadEndsMap ckeyReadEndsMap;
 
     volatile void *dataPtr;  // SortMarkData pointer
@@ -111,11 +111,11 @@ struct DupResult {
 };
 
 struct IntersectData {
-    UnpairedNameMap unpairedDic;  // 用来寻找pair end
+    UnpairedNameMap unpairedDic;  // pair end
     CkeyReadEndsMap ckeyReadEndsMap;
     int64_t rightPos = 0;
 
-    // 每个task对应一个vector
+    // taskvector
     vector<vector<DupInfo>> &dupIdxArr;
     vector<vector<int64_t>> &opticalDupIdxArr;
     vector<vector<DupInfo>> &repIdxArr;
@@ -138,7 +138,7 @@ struct IntersectData {
     }
 };
 
-// 记录流水线状态，task的序号，以及某阶段是否结束
+// ，task，
 struct PipelineArg {
     static const int GENBUFNUM = 2;
     static const int SORTBUFNUM = 2;
@@ -161,8 +161,8 @@ struct PipelineArg {
     yarn::lock_t *markDupSig;
 
     PipelineArg(DupResult *resPtr) : intersectData(resPtr) {
-        readSig = yarn::NEW_LOCK(0);   // 最大值1, 双buffer在bambuf中实现了，对调用透明
-        genRESig = yarn::NEW_LOCK(0);  // 最大值2, 双buffer
+        readSig = yarn::NEW_LOCK(0);   // 1, bufferbambuf，
+        genRESig = yarn::NEW_LOCK(0);  // 2, buffer
         sortSig = yarn::NEW_LOCK(0);
         markDupSig = yarn::NEW_LOCK(0);
         for (int i = 0; i < SORTBUFNUM; ++i) {
@@ -208,8 +208,8 @@ struct PipelineArg {
 };
 
 struct REArrIdIdx {
-    int arrId = 0;        // 数组索引
-    uint64_t arrIdx = 0;  // 数组内部当前索引
+    int arrId = 0;        // 
+    uint64_t arrIdx = 0;  // 
     const ReadEnds *pE = nullptr;
 };
 
@@ -224,7 +224,7 @@ struct REPairGreaterThan {
 
 template <typename CMP>
 struct ReadEndsHeap {
-    // 将冗余索引和他对应的task vector对应起来
+    // task vector
     vector<vector<ReadEnds>> *arr2d;
     priority_queue<REArrIdIdx, vector<REArrIdIdx>, CMP> minHeap;
     uint64_t popNum = 0;
@@ -269,5 +269,5 @@ struct ReadEndsHeap {
     }
 };
 
-// 并行运行mark duplicate
+// mark duplicate
 void PipelineMarkDups();
\ No newline at end of file
diff --git a/src/markdup/md_types.h b/src/markdup/md_types.h
index 7993773..47fdd73 100644
--- a/src/markdup/md_types.h
+++ b/src/markdup/md_types.h
@@ -19,13 +19,13 @@ using std::string;
 using std::unordered_set;
 using std::vector;
 
-/* 存放未匹配readend相同位点的所有readend */
+/* readendreadend */
 struct UnpairedREInfo {
-    int64_t taskSeq;  // 对应第几轮计算
+    int64_t taskSeq;  // 
     ReadEnds unpairedRE;
 };
 
-/* 对于一个pair数据，一个完整的计算点，包含read1的比对位置和read2的比对位置 */
+/* pair，，read1read2 */
 struct CalcKey {
     int8_t orientation = -1;
     int32_t read1ReferenceIndex = -1;
@@ -70,7 +70,7 @@ struct CalcKey {
             comp = read2ReferenceIndex - o.read2ReferenceIndex;
         if (comp == 0)
             comp = read2Coordinate - o.read2Coordinate;
-        // 需要orientation，因为要跟排序的比较方式和顺序一致
+        // orientation，
         if (comp == 0)
             comp = orientation - o.orientation;
         return comp < 0;
@@ -91,7 +91,7 @@ struct CalcKey {
             comp = read2ReferenceIndex - o.read2ReferenceIndex;
         if (comp == 0)
             comp = read2Coordinate - o.read2Coordinate;
-        // 需要orientation，因为要跟排序的比较方式和顺序一致
+        // orientation，
         if (comp == 0)
             comp = orientation - o.orientation;
         return comp < 0;
@@ -113,10 +113,10 @@ struct CalcKeyEqual {
     bool operator()(const CalcKey &o1, const CalcKey &o2) const { return o1 == o2; }
 };
 
-/* 用来记录冗余索引相关的信息 */
+/*  */
 struct DupInfo {
     int16_t dupSet = 0;  // dup set size
-    uint16_t repIdxHigh = 0;  // 这一批冗余中的非冗余read 代表的索引
+    uint16_t repIdxHigh = 0;  // read 
     uint32_t repIdxLow = 0;
     int64_t idx;
 
@@ -164,7 +164,7 @@ using CalcSet = set<T>;
 
 using ReadEndsSet = tsl::robin_set<ReadEnds, ReadEndsHash, ReadEndsEqual>;
 
-/* 当遗留数据在当前任务找到了pair read后，进行冗余计算时候存放结果的数据结构 */
+/* pair read， */
 struct TaskSeqDupInfo {
     DPSet<DupInfo> dupIdx;
     MDSet<int64_t> opticalDupIdx;
@@ -174,7 +174,7 @@ struct TaskSeqDupInfo {
     MDSet<int64_t> notRepIdx;
 };
 
-/* 保存有未匹配pair位点的信息，包括read end数组和有几个未匹配的read end */
+/* pair，read endread end */
 struct UnpairedPosInfo {
     int unpairedNum = 0;
     int64_t taskSeq;
@@ -184,33 +184,33 @@ struct UnpairedPosInfo {
 // typedef unordered_map<string, UnpairedREInfo> UnpairedNameMap;
 // typedef unordered_map<int64_t, UnpairedPosInfo> UnpairedPositionMap;
 
-typedef tsl::robin_map<string, UnpairedREInfo> UnpairedNameMap;  // 以read name为索引，保存未匹配的pair read
-//typedef map<string, UnpairedREInfo> UnpairedNameMap;  // 以read name为索引，保存未匹配的pair read
-typedef tsl::robin_map<int64_t, UnpairedPosInfo> UnpairedPositionMap;  // 以位点为索引，保存该位点包含的对应的所有read和该位点包含的剩余未匹配的read的数量
-// typedef map<CalcKey, vector<ReadEnds>> CkeyReadEndsMap;  // 以calckey为关键字，保存在相邻数据块之前找到的匹配readEnds
+typedef tsl::robin_map<string, UnpairedREInfo> UnpairedNameMap;  // read name，pair read
+//typedef map<string, UnpairedREInfo> UnpairedNameMap;  // read name，pair read
+typedef tsl::robin_map<int64_t, UnpairedPosInfo> UnpairedPositionMap;  // ，readread
+// typedef map<CalcKey, vector<ReadEnds>> CkeyReadEndsMap;  // calckey，readEnds
 typedef unordered_map<CalcKey, vector<ReadEnds>, CalcKeyHash, CalcKeyEqual>
-    CkeyReadEndsMap;  // 以calckey为关键字，保存在相邻数据块之前找到的匹配readEnds
+    CkeyReadEndsMap;  // calckey，readEnds
 // typedef tsl::robin_map<CalcKey, vector<ReadEnds>, CalcKeyHash, CalcKeyEqual> CkeyReadEndsMap;  //
-// 以calckey为关键字，保存在相邻数据块之前找到的匹配readEnds
+// calckey，readEnds
 
-/* 单线程处理冗余参数结构体 */
+/*  */
 struct MarkDupDataArg {
-    int64_t taskSeq;                     // 任务序号
-    int64_t bamStartIdx;                 // 当前vBam数组中第一个bam记录在整体bam中所处的位置
-    vector<BamWrap *> bams;              // 存放待处理的bam read
-    vector<ReadEnds> pairs;              // 成对的reads
-    vector<ReadEnds> frags;              // 暂未找到配对的reads
-    DPSet<DupInfo> pairDupIdx;           // pair的冗余read的索引
-    MDSet<int64_t> pairOpticalDupIdx;    // optical冗余read的索引
-    DPSet<DupInfo> fragDupIdx;           // frag的冗余read的索引
-    DPSet<DupInfo> pairRepIdx;           // pair的dupset代表read的索引
-    MDSet<int64_t> pairSingletonIdx;     // 某位置只有一对read的所有read pair个数
-    UnpairedNameMap unpairedDic;         // 用来寻找pair end
-    UnpairedPositionMap unpairedPosArr;  // 存放未匹配的ReadEnd对应位点的所有ReadEnd，为了避免重复存储
+    int64_t taskSeq;                     // 
+    int64_t bamStartIdx;                 // vBambambam
+    vector<BamWrap *> bams;              // bam read
+    vector<ReadEnds> pairs;              // reads
+    vector<ReadEnds> frags;              // reads
+    DPSet<DupInfo> pairDupIdx;           // pairread
+    MDSet<int64_t> pairOpticalDupIdx;    // opticalread
+    DPSet<DupInfo> fragDupIdx;           // fragread
+    DPSet<DupInfo> pairRepIdx;           // pairdupsetread
+    MDSet<int64_t> pairSingletonIdx;     // readread pair
+    UnpairedNameMap unpairedDic;         // pair end
+    UnpairedPositionMap unpairedPosArr;  // ReadEndReadEnd，
 };
 
 /*
- * 优先队列，用最小堆来实现对所有冗余索引的排序
+ * ，
  */
 template <typename T>
 struct PairArrIdIdx {
@@ -226,9 +226,9 @@ struct IdxGreaterThan {
 
 template <typename T>
 struct DupIdxQueue {
-    // 将冗余索引和他对应的task vector对应起来
+    // task vector
 
-    // 由于是多个task来查找冗余，所以每次找到的冗余index都放在一个独立的vector中，vector之间可能有重叠，所以需要用一个最小堆来维护
+    // task，indexvector，vector，
     vector<vector<T>> *dupIdx2DArr;
     priority_queue<PairArrIdIdx<T>, vector<PairArrIdIdx<T>>, IdxGreaterThan<T>> minHeap;
     uint64_t popNum = 0;
diff --git a/src/markdup/read_ends.h b/src/markdup/read_ends.h
index 866f165..fa5257a 100644
--- a/src/markdup/read_ends.h
+++ b/src/markdup/read_ends.h
@@ -1,6 +1,6 @@
 /*
 Description: read
-ends结构体主要用来标记冗余，包含一些序列的测序过程中的物理信息等
+ends，
 
 Copyright : All right reserved by ICT
 
@@ -39,15 +39,15 @@ struct PhysicalLocation {
     int16_t y = -1;
 };
 
-/* 包含了所有read ends信息，如picard里边的 ReadEndsForMarkDuplicates*/
+/* read ends，picard ReadEndsForMarkDuplicates*/
 struct ReadEnds : PhysicalLocation {
     static const int8_t F = 0, R = 1, FF = 2, FR = 3, RR = 4, RF = 5;
-    /* 保留一些bam记录中的数据 */
+    /* bam */
     bool read1FirstOfPair = true;
-    /* ReadEnds中的成员变量 */
+    /* ReadEnds */
     /** Little struct-like class to hold read pair (and fragment) end data for
      * duplicate marking. */
-    // int16_t libraryId; // 没用，不考虑多样本
+    // int16_t libraryId; // ，
     int8_t orientation = -1;
     int32_t read1ReferenceIndex = -1;
     int32_t read1Coordinate = -1;
@@ -55,8 +55,8 @@ struct ReadEnds : PhysicalLocation {
     // This field is overloaded for flow based processing as the end coordinate of read 1. (paired reads not supported)
     int32_t read2Coordinate = -1;
     /* Additional information used to detect optical dupes */
-    // int16_t readGroup = -1; 一般经过比对后的bam文件只有一个read
-    // group，normal或者tumor
+    // int16_t readGroup = -1; bamread
+    // group，normaltumor
     /** For optical duplicate detection the orientation matters regard to 1st or
      * 2nd end of a mate */
     int8_t orientationForOpticalDuplicates = -1;
@@ -64,7 +64,7 @@ struct ReadEnds : PhysicalLocation {
      */
     bool isOpticalDuplicate = false;
 
-    /* ReadEndsForMarkDuplicates中的成员变量 */
+    /* ReadEndsForMarkDuplicates */
     /** Little struct-like class to hold read pair (and fragment) end data for
      * MarkDuplicatesWithMateCigar **/
     int16_t score = 0;
@@ -72,20 +72,20 @@ struct ReadEnds : PhysicalLocation {
     int64_t read2IndexInFile = -1;
     int64_t duplicateSetSize = -1;
 
-    /* ReadEndsForMarkDuplicatesWithBarcodes中的成员变量 (好像用不到) */
+    /* ReadEndsForMarkDuplicatesWithBarcodes () */
     // int32_t barcode = 0; // primary barcode for this read (and pair)
     // int32_t readOneBarcode = 0; // read one barcode, 0 if not present
     // int32_t readTwoBarcode = 0; // read two barcode, 0 if not present or not
     // paired
 
-    /* zzh增加的成员变量 */
-    int64_t posKey = -1;  // 根据位置信息生成的关键字 return (int64_t)tid <<
-                          // MAX_CONTIG_LEN_SHIFT | (int64_t)pos; （包含clip的序列，也就是可能比map结果更偏左）
+    /* zzh */
+    int64_t posKey = -1;  //  return (int64_t)tid <<
+                          // MAX_CONTIG_LEN_SHIFT | (int64_t)pos; （clip，map）
 
-    /* 用来做一些判断，因为一些readends会做多次操作，比如task之间有重叠等等 */
+    /* ，readends，task */
     int oprateTime = 0;
 
-    /* 根据pairend read的比对方向，来确定整体的比对方向 */
+    /* pairend read， */
     static int8_t GetOrientationByte(bool read1NegativeStrand, bool read2NegativeStrand) {
         if (read1NegativeStrand) {
             if (read2NegativeStrand)
@@ -100,7 +100,7 @@ struct ReadEnds : PhysicalLocation {
         }
     }
 
-    /* 比较两个readends是否一样（有个冗余） */
+    /* readends（） */
     static bool AreComparableForDuplicates(const ReadEnds &lhs, const ReadEnds &rhs, bool compareRead2) {
         bool areComparable = true;
         areComparable = lhs.read1ReferenceIndex == rhs.read1ReferenceIndex &&
@@ -112,15 +112,15 @@ struct ReadEnds : PhysicalLocation {
         return areComparable;
     }
 
-    /* 比对方向是否正向 */
+    /*  */
     bool IsPositiveStrand() const { return orientation == F; }
 
-    /* pairend是否合适的比对上了 */
+    /* pairend */
     bool IsPaired() const { return read2ReferenceIndex != -1; }
 
     bool IsNegativeStrand() const { return orientation == R; }
 
-    // 对于相交的数据进行比对，a是否小于b，根据AreComparableForDuplicates函数得来
+    // ，ab，AreComparableForDuplicates
     static inline bool ReadLittleThan(const ReadEnds &a, const ReadEnds &b, bool compareRead2 = false) {
         int comp = a.read1ReferenceIndex - b.read1ReferenceIndex;
         if (comp == 0)
@@ -141,7 +141,7 @@ struct ReadEnds : PhysicalLocation {
         int comp = a.read1ReferenceIndex - b.read1ReferenceIndex;
         if (comp == 0)
             comp = a.read1Coordinate - b.read1Coordinate;
-        if (comp == 0)  // 这个放在坐标比较之前，因为在解析bam的时候，可能有的给read2ReferenceIndex赋值了,有的则没赋值
+        if (comp == 0)  // ，bam，read2ReferenceIndex,
             comp = a.orientation - b.orientation;
         if (comp == 0)
             comp = a.read2ReferenceIndex - b.read2ReferenceIndex;
@@ -150,7 +150,7 @@ struct ReadEnds : PhysicalLocation {
 //        if (comp == 0)
 //            comp = a.tile - b.tile;
 //        if (comp == 0)
-//            comp = a.x - b.x; // 由于picard的bug，用short类型来表示x，y，导致其可能为负数
+//            comp = a.x - b.x; // picardbug，shortx，y，
 //        if (comp == 0)
 //            comp - a.y - b.y;
         if (comp == 0)
@@ -168,12 +168,12 @@ struct ReadEnds : PhysicalLocation {
             comp = a.read2ReferenceIndex - b.read2ReferenceIndex;
         if (comp == 0)
             comp = a.read2Coordinate - b.read2Coordinate;
-        if (comp == 0)  // 这个放在坐标比较了之后，把坐标范围的放在之前，这样对分段数据块比较好处理
+        if (comp == 0)  // ，，
             comp = a.orientation - b.orientation;
 //        if (comp == 0)
 //            comp = a.tile - b.tile;
 //        if (comp == 0)
-//            comp = a.x - b.x; // 由于picard的bug，用short类型来表示x，y，导致其可能为负数
+//            comp = a.x - b.x; // picardbug，shortx，y，
 //        if (comp == 0)
 //            comp - a.y - b.y;
         if (comp == 0)
@@ -191,7 +191,7 @@ struct ReadEnds : PhysicalLocation {
             comp = a.read2ReferenceIndex - b.read2ReferenceIndex;
         if (comp == 0)
             comp = a.read2Coordinate - b.read2Coordinate;
-        if (comp == 0)  // 这个放在坐标比较了之后，把坐标范围的放在之前，这样对分段数据块比较好处理
+        if (comp == 0)  // ，，
             comp = a.orientation - b.orientation;
         return comp < 0;
     }
diff --git a/src/markdup/read_name_parser.h b/src/markdup/read_name_parser.h
index 1746d4b..817bb31 100644
--- a/src/markdup/read_name_parser.h
+++ b/src/markdup/read_name_parser.h
@@ -1,5 +1,5 @@
 /*
-Description: 解析read的name中的信息，比如tile, x, y等
+Description: readname，tile, x, y
 
 Copyright : All right reserved by ICT
 
@@ -28,7 +28,7 @@ using std::string;
  * Provides access to the physical location information about a cluster.
  * All values should be defaulted to -1 if unavailable.  ReadGroup and Tile
  * should only allow non-zero positive integers, x and y coordinates may be
- * negative. 非线程安全
+ * negative. 
  */
 struct ReadNameParser {
     /**
@@ -72,7 +72,7 @@ struct ReadNameParser {
         warnAboutRegexNotMatching = isWarn;
     }
 
-    /* 重新设置readNameRegex */
+    /* readNameRegex */
     void SetReadNameRegex(const string &strReadNameRegex) {
         readNameRegex = strReadNameRegex;
         if (strReadNameRegex == DEFAULT_READ_NAME_REGEX)
@@ -83,7 +83,7 @@ struct ReadNameParser {
         // readNamePattern = strReadNameRegex;
     }
 
-    /* 添加测序时候的tile x y 信息 */
+    /* tile x y  */
     bool AddLocationInformation(const string &readName, PhysicalLocation *loc) {
         if (!(readName == readNameStored)) {
             if (ReadLocationInformation(readName, loc)) {
diff --git a/src/util/bam_buf.cpp b/src/util/bam_buf.cpp
index 0473772..8f88976 100644
--- a/src/util/bam_buf.cpp
+++ b/src/util/bam_buf.cpp
@@ -1,5 +1,5 @@
 /*
-     Description: 读入sam/bam时，开辟一个大的buf，存放这些数据
+     Description: sam/bam，buf，
 
      Copyright : All right reserved by ICT
 
@@ -10,25 +10,25 @@
 #include "bam_buf.h"
 
 /*
- * BamBuf类
+ * BamBuf
  */
-// 读取数据直到读完，或者缓冲区满
+// ，
 int BamBuf::ReadBam() {
     int read_num = 0;
-    if (handle_last) {             // 处理上次读入的最后一个bam
-        if (has_enough_space()) {  // 必须调用，在边界处调整memffset
+    if (handle_last) {             // bam
+        if (has_enough_space()) {  // ，memffset
             ++read_num;
             append_one_bam();
         } else {
-            return read_num;  // 还是没空间
+            return read_num;  // 
         }
     }
     while (read_stat_ >= 0 && (read_stat_ = sam_read1(fp, hdr, bw->b)) >= 0) {
         bw->end_pos_ = BamWrap::BamEndPos(bw->b);
-        if (has_enough_space()) {  // 还有空间
-                                   // if (true) {  // 还有空间
+        if (has_enough_space()) {  // 
+                                   // if (true) {  // 
             append_one_bam();
-            ++read_num;  // 放进缓存才算读取到
+            ++read_num;  // 
         } else {
             break;
         }
@@ -41,7 +41,7 @@ int BamBuf::ReadBam() {
     return read_num;
 }
 
-// 初始化缓存
+// 
 void BamBuf::Init(samFile *fp, sam_hdr_t *hdr, int64_t mem_size) {
     this->fp = fp;
     this->hdr = hdr;
@@ -71,9 +71,9 @@ void BamBuf::ClearAll() {
     prepare_read();
 }
 
-// 为下一次读取做准备, 计算一些边界条件
+// , 
 inline void BamBuf::prepare_read() {
-    // 计算余留的下次计算可能用到的bam所占的位置
+    // bam
     if (bv.size() > 0) {
         BamWrap *bw = bv[0];
         legacy_start = (int64_t)bw - (int64_t)mem;
@@ -81,11 +81,11 @@ inline void BamBuf::prepare_read() {
         legacy_end = (int64_t)bw + bw->length() - (int64_t)mem;
     } else {
         legacy_start = legacy_end = 0;
-        mem_offset = 0;  // 上次没剩下，那就从头存储
+        mem_offset = 0;  // ，
     }
 }
 
-// 检查缓存是否还有空间
+// 
 inline bool BamBuf::has_enough_space() {
     const uint32_t bam_len = bw->length();
     int64_t potential_end = mem_offset + bam_len;
@@ -104,7 +104,7 @@ inline bool BamBuf::has_enough_space() {
     return potential_end < legacy_start;
 }
 
-// 处理一个读取后的bam
+// bam
 inline void BamBuf::append_one_bam() {
     BamWrap *bwp = (BamWrap *)(mem + mem_offset);
     *bwp = *bw;
@@ -113,15 +113,15 @@ inline void BamBuf::append_one_bam() {
     *bp = *bw->b;
     bp->data = (uint8_t *)((char *)bwp->b + sizeof(bam1_t));
     memcpy(bp->data, bw->b->data, bw->b->l_data);
-    // 更新下次存储的位置
+    // 
     mem_offset = (mem_offset + bw->length() + 8 - 1) & ~((size_t)(8 - 1));
     bv.push_back(bwp);
 }
 
-// 处理上次读入的最后一个read
+// read
 inline bool BamBuf::handle_last_read() {
-    if (handle_last) {             // 处理上次读入的最后一个bam
-        if (has_enough_space()) {  // 必须调用，在边界处调整memffset
+    if (handle_last) {             // bam
+        if (has_enough_space()) {  // ，memffset
             append_one_bam();
             handle_last = false;
             return true;
@@ -131,9 +131,9 @@ inline bool BamBuf::handle_last_read() {
 }
 
 /*
- * AsyncIoBamBuf 类
+ * AsyncIoBamBuf 
  */
-// 初始化缓存
+// 
 void AsyncIoBamBuf::Init(samFile *fp, sam_hdr_t *hdr, int64_t mem_size) {
     if (use_async_io_) {
         buf1_.Init(fp, hdr, mem_size >> 1);
@@ -147,7 +147,7 @@ void AsyncIoBamBuf::Init(samFile *fp, sam_hdr_t *hdr, int64_t mem_size) {
     }
 }
 
-// 读取数据
+// 
 int AsyncIoBamBuf::ReadBam() {
     if (use_async_io_) {
         hasThread = true;
@@ -178,11 +178,11 @@ int AsyncIoBamBuf::async_read_bam() {
         first_read_ = false;
         refresh_bam_arr();
     } else {
-        // join, 交换缓冲区指针
+        // join, 
         pthread_join(*tid_, 0);
         resize_buf();
 
-        if (need_read_) {  // 需要交换指针
+        if (need_read_) {  // 
             BamBuf *tmp = pi_;
             pi_ = po_;
             po_ = tmp;
@@ -190,14 +190,14 @@ int AsyncIoBamBuf::async_read_bam() {
         read_num = last_read_num_;
         refresh_bam_arr();
     }
-    // 异步读
+    // 
     pthread_create(tid_, 0, async_read, this);
     return read_num;
 }
 
 void *AsyncIoBamBuf::async_read(void *data) {
     AsyncIoBamBuf *ab = (AsyncIoBamBuf *)data;
-    if (ab->need_read_ && ab->ReadStat() >= 0) {  // 需要读取
+    if (ab->need_read_ && ab->ReadStat() >= 0) {  // 
         ab->last_read_num_ = ab->po_->ReadBam();
     } else {
         ab->last_read_num_ = 0;
@@ -205,23 +205,23 @@ void *AsyncIoBamBuf::async_read(void *data) {
     pthread_exit(0);
 }
 
-// 为下一次读取做准备,
-// 计算一些边界条件，延迟操作，因为此时可能po_对应的buf正在读取
+// ,
+// ，，po_buf
 void AsyncIoBamBuf::ClearBeforeIdx(size_t idxInBv) { clear_before_idx_ = idxInBv; }
 
-// 清空上一次所有读入的数据，延迟操作，因为此时可能po_对应的buf正在读取
+// ，，po_buf
 void AsyncIoBamBuf::ClearAll() { clear_all_ = true; }
 
 inline void AsyncIoBamBuf::resize_buf() {
-    if (clear_all_) {  // 清理上一轮的数据
+    if (clear_all_) {  // 
         clear_all_ = false;
         po_->ClearBeforeIdx(legacy_size_);
         pi_->ClearAll();
-        if (pi_->handle_last_read()) {  // 上次读取有一个read没放入缓存
+        if (pi_->handle_last_read()) {  // read
             last_read_num_ += 1;
-            legacy_size_ = pi_->Size();  // 应该只有一个read
+            legacy_size_ = pi_->Size();  // read
             need_read_ = true;
-        } else {  // 没空间存放，则不交换指针，或者文件已经读取完毕
+        } else {  // ，，
             legacy_size_ = 0;
             need_read_ = false;
         }
@@ -229,16 +229,16 @@ inline void AsyncIoBamBuf::resize_buf() {
         if (clear_before_idx_ < legacy_size_) {
             po_->ClearBeforeIdx(clear_before_idx_);
             legacy_size_ -= clear_before_idx_;
-            // 不需要交换指针，不需要读取
+            // ，
             need_read_ = false;
         } else {
             po_->ClearBeforeIdx(legacy_size_);
             pi_->ClearBeforeIdx(clear_before_idx_ - legacy_size_);
-            if (pi_->handle_last_read()) {  // 上次读取有一个read没放入缓存
+            if (pi_->handle_last_read()) {  // read
                 last_read_num_ += 1;
-                legacy_size_ = pi_->Size();  // 应该只有一个read
+                legacy_size_ = pi_->Size();  // read
                 need_read_ = true;
-            } else {  // 没空间存放，则不交换指针，或者文件已经读取完毕
+            } else {  // ，，
                 legacy_size_ = 0;
                 need_read_ = false;
             }
diff --git a/src/util/bam_buf.h b/src/util/bam_buf.h
index ee6a3c8..d0bb7e7 100644
--- a/src/util/bam_buf.h
+++ b/src/util/bam_buf.h
@@ -1,5 +1,5 @@
 /*
-     Description: 读入sam/bam时，开辟一个大的buf，存放这些数据
+     Description: sam/bam，buf，
 
      Copyright : All right reserved by ICT
 
@@ -27,32 +27,32 @@ using std::vector;
 using namespace std;
 
 /*
- * 存放读入的bam数据
+ * bam
  */
 struct BamBuf {
-    sam_hdr_t *hdr;            // sam文件的header信息
-    samFile *fp;               // sam文件指针
-    BamWrap *bw = nullptr;     // 用来循环读入bam
-    uint8_t *mem = nullptr;    // 用来存放bam的数据,
-                               // 程序结束后自动释放，所以没在析构函数里释放
-    int64_t mem_offset = 0;    // 下一次要存放的位置
-    int64_t mem_size;          // 缓存大小
-    int read_stat_ = 0;        // 读取状态，是否读完
-    vector<BamWrap *> bv;      // 方便对bam数据的访问
-    int64_t legacy_start = 0;  // 没处理完的bam在mem中的起始位置, 闭区间
-    int64_t legacy_end = 0;    // 没处理完的bam在mem中的结束位置, 开区间
-    bool handle_last = false;  // 上次最后读入的bam是否需要处理
+    sam_hdr_t *hdr;            // samheader
+    samFile *fp;               // sam
+    BamWrap *bw = nullptr;     // bam
+    uint8_t *mem = nullptr;    // bam,
+                               // ，
+    int64_t mem_offset = 0;    // 
+    int64_t mem_size;          // 
+    int read_stat_ = 0;        // ，
+    vector<BamWrap *> bv;      // bam
+    int64_t legacy_start = 0;  // bammem, 
+    int64_t legacy_end = 0;    // bammem, 
+    bool handle_last = false;  // bam
 
-    // 初始化缓存
+    // 
     void Init(samFile *fp, sam_hdr_t *hdr, int64_t mem_size);
-    // 读取数据直到读完，或者缓冲区满
+    // ，
     int ReadBam();
-    // 为下一次读取做准备, 计算一些边界条件
+    // , 
     void ClearBeforeIdx(size_t idxInBv);
-    // 清空上一次所有读入的数据
+    // 
     void ClearAll();
-    inline int64_t Size() { return bv.size(); }  // 包含多少个read
-    inline int ReadStat() { return read_stat_; }  // 文件的读取状态，是否可读（读取完全）
+    inline int64_t Size() { return bv.size(); }  // read
+    inline int ReadStat() { return read_stat_; }  // ，（）
     ~BamBuf() {
         if (this->mem != nullptr) {
             free(this->mem);
@@ -62,7 +62,7 @@ struct BamBuf {
             free(bw);
         }
     }
-    void FreeMemory()  // 释放开辟的内存
+    void FreeMemory()  // 
     {
         if (this->mem != nullptr) {
             free(this->mem);
@@ -75,14 +75,14 @@ struct BamBuf {
         this->bw = nullptr;
     }
     void prepare_read();
-    // 检查缓存是否还有空间
+    // 
     bool has_enough_space();
-    // 处理一个读取后的bam
+    // bam
     void append_one_bam();
-    // 处理上次读入的最后一个read
+    // read
     bool handle_last_read();
 
-    // 针对bv的操作
+    // bv
     inline BamWrap *operator[](int64_t pos) { return bv[pos]; }
     inline void push_back(BamWrap *val) { bv.push_back(val); }
     inline void clear() { bv.clear(); }
@@ -90,53 +90,53 @@ struct BamBuf {
 };
 
 /*
- * io异步缓冲区
+ * io
  */
 struct AsyncIoBamBuf {
     BamBuf buf1_;
     BamBuf buf2_;
-    BamBuf *pi_;  // 当前用的buf
-    BamBuf *po_;  // 后台在读取的buf
+    BamBuf *pi_;  // buf
+    BamBuf *po_;  // buf
     pthread_t *tid_ = NULL;
     bool hasThread = false;
-    int64_t legacy_size_ = 0;  // 上一轮运算之后，缓存中还剩余的上次读取的read数量
+    int64_t legacy_size_ = 0;  // ，read
     bool first_read_ = true;
-    int last_read_num_ = 0;  // 上一次读取了多少reads
+    int last_read_num_ = 0;  // reads
     bool need_read_ = true;
     bool use_async_io_ = true;
-    int64_t clear_before_idx_ = 0;  // 用户异步读取，下一轮读取之前清理掉clear_before_idx_之前的所有reads
-    bool clear_all_ = false;        // 用于异步读取，下一轮读取之前清理掉之前的所有reads
+    int64_t clear_before_idx_ = 0;  // ，clear_before_idx_reads
+    bool clear_all_ = false;        // ，reads
 
-    vector<BamWrap *> bam_arr_;  // 用来访问buf中的bam
+    vector<BamWrap *> bam_arr_;  // bufbam
 
     AsyncIoBamBuf() {}
     AsyncIoBamBuf(bool use_async) : use_async_io_(use_async) {}
-    // 析构
+    // 
     ~AsyncIoBamBuf() {
         if (tid_ != NULL) {
             if (hasThread)
                 pthread_join(*tid_, 0);
             free(tid_);
         }
-        // 其他的内存就等程序结束自动释放
-        // buf的析构函数会自动调用
+        // 
+        // buf
     }
 
-    // 初始化缓存
+    // 
     void Init(samFile *fp, sam_hdr_t *hdr, int64_t mem_size);
 
-    // 读取数据
+    // 
     int ReadBam();
-    // 为下一次读取做准备, 计算一些边界条件
+    // , 
     void ClearBeforeIdx(size_t idxInBv);
-    vector<BamWrap *> &GetBamArr() { return bam_arr_; }  // 获取bam array
-    // 清空上一次所有读入的数据
+    vector<BamWrap *> &GetBamArr() { return bam_arr_; }  // bam array
+    // 
     void ClearAll();
-    // 包含的read数量
+    // read
     inline int64_t Size() { return legacy_size_ + pi_->Size(); }
     inline int ReadStat() { return pi_->read_stat_; }
     inline BamWrap *operator[](int64_t pos) { return bam_arr_[pos]; }
-    // 获取某一段reads
+    // reads
     inline vector<BamWrap *> Slice(size_t startIdx, size_t endIdx) {
         if (endIdx > startIdx) {
             auto begItr = bam_arr_.begin();
@@ -149,11 +149,11 @@ struct AsyncIoBamBuf {
         buf2_.FreeMemory();
     }
 
-    // 同步读取
+    // 
     int sync_read_bam();
-    // 异步读取
+    // 
     int async_read_bam();
-    // 异步读取线程函数
+    // 
     static void *async_read(void *data);
     void resize_buf();
     inline void refresh_bam_arr() {
diff --git a/src/util/bam_wrap.h b/src/util/bam_wrap.h
index 3e5a20f..adeed8d 100644
--- a/src/util/bam_wrap.h
+++ b/src/util/bam_wrap.h
@@ -1,5 +1,5 @@
 /*
-     Description: 读入sam/bam时，开辟一个大的buf，存放这些数据
+     Description: sam/bam，buf，
 
      Copyright : All right reserved by ICT
 
@@ -20,37 +20,37 @@
 using namespace std;
 
 /*
-    这里的成员函数命名有点混乱，特此说明，小写加下划线的函数命名，无论是静态函数，还是普通成员函数，更侧重说明
-    这是类似bam的一个属性，而大写加驼峰命名的函数，更侧重说明这是通过计算得出的。
+    ，，，，，
+    bam，，。
 */
 /*
- * sam read的封装
+ * sam read
  */
 struct BamWrap {
-    // 将contig左移后加上pos作为全局位置
-    const static int MAX_CONTIG_LEN_SHIFT = 40;  // 将染色体id左移多少位，和位点拼合在一起
+    // contigpos
+    const static int MAX_CONTIG_LEN_SHIFT = 40;  // id，
     const static int READ_MAX_LENGTH = 200;
-    const static int READ_MAX_DEPTH = 1000;  // 这只是用来初始化空间用的，深度大于这个值也没关系
+    const static int READ_MAX_DEPTH = 1000;  // ，
 
-    // 成员变量尽量少，减少占用内存空间
+    // ，
     bam1_t *b;
-    int64_t end_pos_;  // bam的全局结束位置, 闭区间
+    int64_t end_pos_;  // bam, 
 
-    // 全局开始位置
+    // 
     inline int64_t start_pos() { return bam_global_pos(b); }
-    // 全局结束位置
+    // 
     inline int64_t end_pos() { return end_pos_; }
-    // 和reference对应的序列长度
+    // reference
     inline int16_t read_len() { return (end_pos_ - start_pos() + 1); }
 
-    // 在contig内的开始位置
+    // contig
     inline int32_t contig_pos() { return b->core.pos; }
-    // 在contig内部的结束位置
+    // contig
     inline int32_t contig_end_pos() { return bam_pos(end_pos_); }
-    // 序列的长度（AGTC字母个数）
+    // （AGTC）
     inline int16_t seq_len() { return b->core.l_qseq; }
 
-    // 算上开头的softclip
+    // softclip
     inline int32_t softclip_start() {
         const uint32_t *cigar = bam_get_cigar(b);
         const bam1_core_t &bc = b->core;
@@ -61,7 +61,7 @@ struct BamWrap {
         return bc.pos;
     }
 
-    // 算上结尾的softclip
+    // softclip
     inline int32_t softclip_end() {
         const uint32_t *cigar = bam_get_cigar(b);
         const bam1_core_t &bc = b->core;
@@ -72,7 +72,7 @@ struct BamWrap {
         return bam_pos(end_pos_);
     }
 
-    // 算上结尾的softclip
+    // softclip
     inline int32_t right_softclip_len() {
         const uint32_t *cigar = bam_get_cigar(b);
         const bam1_core_t &bc = b->core;
@@ -83,7 +83,7 @@ struct BamWrap {
         return 0;
     }
 
-    // 获取序列
+    // 
     inline std::string sequence() {
         ostringstream oss;
         char *seq = (char *)bam_get_seq(b);
@@ -96,9 +96,9 @@ struct BamWrap {
         return std::move(oss.str());
     }
 
-    // 获取名字
+    // 
     inline const char *query_name() { return bam_get_qname(b); }
-    // 获取cigar 字符串
+    // cigar 
     inline string cigar_str() {
         ostringstream oss;
         const uint32_t *cigar = bam_get_cigar(b);
@@ -111,10 +111,10 @@ struct BamWrap {
         return std::move(oss.str());
     }
 
-    // 占用的内存大小
+    // 
     inline int16_t length() { return sizeof(*this) + sizeof(bam1_t) + b->l_data; }
 
-    // 获取cigar中insert的总长度
+    // cigarinsert
     inline int32_t insert_cigar_len() {
         const uint32_t *cigar = bam_get_cigar(b);
         const bam1_core_t &bc = b->core;
@@ -128,7 +128,7 @@ struct BamWrap {
         return ret;
     }
 
-    // 获取cigar中delete的总长度
+    // cigardelete
     inline int32_t del_cigar_len() {
         const uint32_t *cigar = bam_get_cigar(b);
         const bam1_core_t &bc = b->core;
@@ -142,7 +142,7 @@ struct BamWrap {
         return ret;
     }
 
-    // 计算sam read的终点位置
+    // sam read
     static inline int64_t BamEndPos(const bam1_t *b) {
         const uint32_t *cigar = bam_get_cigar(b);
         const bam1_core_t &bc = b->core;
@@ -170,7 +170,7 @@ struct BamWrap {
         return hasWellDefinedFragmentSize;
     }
 
-    // 计算bam的adapterBoundary
+    // bamadapterBoundary
     int GetAdapterBoundary() {
         const bam1_core_t &bc = b->core;
         int adapterBoundary;
@@ -179,11 +179,11 @@ struct BamWrap {
         else if (bc.flag & BAM_FREVERSE)
             adapterBoundary = bc.mpos - 1;
         else
-            adapterBoundary = bc.pos + abs(bc.isize);  // GATK4.0 和 GATK3.5不一样，3.5的这里+1
+            adapterBoundary = bc.pos + abs(bc.isize);  // GATK4.0  GATK3.5，3.5+1
         return adapterBoundary;
     }
 
-    // 获取开头的I的长度
+    // I
     inline int GetHeadInsertLen() {
         int insLen = 0;
         const uint32_t *cigar = bam_get_cigar(b);
@@ -200,8 +200,8 @@ struct BamWrap {
         return insLen;
     }
 
-    // 获取soft clip开始位置(能处理H和S相连的情况，有这种情况么？,
-    // 注意开头的I要当做S？)
+    // soft clip(HS，？,
+    // IS？)
     inline int64_t GetSoftStart() {
         int64_t softStart = b->core.pos;
         const uint32_t *cigar = bam_get_cigar(b);
@@ -217,7 +217,7 @@ struct BamWrap {
         return softStart;
     }
 
-    // 获取unclipped开始位置(包括hardclip)
+    // unclipped(hardclip)
     inline int64_t GetUnclippedStart() {
         int64_t start = b->core.pos;
         const uint32_t *cigar = bam_get_cigar(b);
@@ -233,7 +233,7 @@ struct BamWrap {
         return start;
     }
 
-    // 获取unclipped结束位置(包括hardclip)
+    // unclipped(hardclip)
     inline int64_t GetUnclippedEnd() {
         int64_t end_pos = bam_endpos(b);
         const uint32_t *cigar = bam_get_cigar(b);
@@ -249,7 +249,7 @@ struct BamWrap {
         return end_pos - 1;
     }
 
-    /* 获取碱基质量分数的加和 */
+    /*  */
     /** Calculates a score for the read which is the sum of scores over Q15. */
     inline int GetSumOfBaseQualities() {
         int score = 0;
@@ -262,9 +262,9 @@ struct BamWrap {
         return score;
     }
 
-    /* 与flag相关的检测 */
+    /* flag */
 
-    /* 没有比对上 unmapped */
+    /*  unmapped */
     inline bool GetReadUnmappedFlag() { return b->core.flag & BAM_FUNMAP; }
 
     /* Template having multiple segments in sequencing  */
@@ -313,7 +313,7 @@ struct BamWrap {
      */
     bool GetMateNegativeStrandFlag() { return b->core.flag & BAM_FMREVERSE; }
 
-    /* 其他的一些信息 */
+    /*  */
     inline int GetReferenceLength() {
         int length = 0;
         const uint32_t *cigar = bam_get_cigar(b);
@@ -336,26 +336,26 @@ struct BamWrap {
         return length;
     }
 
-    // 计算bam的全局位置，算上染色体序号和比对位置
+    // bam，
     static inline int64_t bam_global_pos(bam1_t *b) {
         return (((int64_t)b->core.tid << MAX_CONTIG_LEN_SHIFT) | (int64_t)b->core.pos);
     }
     static inline int64_t bam_global_pos(int tid, int pos) {
         return (((int64_t)tid << MAX_CONTIG_LEN_SHIFT) | (int64_t)pos);
     }
-    // 根据全局位置获取bam的染色体序号
+    // bam
     static inline int32_t bam_tid(int64_t global_pos) {
         const int64_t mask = ~(((int64_t)1 << MAX_CONTIG_LEN_SHIFT) - 1);
         const int64_t high_tid = global_pos & mask;
         return (int32_t)(high_tid >> MAX_CONTIG_LEN_SHIFT);
     }
-    // 根据全局位置获取bam的比对位置(染色体内)
+    // bam()
     static inline int32_t bam_pos(int64_t global_pos) {
         const int64_t mask = ((int64_t)1 << MAX_CONTIG_LEN_SHIFT) - 1;
         return (int32_t)(global_pos & mask);
     }
 
-    // 设置是否冗余的标记
+    // 
     void SetDuplicateReadFlag(bool flag) { setFlag(flag, BAM_FDUP); }
 
     void setFlag(bool flag, int bit) {
diff --git a/src/util/murmur3.h b/src/util/murmur3.h
index d404ecc..48c304a 100644
--- a/src/util/murmur3.h
+++ b/src/util/murmur3.h
@@ -1,5 +1,5 @@
 /*
-Description: Murmur哈希
+Description: Murmur
 
 Copyright : All right reserved by ICT