Change some annotations

2025-04-23 14:59:40 +08:00 · 2025-04-23 14:59:40 +08:00 · ab7da8a7d7
parent b0b9efdc66
commit ab7da8a7d7
14 changed files with 375 additions and 375 deletions
--- a/src/markdup/dup_metrics.h
+++ b/src/markdup/dup_metrics.h
@ -11,7 +11,7 @@ using std::string;
 using std::vector;

 /*
- * 标记冗余过程中的一些数据统计
+ * 
 */
 struct DuplicationMetrics {
    /**
@ -57,15 +57,15 @@ struct DuplicationMetrics {
     */
    uint64_t ESTIMATED_LIBRARY_SIZE = 0;

-    // 其他的统计数据
+    // 
    vector<double> CoverageMult;

-    // 比如在该位置，有4个冗余，那么所有4个冗余的位置数量
+    // ，4，4
    MDMap DuplicateCountHist;
    MDMap NonOpticalDuplicateCountHist;
    MDMap OpticalDuplicatesCountHist;

-    // 没有冗余的位置总数量 for test
+    //  for test
    MDSet<int64_t> singletonReads;
    MDSet<int64_t> dupReads;  // for test
    MDSet<int64_t> bestReads;
--- a/src/markdup/markdup.cpp
+++ b/src/markdup/markdup.cpp
@ -1,6 +1,6 @@
 /*
 Description:
-标记bam文件中的冗余信息，只处理按照坐标排序后的bam，且bam为单一样本数据
+bam，bam，bam

 Copyright : All right reserved by ICT

@ -26,26 +26,26 @@ Date : 2023/10/23

 namespace nsgv {

-MarkDupsArg gMdArg;                   // 参数
-std::vector<ReadNameParser> gNameParsers;  // 每个线程一个read name parser
-samFile *gInBamFp;                    // 输入的bam文件
-sam_hdr_t *gInBamHeader;              // 输入的bam文件头信息
-samFile *gOutBamFp;                   // 输出文件, sam或者bam格式
-sam_hdr_t *gOutBamHeader;             // 输出文件的header
-DuplicationMetrics gMetrics;          // 统计信息
+MarkDupsArg gMdArg;                   // 
+std::vector<ReadNameParser> gNameParsers;  // read name parser
+samFile *gInBamFp;                    // bam
+sam_hdr_t *gInBamHeader;              // bam
+samFile *gOutBamFp;                   // , sambam
+sam_hdr_t *gOutBamHeader;             // header
+DuplicationMetrics gMetrics;          // 
 DupResult gDupRes;
 PipelineArg gPipe(&gDupRes);
 };  // namespace nsgv

-// 字节缓冲区
+// 
 struct ByteBuf {
    uint8_t *buf = nullptr;
-    int size = 0;      // 当前使用量
-    int capacity = 0;  // 最大容量
+    int size = 0;      // 
+    int capacity = 0;  // 
 };

 /*
- * 获取文件名后缀
+ * 
 */
 static string getFileExtension(const string &filename) {
    auto last_dot = filename.find_last_of('.');
@ -58,25 +58,25 @@ static string getFileExtension(const string &filename) {
 // entrance of mark duplicates
 int MarkDuplicates() {
    PROF_START(whole_process);
-    /* 初始化一些参数和变量*/
+    /* */
    nsgv::gNameParsers.resize(nsgv::gMdArg.NUM_THREADS);
    for (auto &parser : nsgv::gNameParsers)
-        parser.SetReadNameRegex(nsgv::gMdArg.READ_NAME_REGEX);  // 用来解析read name中的tile，x，y信息
+        parser.SetReadNameRegex(nsgv::gMdArg.READ_NAME_REGEX);  // read nametile，x，y

-    /* 打开输入bam文件 */
+    /* bam */
    nsgv::gInBamFp = sam_open_format(nsgv::gMdArg.INPUT_FILE.c_str(), "r", nullptr);
    if (!nsgv::gInBamFp) {
        spdlog::error("[{}] load sam/bam file failed.\n", __func__);
        return -1;
    }
    hts_set_opt(nsgv::gInBamFp, HTS_OPT_BLOCK_SIZE, BAM_BLOCK_SIZE);
-    nsgv::gInBamHeader = sam_hdr_read(nsgv::gInBamFp);  // 读取header
-    // 获取样本名称(libraryId)
+    nsgv::gInBamHeader = sam_hdr_read(nsgv::gInBamFp);  // header
+    // (libraryId)
    nsgv::gMetrics.LIBRARY = sam_hdr_line_name(nsgv::gInBamHeader, "RG", 0);

-    /* 利用线程池对输入输出文件进行读写 */
-    htsThreadPool htsPoolRead = {NULL, 0};   // 多线程读取，创建线程池
-    htsThreadPool htsPoolWrite = {NULL, 0};  // 读写用不同的线程池
+    /*  */
+    htsThreadPool htsPoolRead = {NULL, 0};   // ，
+    htsThreadPool htsPoolWrite = {NULL, 0};  // 
    htsPoolRead.pool = hts_tpool_init(nsgv::gMdArg.NUM_THREADS);
    htsPoolWrite.pool = hts_tpool_init(nsgv::gMdArg.NUM_THREADS);
    if (!htsPoolRead.pool || !htsPoolWrite.pool) {
@ -86,12 +86,12 @@ int MarkDuplicates() {
    }
    hts_set_opt(nsgv::gInBamFp, HTS_OPT_THREAD_POOL, &htsPoolRead);

-    /* 冗余检查和标记 */
+    /*  */
    PROF_START(markdup_all);
    PipelineMarkDups();
    PROF_END(gprof[GP_markdup_all], markdup_all);

-    /* 标记冗余, 将处理后的结果写入文件 */
+    /* ,  */
    char modeout[12] = "wb";
    sam_open_mode(modeout + 1, nsgv::gMdArg.OUTPUT_FILE.c_str(), NULL);
    nsgv::gOutBamFp = sam_open(nsgv::gMdArg.OUTPUT_FILE.c_str(), modeout);
@ -100,14 +100,14 @@ int MarkDuplicates() {
        return -1;
    }
    nsgv::gOutBamHeader = sam_hdr_dup(nsgv::gInBamHeader);
-    // 修改输出文件的header
+    // header
    sam_hdr_add_line(nsgv::gOutBamHeader, "PG", "ID", nsgv::gMdArg.PROGRAM_RECORD_ID.c_str(), "VN", FASTDUP_VERSION,
                     "CL", nsgv::gMdArg.CLI_STR.c_str(), NULL);

-    // 用同样的线程数量处理输出文件
+    // 
    hts_set_opt(nsgv::gOutBamFp, HTS_OPT_BLOCK_SIZE, BAM_BLOCK_SIZE);
    hts_set_opt(nsgv::gOutBamFp, HTS_OPT_THREAD_POOL, &htsPoolWrite);
-    sam_close(nsgv::gInBamFp);  // 重新打开bam文件
+    sam_close(nsgv::gInBamFp);  // bam
    nsgv::gInBamFp = sam_open_format(nsgv::gMdArg.INPUT_FILE.c_str(), "r", nullptr);
    if (!nsgv::gInBamFp) {
        spdlog::error("[{}] load sam/bam file failed.\n", __func__);
@ -122,8 +122,8 @@ int MarkDuplicates() {
        sam_close(nsgv::gInBamFp);
        return -1;
    }
-    // 输出index文件
-    string indexFn = nsgv::gMdArg.OUTPUT_FILE + ".bai";  // min_shift = 0 是bai格式
+    // index
+    string indexFn = nsgv::gMdArg.OUTPUT_FILE + ".bai";  // min_shift = 0 bai
    if ("sam" == getFileExtension(nsgv::gMdArg.OUTPUT_FILE) || !nsgv::gMdArg.CREATE_INDEX) {
        indexFn = "";
    }
@ -141,7 +141,7 @@ int MarkDuplicates() {
        }
    }

-    // 读取输入文件，标记冗余并输出
+    // ，
    BamBufType inBuf(nsgv::gMdArg.DUPLEX_IO);
    inBuf.Init(nsgv::gInBamFp, nsgv::gInBamHeader, nsgv::gMdArg.MAX_MEM);

@ -190,13 +190,13 @@ int MarkDuplicates() {
            isOpticalDup = false;
            isInDuplicateSet = false;

-            // 删除原来的duplicate tag
+            // duplicate tag
            if (nsgv::gMdArg.CLEAR_DT) {
                uint8_t *oldTagVal = bam_aux_get(bw->b, nsgv::gMdArg.DUPLICATE_TYPE_TAG.c_str());
                if (oldTagVal != NULL) bam_aux_del(bw->b, oldTagVal);
            }

-            // 统计信息
+            // 
            if (bw->GetReadUnmappedFlag()) {
                ++nsgv::gMetrics.UNMAPPED_READS;
            } else if (bw->IsSecondaryOrSupplementary()) {
@ -207,7 +207,7 @@ int MarkDuplicates() {
                ++nsgv::gMetrics.READ_PAIRS_EXAMINED;  // will need to be divided by 2 at the end
            }

-            /* 判断是否冗余 */
+            /*  */
            if (bamIdx == dupIdx) {
                ++realDupSize;  // for test
                isDup = true;
@ -217,7 +217,7 @@ int MarkDuplicates() {
                    duplicateSetSize = dupIdx.dupSet;
                }

-                // 为了防止小内存运行的时候，有重复的dupidx，这时候dup的repIdx和dupSetSize可能会有不同
+                // ，dupidx，duprepIdxdupSetSize
                while ((dupIdx = dupIdxQue.Pop()) == bamIdx);
                if (opticalIdx == bamIdx)
                    isOpticalDup = true;
@ -227,7 +227,7 @@ int MarkDuplicates() {
                        isOpticalDup = true;
                }

-                // 添加冗余标识
+                // 
                bw->SetDuplicateReadFlag(true);

                uint8_t *oldTagVal = bam_aux_get(bw->b, nsgv::gMdArg.DUPLICATE_TYPE_TAG.c_str());
@ -242,7 +242,7 @@ int MarkDuplicates() {
                                   nsgv::gMdArg.DUPLICATE_TYPE_LIBRARY.size() + 1,
                                   (const uint8_t *)nsgv::gMdArg.DUPLICATE_TYPE_LIBRARY.c_str());

-                // 计算统计信息
+                // 
                if (!bw->IsSecondaryOrSupplementary() && !bw->GetReadUnmappedFlag()) {
                    // Update the duplication metrics
                    if (!bw->GetReadPairedFlag() || bw->GetMateUnmappedFlag()) {
@ -282,7 +282,7 @@ int MarkDuplicates() {
                                   (const uint8_t *)&duplicateSetSize);
                }
            }
-            // 每个read都要写到output，只是添加标识，或者删掉这些冗余record
+            // readoutput，，record
            ++bamIdx;
            if (isDup && nsgv::gMdArg.REMOVE_DUPLICATES) {
                continue;
@ -312,7 +312,7 @@ int MarkDuplicates() {
    }
    bam_destroy1(bp);

-    // 计算统计信息
+    // 
    nsgv::gMetrics.READ_PAIRS_EXAMINED /= 2;
    nsgv::gMetrics.READ_PAIR_DUPLICATES /= 2;
    for (auto &arr : nsgv::gDupRes.opticalDupIdxArr) nsgv::gMetrics.READ_PAIR_OPTICAL_DUPLICATES += arr.size();
@ -329,7 +329,7 @@ int MarkDuplicates() {
    }
    calculateRoiHistogram(nsgv::gMetrics);

-    // 将统计信息写到文件里
+    // 
    if (!nsgv::gMdArg.METRICS_FILE.empty()) {
        ofstream ofsM(nsgv::gMdArg.METRICS_FILE);
        ofsM << "## StringHeader" << endl;
@ -364,7 +364,7 @@ int MarkDuplicates() {
        return -1;
    }

-    /* 关闭文件，收尾清理 */
+    /* ， */
    sam_close(nsgv::gOutBamFp);
    sam_close(nsgv::gInBamFp);

--- a/src/markdup/md_args.h
+++ b/src/markdup/md_args.h
@ -1,5 +1,5 @@
 /*
-Description: Markduplicate需要用到的一些参数
+Description: Markduplicate

 Copyright : All right reserved by ICT

@ -40,7 +40,7 @@ enum ValidationStringency {
 */
 enum DuplicateTaggingPolicy { DontTag, OpticalOnly, All };

-/* 排序的方式 */
+/*  */
 enum SortOrder {
    unsorted,
    queryname,
@ -49,14 +49,14 @@ enum SortOrder {
    unknown
 };

-/* 计算reads分数的方式（比那个read得分更高） */
+/* reads（read） */
 enum ScoringStrategy { SUM_OF_BASE_QUALITIES, TOTAL_MAPPED_REFERENCE_LENGTH, RANDOM };

-/* 索引文件的格式 （bai或者csi） */
+/*  （baicsi） */
 enum IndexFormat { BAI, CSI };
 }  // namespace nsmd

-/* markduplicate 需要的参数*/
+/* markduplicate */
 struct MarkDupsArg {
    string INPUT_FILE;  // input bam filename

@ -64,9 +64,9 @@ struct MarkDupsArg {

    int NUM_THREADS = 1;

-    size_t MAX_MEM = ((size_t)1) << 30; //  // 最小1G
+    size_t MAX_MEM = ((size_t)1) << 30; //  // 1G

-    bool DUPLEX_IO = true; // 同时读写
+    bool DUPLEX_IO = true; // 

    /**
     * The optional attribute in SAM/BAM/CRAM files used to store the duplicate type.
@ -156,7 +156,7 @@ struct MarkDupsArg {
              optional = true) */
    string MOLECULAR_IDENTIFIER_TAG = "";

-    /* 继承自 AbstractMarkDuplicatesCommandLineProgram 的参数*/
+    /*  AbstractMarkDuplicatesCommandLineProgram */
    /* "File to write duplication metrics to" */
    string METRICS_FILE;

@ -196,7 +196,7 @@ struct MarkDupsArg {
              optional = true */
    vector<string> COMMENT;

-    /* 继承自 AbstractOpticalDuplicateFinderCommandLineProgram 的参数 */
+    /*  AbstractOpticalDuplicateFinderCommandLineProgram  */

    /* "MarkDuplicates can use the tile and cluster positions to estimate the rate of optical duplication " +
            "in addition to the dominant source of duplication, PCR, to provide a more accurate estimation of library
@ -227,7 +227,7 @@ struct MarkDupsArg {
       completely disable this check, " + "set the value to -1." */
    long MAX_OPTICAL_DUPLICATE_SET_SIZE = DEFAULT_MAX_DUPLICATE_SET_SIZE;

-    /* 继承自 CommandLineProgram 的参数*/
+    /*  CommandLineProgram */

    /* "Whether to suppress job-summary info on System.err.", common = true */
    bool QUIET = false;
@ -256,9 +256,9 @@ struct MarkDupsArg {
    /* Add PG tag to each read in a SAM or BAM (PGTagArgumentCollection)*/
    bool ADD_PG_TAG_TO_READS = true;

-    // 命令行字符串
+    // 
    string CLI_STR;

-    // 开始运行时间
+    // 
    string START_TIME;
 };
--- a/src/markdup/md_funcs.cpp
+++ b/src/markdup/md_funcs.cpp
@ -30,7 +30,7 @@ extern DuplicationMetrics gMetrics;
 };

 /*
- * 计算read的分数
+ * read
 */
 int16_t computeDuplicateScore(BamWrap &bw) {
    int16_t score = 0;
@ -71,7 +71,7 @@ int16_t computeDuplicateScore(BamWrap &bw) {

 /*
 * Builds a read ends object that represents a single read.
- * 用来表示一个read的特征结构
+ * read
 */
 void buildReadEnds(BamWrap &bw, int64_t index, ReadNameParser &rnParser, ReadEnds *pKey) {
    auto &k = *pKey;
@ -93,11 +93,11 @@ void buildReadEnds(BamWrap &bw, int64_t index, ReadNameParser &rnParser, ReadEnd
    else
        nsgv::gMdArg.CHECK_OPTICAL_DUP = false;
    // cout << k.tile << ' ' << k.x << ' ' << k.y << endl;
-    // 计算位置key
+    // key
    k.posKey = BamWrap::bam_global_pos(k.read1ReferenceIndex, k.read1Coordinate);  // << 1 | k.orientation;
 }

-/* 对找到的pairend read end添加一些信息 */
+/* pairend read end */
 void modifyPairedEnds(const ReadEnds &fragEnd, ReadEnds *pPairedEnds) {
    auto &pairedEnds = *pPairedEnds;

@ -202,7 +202,7 @@ static void findOpticalDuplicates(vector<const ReadEnds *> &readEndsArr, const R
            if (currentLoc == pBestRe)
                keeperIndex = i;
            if (currentLoc->tile != ReadEnds::NO_VALUE) {
-                int key = currentLoc->tile;  // 只处理一个样本，所以只有一个read group
+                int key = currentLoc->tile;  // ，read group
                tileRGmap[key].push_back(i);
            }
            opticalDistanceRelationGraph.addNode(i);
@ -323,7 +323,7 @@ static int checkOpticalDuplicates(vector<const ReadEnds *> &readEndsArr, const R
 }

 /**
- * 记录光学原因造成的冗余
+ * 
 */
 void trackOpticalDuplicates(vector<const ReadEnds *> &readEndsArr, const ReadEnds *pBestRe) {
    bool hasFR = false, hasRF = false;
@ -358,7 +358,7 @@ void trackOpticalDuplicates(vector<const ReadEnds *> &readEndsArr, const ReadEnd
        nOpticalDup = checkOpticalDuplicates(readEndsArr, pBestRe);
    }

-    // 统计信息，trackDuplicateCounts
+    // ，trackDuplicateCounts
    ++nsgv::gMetrics.DuplicateCountHist[readEndsArr.size()];
    if (readEndsArr.size() > nOpticalDup)
        ++nsgv::gMetrics.NonOpticalDuplicateCountHist[readEndsArr.size() - nOpticalDup];
--- a/src/markdup/md_funcs.h
+++ b/src/markdup/md_funcs.h
@ -14,19 +14,19 @@ using std::unordered_map;
 using std::unordered_set;
 using std::vector;

-/* 前向声明 */
+/*  */
 class BamWrap;
 class ReadEnds;
 class ReadNameParser;

 /*
- * 用来检测optical duplication的graph
+ * optical duplicationgraph
 */
 template <class Node>
-struct Graph {           // 用set？
-    vector<Node> nodes;  // 图中的结点
+struct Graph {           // set？
+    vector<Node> nodes;  // 
    unordered_map<Node, int> nodeIdxMap;
-    unordered_map<int, unordered_set<int>> neighbors;  // 邻接列表
+    unordered_map<int, unordered_set<int>> neighbors;  // 

    int addNode(const Node &singleton) {
        int idx = -1;
@ -98,18 +98,18 @@ struct Graph {           // 用set？
 };

 /*
- * 计算read的分数
+ * read
 */
 int16_t computeDuplicateScore(BamWrap &bw);

 /*
 * Builds a read ends object that represents a single read.
- * 用来表示一个read的特征结构
+ * read
 */
 void buildReadEnds(BamWrap &bw, int64_t index, ReadNameParser &rnParser, ReadEnds *pKey);

 /*
- * 对找到的pairend read end添加一些信息
+ * pairend read end
 */
 void modifyPairedEnds(const ReadEnds &fragEnd, ReadEnds *pPairedEnds);

@ -118,7 +118,7 @@ void modifyPairedEnds(const ReadEnds &fragEnd, ReadEnds *pPairedEnds);
 * in fact optical duplicates, and stores the data in the instance level histogram.
 * Additionally sets the transient isOpticalDuplicate flag on each read end that is
 * identified as an optical duplicate.
- * 记录光学原因造成的冗余
+ * 
 */
 void trackOpticalDuplicates(vector<const ReadEnds *> &readEndsArr, const ReadEnds *pBestRe);

--- a/src/markdup/md_pipeline.cpp
+++ b/src/markdup/md_pipeline.cpp
@ -22,16 +22,16 @@ using namespace std;

 namespace nsgv {

-extern MarkDupsArg gMdArg;           // 用来测试性能
-extern samFile *gInBamFp;            // 输入的bam文件
-extern sam_hdr_t *gInBamHeader;      // 输入的bam文件头信息
-extern DuplicationMetrics gMetrics;  // 统计信息
+extern MarkDupsArg gMdArg;           // 
+extern samFile *gInBamFp;            // bam
+extern sam_hdr_t *gInBamHeader;      // bam
+extern DuplicationMetrics gMetrics;  // 
 extern vector<ReadNameParser> gNameParsers;
 extern DupResult gDupRes;
 extern PipelineArg gPipe;
 };  // namespace nsgv

-/* 处理一组pairend的readends，标记冗余, 这个函数需要串行运行，因为需要做一些统计*/
+/* pairendreadends，, ，*/
 static void markDupsForPairs(vector<const ReadEnds *> &vpRe, DPSet<DupInfo> *dupIdx, MDSet<int64_t> *opticalDupIdx,
                             DPSet<DupInfo> *repIdx, MDSet<int64_t> *notDupIdx = nullptr,
                             MDSet<int64_t> *notOpticalDupIdx = nullptr, MDSet<int64_t> *notRepIdx = nullptr) {
@ -41,7 +41,7 @@ static void markDupsForPairs(vector<const ReadEnds *> &vpRe, DPSet<DupInfo> *dup
    int maxScore = 0;
    const ReadEnds *pBest = nullptr;
    /** All read ends should have orientation FF, FR, RF, or RR **/
-    for (auto pe : vpRe) {  // 找分数最高的readend
+    for (auto pe : vpRe) {  // readend
        if (pe->score > maxScore || pBest == nullptr) {
            maxScore = pe->score;
            pBest = pe;
@ -53,7 +53,7 @@ static void markDupsForPairs(vector<const ReadEnds *> &vpRe, DPSet<DupInfo> *dup
        notDupIdx->insert(pBest->read2IndexInFile);
    }

-    if (nsgv::gMdArg.CHECK_OPTICAL_DUP) {  // 检查光学冗余
+    if (nsgv::gMdArg.CHECK_OPTICAL_DUP) {  // 
        // trackOpticalDuplicates
        vector<const ReadEnds *> prevOpticalRe;
        if (notOpticalDupIdx != nullptr) {
@ -64,7 +64,7 @@ static void markDupsForPairs(vector<const ReadEnds *> &vpRe, DPSet<DupInfo> *dup
            }
        }
        trackOpticalDuplicates(vpRe, pBest);
-        // 由于重叠问题，可能会更新数据
+        // ，
        if (notOpticalDupIdx != nullptr) {
            for (auto pe : prevOpticalRe) {
                if (!pe->isOpticalDuplicate) {
@ -74,13 +74,13 @@ static void markDupsForPairs(vector<const ReadEnds *> &vpRe, DPSet<DupInfo> *dup
            }
        }
    }
-    for (auto pe : vpRe) {  // 对非best read标记冗余
-        if (pe != pBest) {  // 非best
-            dupIdx->insert(DupInfo(pe->read1IndexInFile, pBest->read1IndexInFile, (int16_t)vpRe.size()));  // 添加read1
+    for (auto pe : vpRe) {  // best read
+        if (pe != pBest) {  // best
+            dupIdx->insert(DupInfo(pe->read1IndexInFile, pBest->read1IndexInFile, (int16_t)vpRe.size()));  // read1
            if (pe->read2IndexInFile != pe->read1IndexInFile)
                dupIdx->insert(DupInfo(pe->read2IndexInFile, pBest->read1IndexInFile, (int16_t)vpRe.size()));  // read2,
-            //  注意这里代表是read1的索引
-            //  检查是否optical dup
+            //  read1
+            //  optical dup
            if (pe->isOpticalDuplicate && opticalDupIdx != nullptr) {
                opticalDupIdx->insert(pe->read1IndexInFile);
                if (pe->read2IndexInFile != pe->read1IndexInFile)
@ -88,7 +88,7 @@ static void markDupsForPairs(vector<const ReadEnds *> &vpRe, DPSet<DupInfo> *dup
            }
        }
    }
-    // 在输出的bam文件中添加tag, best作为dupset的代表
+    // bamtag, bestdupset
    if (nsgv::gMdArg.TAG_DUPLICATE_SET_MEMBERS) {
        repIdx->insert(DupInfo(pBest->read1IndexInFile, pBest->read1IndexInFile, (int16_t)vpRe.size()));
        repIdx->insert(DupInfo(pBest->read2IndexInFile, pBest->read1IndexInFile, (int16_t)vpRe.size()));
@ -104,7 +104,7 @@ static void markDupsForPairs(vector<const ReadEnds *> &vpRe, DPSet<DupInfo> *dup
    }
 }

-/* 处理一组非paired的readends，标记冗余 */
+/* pairedreadends， */
 static void markDupsForFrags(vector<const ReadEnds *> &vpRe, bool containsPairs, DPSet<DupInfo> *dupIdx,
                             MDSet<int64_t> *notDupIdx = nullptr) {
    if (containsPairs) {
@ -133,28 +133,28 @@ static void markDupsForFrags(vector<const ReadEnds *> &vpRe, bool containsPairs,
    }
 }

-/* 找到与readend pos相等的所有readend */
+/* readend posreadend */
 static void getEqualRE(const ReadEnds &re, vector<ReadEnds> &src, vector<ReadEnds> *dst) {
-    auto range = std::equal_range(src.begin(), src.end(), re, ReadEnds::CorLittleThan);  // 只比对位点
+    auto range = std::equal_range(src.begin(), src.end(), re, ReadEnds::CorLittleThan);  // 
    dst->insert(dst->end(), range.first, range.second);
 }

 #define LOWER_BOUND(tid, nthread, ndata) ((tid) * (ndata) / (nthread))
 #define UPPER_BOUND(tid, nthread, ndata) ((tid + 1) * (ndata) / (nthread))

-/* 处理pairs */
+/* pairs */
 static void processPairs(vector<ReadEnds> &readEnds, DPSet<DupInfo> *dupIdx, MDSet<int64_t> *opticalDupIdx,
                         DPSet<DupInfo> *repIdx, MDSet<int64_t> *notDupIdx = nullptr,
                         MDSet<int64_t> *notOpticalDupIdx = nullptr, MDSet<int64_t> *notRepIdx = nullptr) {
    // return;
-    vector<const ReadEnds *> vpCache;  // 有可能是冗余的reads
+    vector<const ReadEnds *> vpCache;  // reads
    const ReadEnds *pReadEnd = nullptr;
    for (auto &re : readEnds) {
-        if (pReadEnd != nullptr && ReadEnds::AreComparableForDuplicates(*pReadEnd, re, true))  // 跟前一个一样
-            vpCache.push_back(&re);                                                            // 处理一个潜在的冗余组
+        if (pReadEnd != nullptr && ReadEnds::AreComparableForDuplicates(*pReadEnd, re, true))  // 
+            vpCache.push_back(&re);                                                            // 
        else {
            markDupsForPairs(vpCache, dupIdx, opticalDupIdx, repIdx, notDupIdx, notOpticalDupIdx,
-                             notRepIdx);  // 不一样
+                             notRepIdx);  // 
            vpCache.clear();
            vpCache.push_back(&re);
            pReadEnd = &re;
@ -163,11 +163,11 @@ static void processPairs(vector<ReadEnds> &readEnds, DPSet<DupInfo> *dupIdx, MDS
    markDupsForPairs(vpCache, dupIdx, opticalDupIdx, repIdx, notDupIdx, notOpticalDupIdx, notRepIdx);
 }

-/* 处理frags */
+/* frags */
 static void processFrags(vector<ReadEnds> &readEnds, DPSet<DupInfo> *dupIdx, MDSet<int64_t> *notDupIdx = nullptr) {
    bool containsPairs = false;
    bool containsFrags = false;
-    vector<const ReadEnds *> vpCache;  // 有可能是冗余的reads
+    vector<const ReadEnds *> vpCache;  // reads
    const ReadEnds *pReadEnd = nullptr;
    for (auto &re : readEnds) {
        if (pReadEnd != nullptr && ReadEnds::AreComparableForDuplicates(*pReadEnd, re, false)) {
@ -191,7 +191,7 @@ static void processFrags(vector<ReadEnds> &readEnds, DPSet<DupInfo> *dupIdx, MDS
 }


-/* 获取交叉部分的数据 */
+/*  */
 static inline void getIntersectData(vector<ReadEnds> &leftArr, vector<ReadEnds> &rightArr, vector<ReadEnds> *dst,
                                    bool isPairCmp = false) {
    if (leftArr.empty() || rightArr.empty()) {
@ -204,7 +204,7 @@ static inline void getIntersectData(vector<ReadEnds> &leftArr, vector<ReadEnds>

    while (!ReadEnds::ReadLittleThan(leftArr[leftEndIdx - leftSpan], rightArr[rightStartIdx], isPairCmp)) {
        leftSpan += 1;
-        if (leftSpan > leftEndIdx) {  // 上一个的范围被下一个全部包围了（可能会有bug，上上个也与下一个有交集呢？）
+        if (leftSpan > leftEndIdx) {  // （bug，？）
            leftSpan = leftArr.size() - 1;
            break;
        }
@ -212,7 +212,7 @@ static inline void getIntersectData(vector<ReadEnds> &leftArr, vector<ReadEnds>

    while (!ReadEnds::ReadLittleThan(leftArr[leftEndIdx], rightArr[rightSpan], isPairCmp)) {
        rightSpan += 1;
-        if (rightSpan == rightArr.size() - 1)  // 同上，可能会有bug
+        if (rightSpan == rightArr.size() - 1)  // ，bug
            break;
    }
    dst->insert(dst->end(), leftArr.end() - leftSpan, leftArr.end());
@ -223,7 +223,7 @@ static inline void getIntersectData(vector<ReadEnds> &leftArr, vector<ReadEnds>
        std::sort(dst->begin(), dst->end(), ReadEnds::FragLittleThan);
 }

-/* 将frags重叠部分的dup idx放进数据中 */
+/* fragsdup idx */
 static inline void refreshFragDupIdx(DPSet<DupInfo> &dupIdx, MDSet<int64_t> &notDupIdx, MarkDupDataArg *lastArg,
                                     MarkDupDataArg *curArg) {
    auto &lp = *lastArg;
@ -259,26 +259,26 @@ static void mtGenerateReadEnds(void *data, long idx, int tid) {
    PROF_START(gen);
    size_t start_id = LOWER_BOUND(idx, nThread, bams.size());
    size_t end_id = UPPER_BOUND(idx, nThread, bams.size());
-    for (size_t i = start_id; i < end_id; ++i) {  // 循环处理每个read
+    for (size_t i = start_id; i < end_id; ++i) {  // read
        BamWrap *bw = bams[i];
        const int64_t bamIdx = bamStartIdx + i;
        if (bw->GetReadUnmappedFlag()) {
            if (bw->b->core.tid == -1)
                // When we hit the unmapped reads with no coordinate, no reason to continue (only in coordinate sort).
                break;
-        } else if (!bw->IsSecondaryOrSupplementary()) {  // 是主要比对
+        } else if (!bw->IsSecondaryOrSupplementary()) {  // 
            ReadEnds fragEnd;
            buildReadEnds(*bw, bamIdx, rnParser, &fragEnd);
-            frags.push_back(fragEnd);                                     // 添加进frag集合
-            if (bw->GetReadPairedFlag() && !bw->GetMateUnmappedFlag()) {  // 是pairend而且互补的read也比对上了
+            frags.push_back(fragEnd);                                     // frag
+            if (bw->GetReadPairedFlag() && !bw->GetMateUnmappedFlag()) {  // pairendread
                string key = bw->query_name();
                if (unpairedDic.find(key) == unpairedDic.end()) {
                    unpairedDic[key] = {taskSeq, fragEnd};
-                } else {  // 找到了pairend
+                } else {  // pairend
                    auto &pairedEnds = unpairedDic.at(key).unpairedRE;
                    modifyPairedEnds(fragEnd, &pairedEnds);
                    pairs.push_back(pairedEnds);
-                    unpairedDic.erase(key);  // 删除找到的pairend
+                    unpairedDic.erase(key);  // pairend
                }
            }
        }
@ -302,8 +302,8 @@ static void doGenRE(PipelineArg &pipeArg) {
    const int numThread = pipeArg.numThread;

    kt_for(numThread, mtGenerateReadEnds, &pipeArg, numThread);
-    // 用来在genRE阶段计算一些Sort阶段应该计算的数据，保持每个step用时更平衡
-    // 轮询每个线程中未找到匹配的read，找到匹配的
+    // genRESort，step
+    // read，
    genREData.unpairedDic.clear();
    vector<ReadEnds> &pairs = genREData.pairsArr[numThread];
    pairs.clear();
@ -316,11 +316,11 @@ static void doGenRE(PipelineArg &pipeArg) {
            const ReadEnds &fragEnd = val.second.unpairedRE;
            if (genREData.unpairedDic.find(key) == genREData.unpairedDic.end()) {
                genREData.unpairedDic[key] = {readData.taskSeq, fragEnd};
-            } else {  // 找到了pairend
+            } else {  // pairend
                auto &pairedEnds = genREData.unpairedDic.at(key).unpairedRE;
                modifyPairedEnds(fragEnd, &pairedEnds);
                pairs.push_back(pairedEnds);
-                genREData.unpairedDic.erase(key);  // 删除找到的pairend
+                genREData.unpairedDic.erase(key);  // pairend
            }
        }
    }
@ -367,11 +367,11 @@ static void doMarkDup(PipelineArg &pipeArg) {
    sortData.dataPtr = tmpPtr;

    SortMarkData &smd = *(SortMarkData *)mdData.dataPtr;
-    //  先处理 pair
+    //   pair
    PROF_START(markdup_pair);
    processPairs(smd.pairs, &mdData.pairDupIdx, &mdData.pairOpticalDupIdx, &mdData.pairRepIdx);
    PROF_END(gprof[GP_markdup_pair], markdup_pair);
-    // 再处理frag
+    // frag
    PROF_START(markdup_frag);
    processFrags(smd.frags, &mdData.fragDupIdx);
    PROF_END(gprof[GP_markdup_frag], markdup_frag);
@ -418,7 +418,7 @@ static void refreshMarkDupData(DPSet<DupInfo> &dupIdx, MDSet<int64_t> &opticalDu
    refreshNotDupIdx(repIdx, p.pairRepIdx);
 }

-// 处理相邻数据块之间重叠的部分
+// 
 static void processIntersectFragPairs(MarkDupData &lp, MarkDupData &cp) {
    SortMarkData &lsm = *(SortMarkData *)lp.dataPtr;
    SortMarkData &csm = *(SortMarkData *)cp.dataPtr;
@ -430,7 +430,7 @@ static void processIntersectFragPairs(MarkDupData &lp, MarkDupData &cp) {
    MDSet<int64_t> notOpticalDupIdx;
    MDSet<int64_t> notDupIdx;
    MDSet<int64_t> notRepIdx;
-    // 先处理重叠的frags
+    // frags
    getIntersectData(lsm.frags, csm.frags, &reArr);
    processFrags(reArr, &dupIdx, &notDupIdx);
    refreshDupIdx(dupIdx, lp.fragDupIdx);
@ -438,34 +438,34 @@ static void processIntersectFragPairs(MarkDupData &lp, MarkDupData &cp) {
    refreshNotDupIdx(notDupIdx, lp.fragDupIdx);
    refreshNotDupIdx(notDupIdx, cp.fragDupIdx);

-    // 再处理重叠的pairs
+    // pairs
    reArr.clear();
    dupIdx.clear();
    notDupIdx.clear();

    getIntersectData(lsm.pairs, csm.pairs, &reArr, true);
    processPairs(reArr, &dupIdx, &opticalDupIdx, &repIdx, &notDupIdx, &notOpticalDupIdx, &notRepIdx);
-    refreshMarkDupData(dupIdx, opticalDupIdx, repIdx, notDupIdx, notOpticalDupIdx, notRepIdx, cp, lp); // 放在cp里，因为后面global里可能有相同的dup，防止多次出现
+    refreshMarkDupData(dupIdx, opticalDupIdx, repIdx, notDupIdx, notOpticalDupIdx, notRepIdx, cp, lp); // cp，globaldup，
 }

-// 在相邻的数据块之间寻找未匹配的readends, 找到匹配的放到lp里
+// readends, lp
 static void findUnpairedInDatas(MarkDupData &lp, MarkDupData &cp) {
    auto &interPairedData = lp.ckeyReadEndsMap;
    SortMarkData &lsm = *(SortMarkData *)lp.dataPtr;
    SortMarkData &csm = *(SortMarkData *)cp.dataPtr;
-    for (auto itr = lsm.unpairedDic.begin(); itr != lsm.unpairedDic.end(); ) {  // 遍历上一个任务中的每个未匹配的read
+    for (auto itr = lsm.unpairedDic.begin(); itr != lsm.unpairedDic.end(); ) {  // read
        auto &lastUnpair = *itr;
        auto &readName = lastUnpair.first;
        auto &lastUnpairInfo = lastUnpair.second;
-        auto lastRE = lastUnpairInfo.unpairedRE;                        // 未匹配的read end
-        if (csm.unpairedDic.find(readName) != csm.unpairedDic.end()) {  // 在当前这个任务里找到了这个未匹配的read
+        auto lastRE = lastUnpairInfo.unpairedRE;                        // read end
+        if (csm.unpairedDic.find(readName) != csm.unpairedDic.end()) {  // read
            auto &curUnpairInfo = csm.unpairedDic[readName];
            auto &curRE = curUnpairInfo.unpairedRE;
-            modifyPairedEnds(curRE, &lastRE); // lastRE当做找到匹配后，完整的ReadEnds
+            modifyPairedEnds(curRE, &lastRE); // lastRE，ReadEnds
            CalcKey ck(lastRE);
            auto &pairArr = interPairedData[ck];
            pairArr.push_back(lastRE);
-            // 从dict中清除配对后的readends
+            // dictreadends
            csm.unpairedDic.erase(readName);
            itr = lsm.unpairedDic.erase(itr);
        } else {
@ -474,23 +474,23 @@ static void findUnpairedInDatas(MarkDupData &lp, MarkDupData &cp) {
    }
 }

-// 在global和lp中寻找未匹配的readends, 找到匹配的放到global里
+// globallpreadends, global
 static void findUnpairedInGlobal(IntersectData &g, MarkDupData &lp) {
    auto &interPairedData = g.ckeyReadEndsMap;
    SortMarkData &lsm = *(SortMarkData *)lp.dataPtr;
-    for (auto itr = lsm.unpairedDic.begin(); itr != lsm.unpairedDic.end();) {  // 遍历上一个任务中的每个未匹配的read
+    for (auto itr = lsm.unpairedDic.begin(); itr != lsm.unpairedDic.end();) {  // read
        auto &lastUnpair = *itr;
        auto &readName = lastUnpair.first;
        auto &lastUnpairInfo = lastUnpair.second;
-        auto lastRE = lastUnpairInfo.unpairedRE;                    // 未匹配的read end
-        if (g.unpairedDic.find(readName) != g.unpairedDic.end()) {  // 在global里找到了这个未匹配的read
+        auto lastRE = lastUnpairInfo.unpairedRE;                    // read end
+        if (g.unpairedDic.find(readName) != g.unpairedDic.end()) {  // globalread
            auto &gUnpairInfo = g.unpairedDic[readName];
            auto &gRE = gUnpairInfo.unpairedRE;
-            modifyPairedEnds(lastRE, &gRE);  // gRE当做找到匹配后，完整的ReadEnds
+            modifyPairedEnds(lastRE, &gRE);  // gRE，ReadEnds
            CalcKey ck(gRE);
            auto &pairArr = interPairedData[ck];
            pairArr.push_back(gRE);
-            // 从dict中清除配对后的readends
+            // dictreadends
            g.unpairedDic.erase(readName);
            itr = lsm.unpairedDic.erase(itr);
        } else {
@ -528,10 +528,10 @@ static void doIntersect(PipelineArg &pipeArg) {
    SortMarkData &lsm = *(SortMarkData *)lp.dataPtr;
    SortMarkData &csm = *(SortMarkData *)cp.dataPtr;

-    // 处理相邻数据块之间重叠的部分
+    // 
    processIntersectFragPairs(lp, cp);

-    // 检查确保lp和np之间没有数据交叉
+    // lpnp
    int64_t lastLeft = INT64_MIN, lastRight = INT64_MAX, curLeft = INT64_MAX, curRight = INT64_MAX;
    if (lsm.pairs.size() > 0) {
        lastLeft = lsm.frags[0].Left();
@ -549,21 +549,21 @@ static void doIntersect(PipelineArg &pipeArg) {
    }
    g.rightPos = lastRight;

-    findUnpairedInDatas(lp, cp);  // 找到的匹配放到lp里
-    findUnpairedInGlobal(g, cp);  // 把cp中未匹配的放到global里保存
+    findUnpairedInDatas(lp, cp);  // lp
+    findUnpairedInGlobal(g, cp);  // cpglobal

-    // 处理lp中的新找到的匹配
+    // lp
    TaskSeqDupInfo t;
    for (auto itr = lp.ckeyReadEndsMap.begin(); itr != lp.ckeyReadEndsMap.end();) {
        auto &ckVal = *itr;
        auto &ck = ckVal.first;
        auto &pairArr = ckVal.second;
-        getEqualRE(pairArr[0], lsm.pairs, &pairArr); // 如果不在计算范围内，会放在global里
-        if (ck.Right() <= lastRight) { // 必须在当前数据块的范围内, 才进行处理
-            if (ck.Left() >= curLeft) {  // 在交叉的范围内才去加上这些在cp中的数据
+        getEqualRE(pairArr[0], lsm.pairs, &pairArr); // ，global
+        if (ck.Right() <= lastRight) { // , 
+            if (ck.Left() >= curLeft) {  // cp
                getEqualRE(pairArr[0], csm.pairs, &pairArr);
            }
-            // 在global里找一找ck
+            // globalck
            auto gitr = g.ckeyReadEndsMap.find(ck);
            if (gitr != g.ckeyReadEndsMap.end()) {
                auto &gPairArr = gitr->second;
@ -578,7 +578,7 @@ static void doIntersect(PipelineArg &pipeArg) {
        }
    }

-    // 处理找到匹配的global数据
+    // global
    for (auto itr = g.ckeyReadEndsMap.begin(); itr != g.ckeyReadEndsMap.end();) {
        auto &ckVal = *itr;
        auto &ck = ckVal.first;
@ -586,7 +586,7 @@ static void doIntersect(PipelineArg &pipeArg) {
        if (ck.Left() >= lastLeft) {
            getEqualRE(pairArr[0], lsm.pairs, &pairArr);
        }
-        if (ck.Right() <= lastRight) {  // 只有在这个范围内，对应位点的所有reads才完全都包含了
+        if (ck.Right() <= lastRight) {  // ，reads
            sort(pairArr.begin(), pairArr.end(), ReadEnds::PairLittleThan);
            processPairs(pairArr, &t.dupIdx, &t.opticalDupIdx, &t.repIdx, &t.notDupIdx, &t.notOpticalDupIdx, &t.notRepIdx);
            itr = g.ckeyReadEndsMap.erase(itr);
@ -595,16 +595,16 @@ static void doIntersect(PipelineArg &pipeArg) {
        }
    }

-    // 剩余的在lp中没处理的放到global里
+    // lpglobal
    for (auto &ckVal : lp.ckeyReadEndsMap) {
        auto &pairArr = g.ckeyReadEndsMap[ckVal.first];
        pairArr.insert(pairArr.end(), ckVal.second.begin(), ckVal.second.end());

    }
    lp.ckeyReadEndsMap.clear();
-    // 更新一下冗余结果
+    // 
    refreshMarkDupData(t.dupIdx, t.opticalDupIdx, t.repIdx, t.notDupIdx, t.notOpticalDupIdx, t.notRepIdx, lp, cp);
-    // 处理g中新找到的匹配
+    // g
    putDupinfoToGlobal(g, lp);

    for (auto &unPair : lsm.unpairedDic) {
@ -620,16 +620,16 @@ static void *pipeRead(void *data) {
    while (1) {
        PROF_START(read_wait);
        yarn::POSSESS(pipeArg.readSig);
-        yarn::WAIT_FOR(pipeArg.readSig, yarn::NOT_TO_BE, 1);  // 只有一个坑，因为在bambuf内部支持异步读取
+        yarn::WAIT_FOR(pipeArg.readSig, yarn::NOT_TO_BE, 1);  // ，bambuf
        PROF_END(gprof[GP_read_wait], read_wait);
        size_t readNum = 0;
        PROF_START(read);
        if (inBamBuf.ReadStat() >= 0)
-            readNum = inBamBuf.ReadBam();  // 读入新一轮的数据
+            readNum = inBamBuf.ReadBam();  // 
        PROF_END(gprof[GP_read], read);
        if (readNum < 1) {
            pipeArg.readFinish = 1;
-            yarn::TWIST(pipeArg.readSig, yarn::BY, 1);  // 读入了一轮数据
+            yarn::TWIST(pipeArg.readSig, yarn::BY, 1);  // 
            break;
        }
        spdlog::info("{} reads processed in {} round", readNum, pipeArg.readOrder);
@ -640,9 +640,9 @@ static void *pipeRead(void *data) {
        pipeArg.readData.taskSeq = pipeArg.readOrder;

        readNumSum += readNum;
-        inBamBuf.ClearAll();                        // 清理上一轮读入的数据
+        inBamBuf.ClearAll();                        // 
        pipeArg.readOrder += 1;                     // for next
-        yarn::TWIST(pipeArg.readSig, yarn::BY, 1);  // 读入了一轮数据
+        yarn::TWIST(pipeArg.readSig, yarn::BY, 1);  // 
    }
    spdlog::info("{} total reads processed", readNumSum);
    return 0;
@ -659,21 +659,21 @@ static void *pipeGenRE(void *data) {
    while (1) {
        PROF_START(gen_wait);
        yarn::POSSESS(pipeArg.readSig);
-        yarn::WAIT_FOR(pipeArg.readSig, yarn::NOT_TO_BE, 0);  // 等待有数据
+        yarn::WAIT_FOR(pipeArg.readSig, yarn::NOT_TO_BE, 0);  // 
        yarn::POSSESS(pipeArg.genRESig);
        PROF_END(gprof[GP_gen_wait], gen_wait);

-        yarn::WAIT_FOR(pipeArg.genRESig, yarn::NOT_TO_BE, pipeArg.GENBUFNUM);  // 有BUFNUM个坑
-        yarn::RELEASE(pipeArg.genRESig);                                       // 因为有不止一个位置，所以要释放
+        yarn::WAIT_FOR(pipeArg.genRESig, yarn::NOT_TO_BE, pipeArg.GENBUFNUM);  // BUFNUM
+        yarn::RELEASE(pipeArg.genRESig);                                       // ，

-        if (pipeArg.readFinish) { // 读取结束的时候，没有新的数据需要处理了
+        if (pipeArg.readFinish) { // ，
            yarn::POSSESS(pipeArg.genRESig);
            pipeArg.genREFinish = 1;
            yarn::TWIST(pipeArg.genRESig, yarn::BY, 1);
            yarn::TWIST(pipeArg.readSig, yarn::BY, -1);
            break;
        }
-        /* 处理bam，生成readends */
+        /* bam，readends */
        PROF_START(gen);
        doGenRE(pipeArg);
        PROF_END(gprof[GP_gen], gen);
@ -681,7 +681,7 @@ static void *pipeGenRE(void *data) {
        yarn::POSSESS(pipeArg.genRESig);
        pipeArg.genREOrder += 1;
        yarn::TWIST(pipeArg.genRESig, yarn::BY, 1);
-        yarn::TWIST(pipeArg.readSig, yarn::BY, -1);  // 使用了一次读入的数据
+        yarn::TWIST(pipeArg.readSig, yarn::BY, -1);  // 
    }
    return 0;
 }
@ -691,19 +691,19 @@ static void *pipeSort(void *data) {
    while (1) {
        PROF_START(sort_wait);
        yarn::POSSESS(pipeArg.genRESig);
-        yarn::WAIT_FOR(pipeArg.genRESig, yarn::NOT_TO_BE, 0);  // 等待有数据
+        yarn::WAIT_FOR(pipeArg.genRESig, yarn::NOT_TO_BE, 0);  // 
        yarn::RELEASE(pipeArg.genRESig);
        PROF_END(gprof[GP_sort_wait], sort_wait);

        yarn::POSSESS(pipeArg.sortSig);
-        yarn::WAIT_FOR(pipeArg.sortSig, yarn::NOT_TO_BE, pipeArg.SORTBUFNUM);  // 有BUFNUM个位置
+        yarn::WAIT_FOR(pipeArg.sortSig, yarn::NOT_TO_BE, pipeArg.SORTBUFNUM);  // BUFNUM
        yarn::RELEASE(pipeArg.sortSig);

        if (pipeArg.genREFinish) {
-            // 处理完剩余数据
+            // 
            while (pipeArg.sortOrder < pipeArg.genREOrder) {
                yarn::POSSESS(pipeArg.sortSig);
-                yarn::WAIT_FOR(pipeArg.sortSig, yarn::NOT_TO_BE, pipeArg.SORTBUFNUM);  // 有BUFNUM个位置
+                yarn::WAIT_FOR(pipeArg.sortSig, yarn::NOT_TO_BE, pipeArg.SORTBUFNUM);  // BUFNUM
                yarn::RELEASE(pipeArg.sortSig);

                PROF_START(sort);
@ -719,7 +719,7 @@ static void *pipeSort(void *data) {
            yarn::TWIST(pipeArg.sortSig, yarn::BY, 1);
            break;
        }
-        /* 排序 readends */
+        /*  readends */
        PROF_START(sort);
        doSort(pipeArg);
        PROF_END(gprof[GP_sort], sort);
@ -739,7 +739,7 @@ static void *pipeMarkDup(void *data) {
    while (1) {
        PROF_START(markdup_wait);
        yarn::POSSESS(pipeArg.sortSig);
-        yarn::WAIT_FOR(pipeArg.sortSig, yarn::NOT_TO_BE, 0);  // 等待有数据
+        yarn::WAIT_FOR(pipeArg.sortSig, yarn::NOT_TO_BE, 0);  // 
        yarn::RELEASE(pipeArg.sortSig);
        PROF_END(gprof[GP_markdup_wait], markdup_wait);

@ -748,7 +748,7 @@ static void *pipeMarkDup(void *data) {
        yarn::RELEASE(pipeArg.markDupSig);

        if (pipeArg.sortFinish) {
-            // 应该还得处理剩余的数据
+            // 
            while (pipeArg.markDupOrder < pipeArg.sortOrder) {
                yarn::POSSESS(pipeArg.markDupSig);
                yarn::WAIT_FOR(pipeArg.markDupSig, yarn::NOT_TO_BE, pipeArg.MARKBUFNUM);
@ -767,7 +767,7 @@ static void *pipeMarkDup(void *data) {
            yarn::TWIST(pipeArg.markDupSig, yarn::TO, 2 + pipeArg.MARKBUFNUM);
            break;
        }
-        /* 冗余检测 readends */
+        /*  readends */
        PROF_START(markdup);
        doMarkDup(pipeArg);
        PROF_END(gprof[GP_markdup], markdup);
@ -787,7 +787,7 @@ static void *pipeIntersect(void *data) {
    while (1) {
        PROF_START(intersect_wait);
        yarn::POSSESS(pipeArg.markDupSig);
-        yarn::WAIT_FOR(pipeArg.markDupSig, yarn::TO_BE_MORE_THAN, kInitIntersectOrder);  // 等待有数据
+        yarn::WAIT_FOR(pipeArg.markDupSig, yarn::TO_BE_MORE_THAN, kInitIntersectOrder);  // 
        yarn::RELEASE(pipeArg.markDupSig);
        PROF_END(gprof[GP_intersect_wait], intersect_wait);

@ -801,7 +801,7 @@ static void *pipeIntersect(void *data) {

            break;
        }
-        /* 交叉数据处理 readends */
+        /*  readends */
        PROF_START(intersect);
        doIntersect(pipeArg);
        PROF_END(gprof[GP_intersect], intersect);
@ -814,7 +814,7 @@ static void *pipeIntersect(void *data) {
    return 0;
 }

-/* 当所有任务结束后，global data里还有未处理的数据 */
+/* ，global data */
 static void processLastData(PipelineArg &pipeArg) {
    IntersectData &g = pipeArg.intersectData;
    MarkDupData &lp = pipeArg.markDupData[(pipeArg.intersectOrder - 1) % pipeArg.MARKBUFNUM];
@ -823,7 +823,7 @@ static void processLastData(PipelineArg &pipeArg) {
    if (lsm.pairs.size() > 0) {
        lastLeft = lsm.frags[0].Left();
    }
-    // 处理找到匹配的global数据
+    // global
    TaskSeqDupInfo t;
    for (auto itr = g.ckeyReadEndsMap.begin(); itr != g.ckeyReadEndsMap.end();) {
        auto &ckVal = *itr;
@ -836,9 +836,9 @@ static void processLastData(PipelineArg &pipeArg) {
        processPairs(pairArr, &t.dupIdx, &t.opticalDupIdx, &t.repIdx);
        itr = g.ckeyReadEndsMap.erase(itr);
    }
-    // 更新一下冗余结果
+    // 
    refreshMarkDupData(t.dupIdx, t.opticalDupIdx, t.repIdx, t.notDupIdx, t.notOpticalDupIdx, t.notRepIdx, lp);
-    // 处理g中新找到的匹配
+    // g
    putDupinfoToGlobal(g, lp);
 }

@ -866,7 +866,7 @@ static void parallelPipeline() {
    // spdlog::info("result size : {} GB", nsgv::gDupRes.byteSize() / 1024.0 / 1024 / 1024);
 }

-/* 并行流水线方式处理数据，标记冗余 */
+/* ， */
 void PipelineMarkDups() {
    if (nsgv::gMdArg.NUM_THREADS > 1)
        return parallelPipeline();
@ -879,11 +879,11 @@ void PipelineMarkDups() {
    for (int i = 0; i < pipeArg.GENBUFNUM; ++i) {
        pipeArg.genREData[i].Init(pipeArg.numThread);
    }
-    pipeArg.intersectOrder = 1;  // do intersect 从1开始
+    pipeArg.intersectOrder = 1;  // do intersect 1
    while (1) {
        size_t readNum = 0;
        if (inBamBuf.ReadStat() >= 0)
-            readNum = inBamBuf.ReadBam();  // 读入新一轮的数据
+            readNum = inBamBuf.ReadBam();  // 
        if (readNum < 1) {
            break;
        }
@ -908,7 +908,7 @@ void PipelineMarkDups() {
        }

        readNumSum += readNum;
-        inBamBuf.ClearAll();     // 清理上一轮读入的数据
+        inBamBuf.ClearAll();     // 
        pipeArg.readOrder += 1;  // for next
    }
    processLastData(pipeArg);
--- a/src/markdup/md_pipeline.h
+++ b/src/markdup/md_pipeline.h
@ -8,22 +8,22 @@

 struct ReadData {
    vector<BamWrap *> bams;   // read step output
-    int64_t bamStartIdx = 0;  // 每轮读入的bam数组，起始位置在全局bam中的索引
-    int64_t taskSeq = 0;      // 任务序号
+    int64_t bamStartIdx = 0;  // bam，bam
+    int64_t taskSeq = 0;      // 
 };

 struct GenREData {
-    vector<vector<ReadEnds>> pairsArr;       // 成对的reads
-    vector<vector<ReadEnds>> fragsArr;       // 暂未找到配对的reads
-    vector<UnpairedNameMap> unpairedDicArr;  // 用来寻找pair end
+    vector<vector<ReadEnds>> pairsArr;       // reads
+    vector<vector<ReadEnds>> fragsArr;       // reads
+    vector<UnpairedNameMap> unpairedDicArr;  // pair end
    void Init(int nThread) {
-        for (int i = 0; i <= nThread; ++i) {  // 比线程多一个，主要是pairs多一个，其他没用
+        for (int i = 0; i <= nThread; ++i) {  // ，pairs，
            pairsArr.push_back(vector<ReadEnds>());
            fragsArr.push_back(vector<ReadEnds>());
            unpairedDicArr.push_back(UnpairedNameMap());
        }
    }
-    UnpairedNameMap unpairedDic;         // 代替sort step中一部分计算
+    UnpairedNameMap unpairedDic;         // sort step
    size_t byteSize() {
        size_t bytes = 0;
        for (auto &v : pairsArr)
@ -38,9 +38,9 @@ struct GenREData {
 };

 struct SortMarkData {
-    vector<ReadEnds> pairs;              // 成对的reads
-    vector<ReadEnds> frags;              // 暂未找到配对的reads
-    UnpairedNameMap unpairedDic;         // 用来寻找pair end
+    vector<ReadEnds> pairs;              // reads
+    vector<ReadEnds> frags;              // reads
+    UnpairedNameMap unpairedDic;         // pair end
    size_t byteSize() {
        size_t bytes = 0;
        for (auto &r : pairs) bytes += sizeof(r);
@ -56,11 +56,11 @@ struct SortData {
 };

 struct MarkDupData {
-    int64_t taskSeq = 0;               // 任务序号
-    DPSet<DupInfo> pairDupIdx;         // pair的冗余read的索引
-    MDSet<int64_t> pairOpticalDupIdx;  // optical冗余read的索引
-    DPSet<DupInfo> fragDupIdx;         // frag的冗余read的索引
-    DPSet<DupInfo> pairRepIdx;         // pair的dupset代表read的索引
+    int64_t taskSeq = 0;               // 
+    DPSet<DupInfo> pairDupIdx;         // pairread
+    MDSet<int64_t> pairOpticalDupIdx;  // opticalread
+    DPSet<DupInfo> fragDupIdx;         // fragread
+    DPSet<DupInfo> pairRepIdx;         // pairdupsetread
    CkeyReadEndsMap ckeyReadEndsMap;

    volatile void *dataPtr;  // SortMarkData pointer
@ -111,11 +111,11 @@ struct DupResult {
 };

 struct IntersectData {
-    UnpairedNameMap unpairedDic;  // 用来寻找pair end
+    UnpairedNameMap unpairedDic;  // pair end
    CkeyReadEndsMap ckeyReadEndsMap;
    int64_t rightPos = 0;

-    // 每个task对应一个vector
+    // taskvector
    vector<vector<DupInfo>> &dupIdxArr;
    vector<vector<int64_t>> &opticalDupIdxArr;
    vector<vector<DupInfo>> &repIdxArr;
@ -138,7 +138,7 @@ struct IntersectData {
    }
 };

-// 记录流水线状态，task的序号，以及某阶段是否结束
+// ，task，
 struct PipelineArg {
    static const int GENBUFNUM = 2;
    static const int SORTBUFNUM = 2;
@ -161,8 +161,8 @@ struct PipelineArg {
    yarn::lock_t *markDupSig;

    PipelineArg(DupResult *resPtr) : intersectData(resPtr) {
-        readSig = yarn::NEW_LOCK(0);   // 最大值1, 双buffer在bambuf中实现了，对调用透明
-        genRESig = yarn::NEW_LOCK(0);  // 最大值2, 双buffer
+        readSig = yarn::NEW_LOCK(0);   // 1, bufferbambuf，
+        genRESig = yarn::NEW_LOCK(0);  // 2, buffer
        sortSig = yarn::NEW_LOCK(0);
        markDupSig = yarn::NEW_LOCK(0);
        for (int i = 0; i < SORTBUFNUM; ++i) {
@ -208,8 +208,8 @@ struct PipelineArg {
 };

 struct REArrIdIdx {
-    int arrId = 0;        // 数组索引
-    uint64_t arrIdx = 0;  // 数组内部当前索引
+    int arrId = 0;        // 
+    uint64_t arrIdx = 0;  // 
    const ReadEnds *pE = nullptr;
 };

@ -224,7 +224,7 @@ struct REPairGreaterThan {

 template <typename CMP>
 struct ReadEndsHeap {
-    // 将冗余索引和他对应的task vector对应起来
+    // task vector
    vector<vector<ReadEnds>> *arr2d;
    priority_queue<REArrIdIdx, vector<REArrIdIdx>, CMP> minHeap;
    uint64_t popNum = 0;
@ -269,5 +269,5 @@ struct ReadEndsHeap {
    }
 };

-// 并行运行mark duplicate
+// mark duplicate
 void PipelineMarkDups();
--- a/src/markdup/md_types.h
+++ b/src/markdup/md_types.h
@ -19,13 +19,13 @@ using std::string;
 using std::unordered_set;
 using std::vector;

-/* 存放未匹配readend相同位点的所有readend */
+/* readendreadend */
 struct UnpairedREInfo {
-    int64_t taskSeq;  // 对应第几轮计算
+    int64_t taskSeq;  // 
    ReadEnds unpairedRE;
 };

-/* 对于一个pair数据，一个完整的计算点，包含read1的比对位置和read2的比对位置 */
+/* pair，，read1read2 */
 struct CalcKey {
    int8_t orientation = -1;
    int32_t read1ReferenceIndex = -1;
@ -70,7 +70,7 @@ struct CalcKey {
            comp = read2ReferenceIndex - o.read2ReferenceIndex;
        if (comp == 0)
            comp = read2Coordinate - o.read2Coordinate;
-        // 需要orientation，因为要跟排序的比较方式和顺序一致
+        // orientation，
        if (comp == 0)
            comp = orientation - o.orientation;
        return comp < 0;
@ -91,7 +91,7 @@ struct CalcKey {
            comp = read2ReferenceIndex - o.read2ReferenceIndex;
        if (comp == 0)
            comp = read2Coordinate - o.read2Coordinate;
-        // 需要orientation，因为要跟排序的比较方式和顺序一致
+        // orientation，
        if (comp == 0)
            comp = orientation - o.orientation;
        return comp < 0;
@ -113,10 +113,10 @@ struct CalcKeyEqual {
    bool operator()(const CalcKey &o1, const CalcKey &o2) const { return o1 == o2; }
 };

-/* 用来记录冗余索引相关的信息 */
+/*  */
 struct DupInfo {
    int16_t dupSet = 0;  // dup set size
-    uint16_t repIdxHigh = 0;  // 这一批冗余中的非冗余read 代表的索引
+    uint16_t repIdxHigh = 0;  // read 
    uint32_t repIdxLow = 0;
    int64_t idx;

@ -164,7 +164,7 @@ using CalcSet = set<T>;

 using ReadEndsSet = tsl::robin_set<ReadEnds, ReadEndsHash, ReadEndsEqual>;

-/* 当遗留数据在当前任务找到了pair read后，进行冗余计算时候存放结果的数据结构 */
+/* pair read， */
 struct TaskSeqDupInfo {
    DPSet<DupInfo> dupIdx;
    MDSet<int64_t> opticalDupIdx;
@ -174,7 +174,7 @@ struct TaskSeqDupInfo {
    MDSet<int64_t> notRepIdx;
 };

-/* 保存有未匹配pair位点的信息，包括read end数组和有几个未匹配的read end */
+/* pair，read endread end */
 struct UnpairedPosInfo {
    int unpairedNum = 0;
    int64_t taskSeq;
@ -184,33 +184,33 @@ struct UnpairedPosInfo {
 // typedef unordered_map<string, UnpairedREInfo> UnpairedNameMap;
 // typedef unordered_map<int64_t, UnpairedPosInfo> UnpairedPositionMap;

-typedef tsl::robin_map<string, UnpairedREInfo> UnpairedNameMap;  // 以read name为索引，保存未匹配的pair read
-//typedef map<string, UnpairedREInfo> UnpairedNameMap;  // 以read name为索引，保存未匹配的pair read
-typedef tsl::robin_map<int64_t, UnpairedPosInfo> UnpairedPositionMap;  // 以位点为索引，保存该位点包含的对应的所有read和该位点包含的剩余未匹配的read的数量
-// typedef map<CalcKey, vector<ReadEnds>> CkeyReadEndsMap;  // 以calckey为关键字，保存在相邻数据块之前找到的匹配readEnds
+typedef tsl::robin_map<string, UnpairedREInfo> UnpairedNameMap;  // read name，pair read
+//typedef map<string, UnpairedREInfo> UnpairedNameMap;  // read name，pair read
+typedef tsl::robin_map<int64_t, UnpairedPosInfo> UnpairedPositionMap;  // ，readread
+// typedef map<CalcKey, vector<ReadEnds>> CkeyReadEndsMap;  // calckey，readEnds
 typedef unordered_map<CalcKey, vector<ReadEnds>, CalcKeyHash, CalcKeyEqual>
-    CkeyReadEndsMap;  // 以calckey为关键字，保存在相邻数据块之前找到的匹配readEnds
+    CkeyReadEndsMap;  // calckey，readEnds
 // typedef tsl::robin_map<CalcKey, vector<ReadEnds>, CalcKeyHash, CalcKeyEqual> CkeyReadEndsMap;  //
-// 以calckey为关键字，保存在相邻数据块之前找到的匹配readEnds
+// calckey，readEnds

-/* 单线程处理冗余参数结构体 */
+/*  */
 struct MarkDupDataArg {
-    int64_t taskSeq;                     // 任务序号
-    int64_t bamStartIdx;                 // 当前vBam数组中第一个bam记录在整体bam中所处的位置
-    vector<BamWrap *> bams;              // 存放待处理的bam read
-    vector<ReadEnds> pairs;              // 成对的reads
-    vector<ReadEnds> frags;              // 暂未找到配对的reads
-    DPSet<DupInfo> pairDupIdx;           // pair的冗余read的索引
-    MDSet<int64_t> pairOpticalDupIdx;    // optical冗余read的索引
-    DPSet<DupInfo> fragDupIdx;           // frag的冗余read的索引
-    DPSet<DupInfo> pairRepIdx;           // pair的dupset代表read的索引
-    MDSet<int64_t> pairSingletonIdx;     // 某位置只有一对read的所有read pair个数
-    UnpairedNameMap unpairedDic;         // 用来寻找pair end
-    UnpairedPositionMap unpairedPosArr;  // 存放未匹配的ReadEnd对应位点的所有ReadEnd，为了避免重复存储
+    int64_t taskSeq;                     // 
+    int64_t bamStartIdx;                 // vBambambam
+    vector<BamWrap *> bams;              // bam read
+    vector<ReadEnds> pairs;              // reads
+    vector<ReadEnds> frags;              // reads
+    DPSet<DupInfo> pairDupIdx;           // pairread
+    MDSet<int64_t> pairOpticalDupIdx;    // opticalread
+    DPSet<DupInfo> fragDupIdx;           // fragread
+    DPSet<DupInfo> pairRepIdx;           // pairdupsetread
+    MDSet<int64_t> pairSingletonIdx;     // readread pair
+    UnpairedNameMap unpairedDic;         // pair end
+    UnpairedPositionMap unpairedPosArr;  // ReadEndReadEnd，
 };

 /*
- * 优先队列，用最小堆来实现对所有冗余索引的排序
+ * ，
 */
 template <typename T>
 struct PairArrIdIdx {
@ -226,9 +226,9 @@ struct IdxGreaterThan {

 template <typename T>
 struct DupIdxQueue {
-    // 将冗余索引和他对应的task vector对应起来
+    // task vector

-    // 由于是多个task来查找冗余，所以每次找到的冗余index都放在一个独立的vector中，vector之间可能有重叠，所以需要用一个最小堆来维护
+    // task，indexvector，vector，
    vector<vector<T>> *dupIdx2DArr;
    priority_queue<PairArrIdIdx<T>, vector<PairArrIdIdx<T>>, IdxGreaterThan<T>> minHeap;
    uint64_t popNum = 0;
--- a/src/markdup/read_ends.h
+++ b/src/markdup/read_ends.h
@ -1,6 +1,6 @@
 /*
 Description: read
-ends结构体主要用来标记冗余，包含一些序列的测序过程中的物理信息等
+ends，

 Copyright : All right reserved by ICT

@ -39,15 +39,15 @@ struct PhysicalLocation {
    int16_t y = -1;
 };

-/* 包含了所有read ends信息，如picard里边的 ReadEndsForMarkDuplicates*/
+/* read ends，picard ReadEndsForMarkDuplicates*/
 struct ReadEnds : PhysicalLocation {
    static const int8_t F = 0, R = 1, FF = 2, FR = 3, RR = 4, RF = 5;
-    /* 保留一些bam记录中的数据 */
+    /* bam */
    bool read1FirstOfPair = true;
-    /* ReadEnds中的成员变量 */
+    /* ReadEnds */
    /** Little struct-like class to hold read pair (and fragment) end data for
     * duplicate marking. */
-    // int16_t libraryId; // 没用，不考虑多样本
+    // int16_t libraryId; // ，
    int8_t orientation = -1;
    int32_t read1ReferenceIndex = -1;
    int32_t read1Coordinate = -1;
@ -55,8 +55,8 @@ struct ReadEnds : PhysicalLocation {
    // This field is overloaded for flow based processing as the end coordinate of read 1. (paired reads not supported)
    int32_t read2Coordinate = -1;
    /* Additional information used to detect optical dupes */
-    // int16_t readGroup = -1; 一般经过比对后的bam文件只有一个read
-    // group，normal或者tumor
+    // int16_t readGroup = -1; bamread
+    // group，normaltumor
    /** For optical duplicate detection the orientation matters regard to 1st or
     * 2nd end of a mate */
    int8_t orientationForOpticalDuplicates = -1;
@ -64,7 +64,7 @@ struct ReadEnds : PhysicalLocation {
     */
    bool isOpticalDuplicate = false;

-    /* ReadEndsForMarkDuplicates中的成员变量 */
+    /* ReadEndsForMarkDuplicates */
    /** Little struct-like class to hold read pair (and fragment) end data for
     * MarkDuplicatesWithMateCigar **/
    int16_t score = 0;
@ -72,20 +72,20 @@ struct ReadEnds : PhysicalLocation {
    int64_t read2IndexInFile = -1;
    int64_t duplicateSetSize = -1;

-    /* ReadEndsForMarkDuplicatesWithBarcodes中的成员变量 (好像用不到) */
+    /* ReadEndsForMarkDuplicatesWithBarcodes () */
    // int32_t barcode = 0; // primary barcode for this read (and pair)
    // int32_t readOneBarcode = 0; // read one barcode, 0 if not present
    // int32_t readTwoBarcode = 0; // read two barcode, 0 if not present or not
    // paired

-    /* zzh增加的成员变量 */
-    int64_t posKey = -1;  // 根据位置信息生成的关键字 return (int64_t)tid <<
-                          // MAX_CONTIG_LEN_SHIFT | (int64_t)pos; （包含clip的序列，也就是可能比map结果更偏左）
+    /* zzh */
+    int64_t posKey = -1;  //  return (int64_t)tid <<
+                          // MAX_CONTIG_LEN_SHIFT | (int64_t)pos; （clip，map）

-    /* 用来做一些判断，因为一些readends会做多次操作，比如task之间有重叠等等 */
+    /* ，readends，task */
    int oprateTime = 0;

-    /* 根据pairend read的比对方向，来确定整体的比对方向 */
+    /* pairend read， */
    static int8_t GetOrientationByte(bool read1NegativeStrand, bool read2NegativeStrand) {
        if (read1NegativeStrand) {
            if (read2NegativeStrand)
@ -100,7 +100,7 @@ struct ReadEnds : PhysicalLocation {
        }
    }

-    /* 比较两个readends是否一样（有个冗余） */
+    /* readends（） */
    static bool AreComparableForDuplicates(const ReadEnds &lhs, const ReadEnds &rhs, bool compareRead2) {
        bool areComparable = true;
        areComparable = lhs.read1ReferenceIndex == rhs.read1ReferenceIndex &&
@ -112,15 +112,15 @@ struct ReadEnds : PhysicalLocation {
        return areComparable;
    }

-    /* 比对方向是否正向 */
+    /*  */
    bool IsPositiveStrand() const { return orientation == F; }

-    /* pairend是否合适的比对上了 */
+    /* pairend */
    bool IsPaired() const { return read2ReferenceIndex != -1; }

    bool IsNegativeStrand() const { return orientation == R; }

-    // 对于相交的数据进行比对，a是否小于b，根据AreComparableForDuplicates函数得来
+    // ，ab，AreComparableForDuplicates
    static inline bool ReadLittleThan(const ReadEnds &a, const ReadEnds &b, bool compareRead2 = false) {
        int comp = a.read1ReferenceIndex - b.read1ReferenceIndex;
        if (comp == 0)
@ -141,7 +141,7 @@ struct ReadEnds : PhysicalLocation {
        int comp = a.read1ReferenceIndex - b.read1ReferenceIndex;
        if (comp == 0)
            comp = a.read1Coordinate - b.read1Coordinate;
-        if (comp == 0)  // 这个放在坐标比较之前，因为在解析bam的时候，可能有的给read2ReferenceIndex赋值了,有的则没赋值
+        if (comp == 0)  // ，bam，read2ReferenceIndex,
            comp = a.orientation - b.orientation;
        if (comp == 0)
            comp = a.read2ReferenceIndex - b.read2ReferenceIndex;
@ -150,7 +150,7 @@ struct ReadEnds : PhysicalLocation {
 //        if (comp == 0)
 //            comp = a.tile - b.tile;
 //        if (comp == 0)
-//            comp = a.x - b.x; // 由于picard的bug，用short类型来表示x，y，导致其可能为负数
+//            comp = a.x - b.x; // picardbug，shortx，y，
 //        if (comp == 0)
 //            comp - a.y - b.y;
        if (comp == 0)
@ -168,12 +168,12 @@ struct ReadEnds : PhysicalLocation {
            comp = a.read2ReferenceIndex - b.read2ReferenceIndex;
        if (comp == 0)
            comp = a.read2Coordinate - b.read2Coordinate;
-        if (comp == 0)  // 这个放在坐标比较了之后，把坐标范围的放在之前，这样对分段数据块比较好处理
+        if (comp == 0)  // ，，
            comp = a.orientation - b.orientation;
 //        if (comp == 0)
 //            comp = a.tile - b.tile;
 //        if (comp == 0)
-//            comp = a.x - b.x; // 由于picard的bug，用short类型来表示x，y，导致其可能为负数
+//            comp = a.x - b.x; // picardbug，shortx，y，
 //        if (comp == 0)
 //            comp - a.y - b.y;
        if (comp == 0)
@ -191,7 +191,7 @@ struct ReadEnds : PhysicalLocation {
            comp = a.read2ReferenceIndex - b.read2ReferenceIndex;
        if (comp == 0)
            comp = a.read2Coordinate - b.read2Coordinate;
-        if (comp == 0)  // 这个放在坐标比较了之后，把坐标范围的放在之前，这样对分段数据块比较好处理
+        if (comp == 0)  // ，，
            comp = a.orientation - b.orientation;
        return comp < 0;
    }
--- a/src/markdup/read_name_parser.h
+++ b/src/markdup/read_name_parser.h
@ -1,5 +1,5 @@
 /*
-Description: 解析read的name中的信息，比如tile, x, y等
+Description: readname，tile, x, y

 Copyright : All right reserved by ICT

@ -28,7 +28,7 @@ using std::string;
 * Provides access to the physical location information about a cluster.
 * All values should be defaulted to -1 if unavailable.  ReadGroup and Tile
 * should only allow non-zero positive integers, x and y coordinates may be
- * negative. 非线程安全
+ * negative. 
 */
 struct ReadNameParser {
    /**
@ -72,7 +72,7 @@ struct ReadNameParser {
        warnAboutRegexNotMatching = isWarn;
    }

-    /* 重新设置readNameRegex */
+    /* readNameRegex */
    void SetReadNameRegex(const string &strReadNameRegex) {
        readNameRegex = strReadNameRegex;
        if (strReadNameRegex == DEFAULT_READ_NAME_REGEX)
@ -83,7 +83,7 @@ struct ReadNameParser {
        // readNamePattern = strReadNameRegex;
    }

-    /* 添加测序时候的tile x y 信息 */
+    /* tile x y  */
    bool AddLocationInformation(const string &readName, PhysicalLocation *loc) {
        if (!(readName == readNameStored)) {
            if (ReadLocationInformation(readName, loc)) {
--- a/src/util/bam_buf.cpp
+++ b/src/util/bam_buf.cpp
@ -1,5 +1,5 @@
 /*
-     Description: 读入sam/bam时，开辟一个大的buf，存放这些数据
+     Description: sam/bam，buf，

     Copyright : All right reserved by ICT

@ -10,25 +10,25 @@
 #include "bam_buf.h"

 /*
- * BamBuf类
+ * BamBuf
 */
-// 读取数据直到读完，或者缓冲区满
+// ，
 int BamBuf::ReadBam() {
    int read_num = 0;
-    if (handle_last) {             // 处理上次读入的最后一个bam
-        if (has_enough_space()) {  // 必须调用，在边界处调整memffset
+    if (handle_last) {             // bam
+        if (has_enough_space()) {  // ，memffset
            ++read_num;
            append_one_bam();
        } else {
-            return read_num;  // 还是没空间
+            return read_num;  // 
        }
    }
    while (read_stat_ >= 0 && (read_stat_ = sam_read1(fp, hdr, bw->b)) >= 0) {
        bw->end_pos_ = BamWrap::BamEndPos(bw->b);
-        if (has_enough_space()) {  // 还有空间
-                                   // if (true) {  // 还有空间
+        if (has_enough_space()) {  // 
+                                   // if (true) {  // 
            append_one_bam();
-            ++read_num;  // 放进缓存才算读取到
+            ++read_num;  // 
        } else {
            break;
        }
@ -41,7 +41,7 @@ int BamBuf::ReadBam() {
    return read_num;
 }

-// 初始化缓存
+// 
 void BamBuf::Init(samFile *fp, sam_hdr_t *hdr, int64_t mem_size) {
    this->fp = fp;
    this->hdr = hdr;
@ -71,9 +71,9 @@ void BamBuf::ClearAll() {
    prepare_read();
 }

-// 为下一次读取做准备, 计算一些边界条件
+// , 
 inline void BamBuf::prepare_read() {
-    // 计算余留的下次计算可能用到的bam所占的位置
+    // bam
    if (bv.size() > 0) {
        BamWrap *bw = bv[0];
        legacy_start = (int64_t)bw - (int64_t)mem;
@ -81,11 +81,11 @@ inline void BamBuf::prepare_read() {
        legacy_end = (int64_t)bw + bw->length() - (int64_t)mem;
    } else {
        legacy_start = legacy_end = 0;
-        mem_offset = 0;  // 上次没剩下，那就从头存储
+        mem_offset = 0;  // ，
    }
 }

-// 检查缓存是否还有空间
+// 
 inline bool BamBuf::has_enough_space() {
    const uint32_t bam_len = bw->length();
    int64_t potential_end = mem_offset + bam_len;
@ -104,7 +104,7 @@ inline bool BamBuf::has_enough_space() {
    return potential_end < legacy_start;
 }

-// 处理一个读取后的bam
+// bam
 inline void BamBuf::append_one_bam() {
    BamWrap *bwp = (BamWrap *)(mem + mem_offset);
    *bwp = *bw;
@ -113,15 +113,15 @@ inline void BamBuf::append_one_bam() {
    *bp = *bw->b;
    bp->data = (uint8_t *)((char *)bwp->b + sizeof(bam1_t));
    memcpy(bp->data, bw->b->data, bw->b->l_data);
-    // 更新下次存储的位置
+    // 
    mem_offset = (mem_offset + bw->length() + 8 - 1) & ~((size_t)(8 - 1));
    bv.push_back(bwp);
 }

-// 处理上次读入的最后一个read
+// read
 inline bool BamBuf::handle_last_read() {
-    if (handle_last) {             // 处理上次读入的最后一个bam
-        if (has_enough_space()) {  // 必须调用，在边界处调整memffset
+    if (handle_last) {             // bam
+        if (has_enough_space()) {  // ，memffset
            append_one_bam();
            handle_last = false;
            return true;
@ -131,9 +131,9 @@ inline bool BamBuf::handle_last_read() {
 }

 /*
- * AsyncIoBamBuf 类
+ * AsyncIoBamBuf 
 */
-// 初始化缓存
+// 
 void AsyncIoBamBuf::Init(samFile *fp, sam_hdr_t *hdr, int64_t mem_size) {
    if (use_async_io_) {
        buf1_.Init(fp, hdr, mem_size >> 1);
@ -147,7 +147,7 @@ void AsyncIoBamBuf::Init(samFile *fp, sam_hdr_t *hdr, int64_t mem_size) {
    }
 }

-// 读取数据
+// 
 int AsyncIoBamBuf::ReadBam() {
    if (use_async_io_) {
        hasThread = true;
@ -178,11 +178,11 @@ int AsyncIoBamBuf::async_read_bam() {
        first_read_ = false;
        refresh_bam_arr();
    } else {
-        // join, 交换缓冲区指针
+        // join, 
        pthread_join(*tid_, 0);
        resize_buf();

-        if (need_read_) {  // 需要交换指针
+        if (need_read_) {  // 
            BamBuf *tmp = pi_;
            pi_ = po_;
            po_ = tmp;
@ -190,14 +190,14 @@ int AsyncIoBamBuf::async_read_bam() {
        read_num = last_read_num_;
        refresh_bam_arr();
    }
-    // 异步读
+    // 
    pthread_create(tid_, 0, async_read, this);
    return read_num;
 }

 void *AsyncIoBamBuf::async_read(void *data) {
    AsyncIoBamBuf *ab = (AsyncIoBamBuf *)data;
-    if (ab->need_read_ && ab->ReadStat() >= 0) {  // 需要读取
+    if (ab->need_read_ && ab->ReadStat() >= 0) {  // 
        ab->last_read_num_ = ab->po_->ReadBam();
    } else {
        ab->last_read_num_ = 0;
@ -205,23 +205,23 @@ void *AsyncIoBamBuf::async_read(void *data) {
    pthread_exit(0);
 }

-// 为下一次读取做准备,
-// 计算一些边界条件，延迟操作，因为此时可能po_对应的buf正在读取
+// ,
+// ，，po_buf
 void AsyncIoBamBuf::ClearBeforeIdx(size_t idxInBv) { clear_before_idx_ = idxInBv; }

-// 清空上一次所有读入的数据，延迟操作，因为此时可能po_对应的buf正在读取
+// ，，po_buf
 void AsyncIoBamBuf::ClearAll() { clear_all_ = true; }

 inline void AsyncIoBamBuf::resize_buf() {
-    if (clear_all_) {  // 清理上一轮的数据
+    if (clear_all_) {  // 
        clear_all_ = false;
        po_->ClearBeforeIdx(legacy_size_);
        pi_->ClearAll();
-        if (pi_->handle_last_read()) {  // 上次读取有一个read没放入缓存
+        if (pi_->handle_last_read()) {  // read
            last_read_num_ += 1;
-            legacy_size_ = pi_->Size();  // 应该只有一个read
+            legacy_size_ = pi_->Size();  // read
            need_read_ = true;
-        } else {  // 没空间存放，则不交换指针，或者文件已经读取完毕
+        } else {  // ，，
            legacy_size_ = 0;
            need_read_ = false;
        }
@ -229,16 +229,16 @@ inline void AsyncIoBamBuf::resize_buf() {
        if (clear_before_idx_ < legacy_size_) {
            po_->ClearBeforeIdx(clear_before_idx_);
            legacy_size_ -= clear_before_idx_;
-            // 不需要交换指针，不需要读取
+            // ，
            need_read_ = false;
        } else {
            po_->ClearBeforeIdx(legacy_size_);
            pi_->ClearBeforeIdx(clear_before_idx_ - legacy_size_);
-            if (pi_->handle_last_read()) {  // 上次读取有一个read没放入缓存
+            if (pi_->handle_last_read()) {  // read
                last_read_num_ += 1;
-                legacy_size_ = pi_->Size();  // 应该只有一个read
+                legacy_size_ = pi_->Size();  // read
                need_read_ = true;
-            } else {  // 没空间存放，则不交换指针，或者文件已经读取完毕
+            } else {  // ，，
                legacy_size_ = 0;
                need_read_ = false;
            }
--- a/src/util/bam_buf.h
+++ b/src/util/bam_buf.h
@ -1,5 +1,5 @@
 /*
-     Description: 读入sam/bam时，开辟一个大的buf，存放这些数据
+     Description: sam/bam，buf，

     Copyright : All right reserved by ICT

@ -27,32 +27,32 @@ using std::vector;
 using namespace std;

 /*
- * 存放读入的bam数据
+ * bam
 */
 struct BamBuf {
-    sam_hdr_t *hdr;            // sam文件的header信息
-    samFile *fp;               // sam文件指针
-    BamWrap *bw = nullptr;     // 用来循环读入bam
-    uint8_t *mem = nullptr;    // 用来存放bam的数据,
-                               // 程序结束后自动释放，所以没在析构函数里释放
-    int64_t mem_offset = 0;    // 下一次要存放的位置
-    int64_t mem_size;          // 缓存大小
-    int read_stat_ = 0;        // 读取状态，是否读完
-    vector<BamWrap *> bv;      // 方便对bam数据的访问
-    int64_t legacy_start = 0;  // 没处理完的bam在mem中的起始位置, 闭区间
-    int64_t legacy_end = 0;    // 没处理完的bam在mem中的结束位置, 开区间
-    bool handle_last = false;  // 上次最后读入的bam是否需要处理
+    sam_hdr_t *hdr;            // samheader
+    samFile *fp;               // sam
+    BamWrap *bw = nullptr;     // bam
+    uint8_t *mem = nullptr;    // bam,
+                               // ，
+    int64_t mem_offset = 0;    // 
+    int64_t mem_size;          // 
+    int read_stat_ = 0;        // ，
+    vector<BamWrap *> bv;      // bam
+    int64_t legacy_start = 0;  // bammem, 
+    int64_t legacy_end = 0;    // bammem, 
+    bool handle_last = false;  // bam

-    // 初始化缓存
+    // 
    void Init(samFile *fp, sam_hdr_t *hdr, int64_t mem_size);
-    // 读取数据直到读完，或者缓冲区满
+    // ，
    int ReadBam();
-    // 为下一次读取做准备, 计算一些边界条件
+    // , 
    void ClearBeforeIdx(size_t idxInBv);
-    // 清空上一次所有读入的数据
+    // 
    void ClearAll();
-    inline int64_t Size() { return bv.size(); }  // 包含多少个read
-    inline int ReadStat() { return read_stat_; }  // 文件的读取状态，是否可读（读取完全）
+    inline int64_t Size() { return bv.size(); }  // read
+    inline int ReadStat() { return read_stat_; }  // ，（）
    ~BamBuf() {
        if (this->mem != nullptr) {
            free(this->mem);
@ -62,7 +62,7 @@ struct BamBuf {
            free(bw);
        }
    }
-    void FreeMemory()  // 释放开辟的内存
+    void FreeMemory()  // 
    {
        if (this->mem != nullptr) {
            free(this->mem);
@ -75,14 +75,14 @@ struct BamBuf {
        this->bw = nullptr;
    }
    void prepare_read();
-    // 检查缓存是否还有空间
+    // 
    bool has_enough_space();
-    // 处理一个读取后的bam
+    // bam
    void append_one_bam();
-    // 处理上次读入的最后一个read
+    // read
    bool handle_last_read();

-    // 针对bv的操作
+    // bv
    inline BamWrap *operator[](int64_t pos) { return bv[pos]; }
    inline void push_back(BamWrap *val) { bv.push_back(val); }
    inline void clear() { bv.clear(); }
@ -90,53 +90,53 @@ struct BamBuf {
 };

 /*
- * io异步缓冲区
+ * io
 */
 struct AsyncIoBamBuf {
    BamBuf buf1_;
    BamBuf buf2_;
-    BamBuf *pi_;  // 当前用的buf
-    BamBuf *po_;  // 后台在读取的buf
+    BamBuf *pi_;  // buf
+    BamBuf *po_;  // buf
    pthread_t *tid_ = NULL;
    bool hasThread = false;
-    int64_t legacy_size_ = 0;  // 上一轮运算之后，缓存中还剩余的上次读取的read数量
+    int64_t legacy_size_ = 0;  // ，read
    bool first_read_ = true;
-    int last_read_num_ = 0;  // 上一次读取了多少reads
+    int last_read_num_ = 0;  // reads
    bool need_read_ = true;
    bool use_async_io_ = true;
-    int64_t clear_before_idx_ = 0;  // 用户异步读取，下一轮读取之前清理掉clear_before_idx_之前的所有reads
-    bool clear_all_ = false;        // 用于异步读取，下一轮读取之前清理掉之前的所有reads
+    int64_t clear_before_idx_ = 0;  // ，clear_before_idx_reads
+    bool clear_all_ = false;        // ，reads

-    vector<BamWrap *> bam_arr_;  // 用来访问buf中的bam
+    vector<BamWrap *> bam_arr_;  // bufbam

    AsyncIoBamBuf() {}
    AsyncIoBamBuf(bool use_async) : use_async_io_(use_async) {}
-    // 析构
+    // 
    ~AsyncIoBamBuf() {
        if (tid_ != NULL) {
            if (hasThread)
                pthread_join(*tid_, 0);
            free(tid_);
        }
-        // 其他的内存就等程序结束自动释放
-        // buf的析构函数会自动调用
+        // 
+        // buf
    }

-    // 初始化缓存
+    // 
    void Init(samFile *fp, sam_hdr_t *hdr, int64_t mem_size);

-    // 读取数据
+    // 
    int ReadBam();
-    // 为下一次读取做准备, 计算一些边界条件
+    // , 
    void ClearBeforeIdx(size_t idxInBv);
-    vector<BamWrap *> &GetBamArr() { return bam_arr_; }  // 获取bam array
-    // 清空上一次所有读入的数据
+    vector<BamWrap *> &GetBamArr() { return bam_arr_; }  // bam array
+    // 
    void ClearAll();
-    // 包含的read数量
+    // read
    inline int64_t Size() { return legacy_size_ + pi_->Size(); }
    inline int ReadStat() { return pi_->read_stat_; }
    inline BamWrap *operator[](int64_t pos) { return bam_arr_[pos]; }
-    // 获取某一段reads
+    // reads
    inline vector<BamWrap *> Slice(size_t startIdx, size_t endIdx) {
        if (endIdx > startIdx) {
            auto begItr = bam_arr_.begin();
@ -149,11 +149,11 @@ struct AsyncIoBamBuf {
        buf2_.FreeMemory();
    }

-    // 同步读取
+    // 
    int sync_read_bam();
-    // 异步读取
+    // 
    int async_read_bam();
-    // 异步读取线程函数
+    // 
    static void *async_read(void *data);
    void resize_buf();
    inline void refresh_bam_arr() {
--- a/src/util/bam_wrap.h
+++ b/src/util/bam_wrap.h
@ -1,5 +1,5 @@
 /*
-     Description: 读入sam/bam时，开辟一个大的buf，存放这些数据
+     Description: sam/bam，buf，

     Copyright : All right reserved by ICT

@ -20,37 +20,37 @@
 using namespace std;

 /*
-    这里的成员函数命名有点混乱，特此说明，小写加下划线的函数命名，无论是静态函数，还是普通成员函数，更侧重说明
-    这是类似bam的一个属性，而大写加驼峰命名的函数，更侧重说明这是通过计算得出的。
+    ，，，，，
+    bam，，。
 */
 /*
- * sam read的封装
+ * sam read
 */
 struct BamWrap {
-    // 将contig左移后加上pos作为全局位置
-    const static int MAX_CONTIG_LEN_SHIFT = 40;  // 将染色体id左移多少位，和位点拼合在一起
+    // contigpos
+    const static int MAX_CONTIG_LEN_SHIFT = 40;  // id，
    const static int READ_MAX_LENGTH = 200;
-    const static int READ_MAX_DEPTH = 1000;  // 这只是用来初始化空间用的，深度大于这个值也没关系
+    const static int READ_MAX_DEPTH = 1000;  // ，

-    // 成员变量尽量少，减少占用内存空间
+    // ，
    bam1_t *b;
-    int64_t end_pos_;  // bam的全局结束位置, 闭区间
+    int64_t end_pos_;  // bam, 

-    // 全局开始位置
+    // 
    inline int64_t start_pos() { return bam_global_pos(b); }
-    // 全局结束位置
+    // 
    inline int64_t end_pos() { return end_pos_; }
-    // 和reference对应的序列长度
+    // reference
    inline int16_t read_len() { return (end_pos_ - start_pos() + 1); }

-    // 在contig内的开始位置
+    // contig
    inline int32_t contig_pos() { return b->core.pos; }
-    // 在contig内部的结束位置
+    // contig
    inline int32_t contig_end_pos() { return bam_pos(end_pos_); }
-    // 序列的长度（AGTC字母个数）
+    // （AGTC）
    inline int16_t seq_len() { return b->core.l_qseq; }

-    // 算上开头的softclip
+    // softclip
    inline int32_t softclip_start() {
        const uint32_t *cigar = bam_get_cigar(b);
        const bam1_core_t &bc = b->core;
@ -61,7 +61,7 @@ struct BamWrap {
        return bc.pos;
    }

-    // 算上结尾的softclip
+    // softclip
    inline int32_t softclip_end() {
        const uint32_t *cigar = bam_get_cigar(b);
        const bam1_core_t &bc = b->core;
@ -72,7 +72,7 @@ struct BamWrap {
        return bam_pos(end_pos_);
    }

-    // 算上结尾的softclip
+    // softclip
    inline int32_t right_softclip_len() {
        const uint32_t *cigar = bam_get_cigar(b);
        const bam1_core_t &bc = b->core;
@ -83,7 +83,7 @@ struct BamWrap {
        return 0;
    }

-    // 获取序列
+    // 
    inline std::string sequence() {
        ostringstream oss;
        char *seq = (char *)bam_get_seq(b);
@ -96,9 +96,9 @@ struct BamWrap {
        return std::move(oss.str());
    }

-    // 获取名字
+    // 
    inline const char *query_name() { return bam_get_qname(b); }
-    // 获取cigar 字符串
+    // cigar 
    inline string cigar_str() {
        ostringstream oss;
        const uint32_t *cigar = bam_get_cigar(b);
@ -111,10 +111,10 @@ struct BamWrap {
        return std::move(oss.str());
    }

-    // 占用的内存大小
+    // 
    inline int16_t length() { return sizeof(*this) + sizeof(bam1_t) + b->l_data; }

-    // 获取cigar中insert的总长度
+    // cigarinsert
    inline int32_t insert_cigar_len() {
        const uint32_t *cigar = bam_get_cigar(b);
        const bam1_core_t &bc = b->core;
@ -128,7 +128,7 @@ struct BamWrap {
        return ret;
    }

-    // 获取cigar中delete的总长度
+    // cigardelete
    inline int32_t del_cigar_len() {
        const uint32_t *cigar = bam_get_cigar(b);
        const bam1_core_t &bc = b->core;
@ -142,7 +142,7 @@ struct BamWrap {
        return ret;
    }

-    // 计算sam read的终点位置
+    // sam read
    static inline int64_t BamEndPos(const bam1_t *b) {
        const uint32_t *cigar = bam_get_cigar(b);
        const bam1_core_t &bc = b->core;
@ -170,7 +170,7 @@ struct BamWrap {
        return hasWellDefinedFragmentSize;
    }

-    // 计算bam的adapterBoundary
+    // bamadapterBoundary
    int GetAdapterBoundary() {
        const bam1_core_t &bc = b->core;
        int adapterBoundary;
@ -179,11 +179,11 @@ struct BamWrap {
        else if (bc.flag & BAM_FREVERSE)
            adapterBoundary = bc.mpos - 1;
        else
-            adapterBoundary = bc.pos + abs(bc.isize);  // GATK4.0 和 GATK3.5不一样，3.5的这里+1
+            adapterBoundary = bc.pos + abs(bc.isize);  // GATK4.0  GATK3.5，3.5+1
        return adapterBoundary;
    }

-    // 获取开头的I的长度
+    // I
    inline int GetHeadInsertLen() {
        int insLen = 0;
        const uint32_t *cigar = bam_get_cigar(b);
@ -200,8 +200,8 @@ struct BamWrap {
        return insLen;
    }

-    // 获取soft clip开始位置(能处理H和S相连的情况，有这种情况么？,
-    // 注意开头的I要当做S？)
+    // soft clip(HS，？,
+    // IS？)
    inline int64_t GetSoftStart() {
        int64_t softStart = b->core.pos;
        const uint32_t *cigar = bam_get_cigar(b);
@ -217,7 +217,7 @@ struct BamWrap {
        return softStart;
    }

-    // 获取unclipped开始位置(包括hardclip)
+    // unclipped(hardclip)
    inline int64_t GetUnclippedStart() {
        int64_t start = b->core.pos;
        const uint32_t *cigar = bam_get_cigar(b);
@ -233,7 +233,7 @@ struct BamWrap {
        return start;
    }

-    // 获取unclipped结束位置(包括hardclip)
+    // unclipped(hardclip)
    inline int64_t GetUnclippedEnd() {
        int64_t end_pos = bam_endpos(b);
        const uint32_t *cigar = bam_get_cigar(b);
@ -249,7 +249,7 @@ struct BamWrap {
        return end_pos - 1;
    }

-    /* 获取碱基质量分数的加和 */
+    /*  */
    /** Calculates a score for the read which is the sum of scores over Q15. */
    inline int GetSumOfBaseQualities() {
        int score = 0;
@ -262,9 +262,9 @@ struct BamWrap {
        return score;
    }

-    /* 与flag相关的检测 */
+    /* flag */

-    /* 没有比对上 unmapped */
+    /*  unmapped */
    inline bool GetReadUnmappedFlag() { return b->core.flag & BAM_FUNMAP; }

    /* Template having multiple segments in sequencing  */
@ -313,7 +313,7 @@ struct BamWrap {
     */
    bool GetMateNegativeStrandFlag() { return b->core.flag & BAM_FMREVERSE; }

-    /* 其他的一些信息 */
+    /*  */
    inline int GetReferenceLength() {
        int length = 0;
        const uint32_t *cigar = bam_get_cigar(b);
@ -336,26 +336,26 @@ struct BamWrap {
        return length;
    }

-    // 计算bam的全局位置，算上染色体序号和比对位置
+    // bam，
    static inline int64_t bam_global_pos(bam1_t *b) {
        return (((int64_t)b->core.tid << MAX_CONTIG_LEN_SHIFT) | (int64_t)b->core.pos);
    }
    static inline int64_t bam_global_pos(int tid, int pos) {
        return (((int64_t)tid << MAX_CONTIG_LEN_SHIFT) | (int64_t)pos);
    }
-    // 根据全局位置获取bam的染色体序号
+    // bam
    static inline int32_t bam_tid(int64_t global_pos) {
        const int64_t mask = ~(((int64_t)1 << MAX_CONTIG_LEN_SHIFT) - 1);
        const int64_t high_tid = global_pos & mask;
        return (int32_t)(high_tid >> MAX_CONTIG_LEN_SHIFT);
    }
-    // 根据全局位置获取bam的比对位置(染色体内)
+    // bam()
    static inline int32_t bam_pos(int64_t global_pos) {
        const int64_t mask = ((int64_t)1 << MAX_CONTIG_LEN_SHIFT) - 1;
        return (int32_t)(global_pos & mask);
    }

-    // 设置是否冗余的标记
+    // 
    void SetDuplicateReadFlag(bool flag) { setFlag(flag, BAM_FDUP); }

    void setFlag(bool flag, int bit) {
--- a/src/util/murmur3.h
+++ b/src/util/murmur3.h
@ -1,5 +1,5 @@
 /*
-Description: Murmur哈希
+Description: Murmur

 Copyright : All right reserved by ICT