diff --git a/.vscode/launch.json b/.vscode/launch.json index 0275f53..78e15fb 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -13,11 +13,11 @@ "program": "${workspaceRoot}/build/bin/picard_cpp", "args": [ "MarkDuplicates", - "--INPUT", "~/data/bam/100w.sam", + "--INPUT", "/home/zzh/data/wgs_na12878.bam", "--OUTPUT", "./out.sam", "--METRICS_FILE", "metrics.txt", - "--num_threads", "2", - "--max_mem", "2M", + "--num_threads", "32", + "--max_mem", "2G", "--verbosity", "DEBUG", "--asyncio", "true", "--READ_NAME_REGEX", "" diff --git a/run.sh b/run.sh index dcd8b97..f7323ef 100755 --- a/run.sh +++ b/run.sh @@ -7,8 +7,8 @@ nthread=32 #nthread=64 #nthread=128 #input=/home/zzh/data/bam/zy_normal.bam -#input=/home/zzh/data/bam/zy_tumor.bam -input=/home/zzh/data/wgs_na12878.bam +input=/home/zzh/data/bam/zy_tumor.bam +#input=/home/zzh/data/wgs_na12878.bam #input=~/data/bam/100w.bam #input=~/data/bam/t100w.sam #input=~/data/bam/1k.sam diff --git a/src/sam/markdups/markdups.cpp b/src/sam/markdups/markdups.cpp index 1351e4c..95f4e94 100644 --- a/src/sam/markdups/markdups.cpp +++ b/src/sam/markdups/markdups.cpp @@ -44,7 +44,7 @@ using std::string; #define BAM_BLOCK_SIZE 32L * 1024 * 1024 #define NO_SUCH_INDEX INT64_MAX -Timer tm_arr[20]; // 用来测试性能 +Timer tm_arr[30]; // 用来测试性能 /* 全局本地变量 */ vector g_vRnParser; // 每个线程一个read name parser samFile *g_inBamFp; // 输入的bam文件 @@ -154,8 +154,8 @@ int MarkDuplicates(int argc, char *argv[]) { htsThreadPool htsPoolWrite = {NULL, 0}; // 读写用不同的线程池 // htsPoolRead.pool = hts_tpool_init(g_gArg.num_threads); // htsPoolWrite.pool = hts_tpool_init(g_gArg.num_threads); - htsPoolRead.pool = hts_tpool_init(64); - htsPoolWrite.pool = hts_tpool_init(64); + htsPoolRead.pool = hts_tpool_init(16); + htsPoolWrite.pool = hts_tpool_init(32); if (!htsPoolRead.pool || !htsPoolWrite.pool) { Error("[%d] failed to set up thread pool", __LINE__); sam_close(g_inBamFp); diff --git a/src/sam/markdups/md_types.h b/src/sam/markdups/md_types.h index 426e882..e4d061c 100644 --- a/src/sam/markdups/md_types.h +++ b/src/sam/markdups/md_types.h @@ -83,8 +83,8 @@ template using DPSet = tsl::robin_set; template -// using CalcSet = set; -using CalcSet = tsl::robin_set; +using CalcSet = set; +// using CalcSet = tsl::robin_set; using ReadEndsSet = tsl::robin_set; diff --git a/src/sam/markdups/pipeline_md.cpp b/src/sam/markdups/pipeline_md.cpp index 006a60e..8032638 100644 --- a/src/sam/markdups/pipeline_md.cpp +++ b/src/sam/markdups/pipeline_md.cpp @@ -74,17 +74,20 @@ static void markDupsForPairs(vector &vpRe, DPSet *dup // 这个统计可能会有误差,因为当前位点可能还有没匹配上的read,导致当前位点的read(paired)数量为1 // 可以通过后续的补充计算来解决这个问题,有必要么?好像没必要 // gMetrics.singletonReads.insert(vpRe[0]->read1IndexInFile); - singletonIdx->insert(vpRe[0]->read1IndexInFile); +// singletonIdx->insert(vpRe[0]->read1IndexInFile); } return; } for (auto pe : vpRe) { // gMetrics.singletonReads.erase(pe->read1IndexInFile); + if (pe->read1IndexInFile == 1208593132 || pe->read2IndexInFile == 1208593132) { + cout << 1208593132 << "\t" << (notDupIdx == nullptr) << endl; + } } if (notSingletonIdx != nullptr) { for (auto pe : vpRe) { - notSingletonIdx->insert(pe->read1IndexInFile); +// notSingletonIdx->insert(pe->read1IndexInFile); } } @@ -324,7 +327,7 @@ static void generateReadEnds(ParallelDataArg &pdArg, MarkDupDataArg *arg) { pdArg.mdArg = arg; tm_arr[8].acc_start(); - kt_for(g_gArg.num_threads, threadGenerateReadEnds, &pdArg, g_gArg.num_threads); + kt_for(g_gArg.num_threads / 2, threadGenerateReadEnds, &pdArg, g_gArg.num_threads); // tm_arr[8].acc_end(); // 合并各个线程数据 // 查找未匹配的frags @@ -948,7 +951,6 @@ static void handleLastTask(MarkDupDataArg *task, GlobalDataArg *gDataArg) { auto &vSingletonIdx = g.singletonIdxArr.back(); vSingletonIdx.insert(vSingletonIdx.end(), lp.pairSingletonIdx.begin(), lp.pairSingletonIdx.end()); std::sort(vSingletonIdx.begin(), vSingletonIdx.end()); - gSingletonNum += lp.pairSingletonIdx.size(); } // for step 2 generate read ends @@ -957,7 +959,7 @@ static void handleLastTask(MarkDupDataArg *task, GlobalDataArg *gDataArg) { static void mtGenerateReadEnds(void *data, long idx, int tid) { auto &p = *(PipelineArg *)data; auto &rnParser = g_vRnParser[idx]; - int nThread = g_gArg.num_threads; + int nThread = p.genREThreadNum; auto &bams = p.readData.bams; int64_t bamStartIdx = p.readData.bamStartIdx; int64_t taskSeq = p.readData.taskSeq; @@ -1002,101 +1004,116 @@ static void mtGenerateReadEnds(void *data, long idx, int tid) { } static void doGenRE(PipelineArg &pipeArg) { +// return; GenREData &genREData = pipeArg.genREData[pipeArg.genREOrder % pipeArg.GENBUFNUM]; ReadData &readData = pipeArg.readData; - // 用来在genRE阶段计算一些Sort阶段应该计算的数据,保持每个step用时更平衡 - SortData &sortData = pipeArg.sortData[pipeArg.genREOrder % pipeArg.SORTBUFNUM]; + // cout << "dogen-gen buf : " << pipeArg.genREOrder % pipeArg.GENBUFNUM << endl; // cout << "dogen-sort buf:" << pipeArg.genREOrder % pipeArg.SORTBUFNUM << endl; -// sortData.pSmd->unpairedDic.clear(); -// sortData.pSmd->unpairedPosArr.clear(); // generate read ends - kt_for(g_gArg.num_threads, mtGenerateReadEnds, &pipeArg, g_gArg.num_threads); - + const int numThread = pipeArg.genREThreadNum; + // / 4; + kt_for(numThread, mtGenerateReadEnds, &pipeArg, numThread); + +#if 1 + // 用来在genRE阶段计算一些Sort阶段应该计算的数据,保持每个step用时更平衡 // 轮询每个线程中未找到匹配的read,找到匹配的 -// vector &pairs = genREData.pairsArr[g_gArg.num_threads]; -// pairs.clear(); -// for (int i = 0; i < g_gArg.num_threads; ++i) { -// for (auto &val : genREData.unpairedDicArr[i]) { -// const string &key = val.first; -// const ReadEnds &fragEnd = val.second.unpairedRE; -// if (sortData.pSmd->unpairedDic.find(key) == sortData.pSmd->unpairedDic.end()) { -// sortData.pSmd->unpairedDic[key] = {readData.taskSeq, fragEnd}; -// } else { // 找到了pairend -// auto &pairedEnds = sortData.pSmd->unpairedDic.at(key).unpairedRE; -// modifyPairedEnds(fragEnd, &pairedEnds); -// pairs.push_back(pairedEnds); -// sortData.pSmd->unpairedDic.erase(key); // 删除找到的pairend -// } -// } -// } -// sort(pairs.begin(), pairs.end()); -// -// // create unpaired info -// for (auto &e : sortData.pSmd->unpairedDic) { -// auto posKey = e.second.unpairedRE.posKey; -// auto &unpairArrInfo = sortData.pSmd->unpairedPosArr[posKey]; -// unpairArrInfo.unpairedNum++; -// unpairArrInfo.taskSeq = readData.taskSeq; -// unpairArrInfo.readNameSet.insert(e.first); -// } -} -// end for step 2 generate read ends - -// for step-3 sort -static void doSort(PipelineArg &pipeArg) { - GenREData &genREData = pipeArg.genREData[pipeArg.sortOrder % pipeArg.GENBUFNUM]; - SortData &sortData = pipeArg.sortData[pipeArg.sortOrder % pipeArg.SORTBUFNUM]; -// cout << "dosort-gen buf : " << pipeArg.sortOrder % pipeArg.GENBUFNUM << endl; -// cout << "dosort-sort buf:" << pipeArg.sortOrder % pipeArg.SORTBUFNUM << endl; - auto pSmd = (SortMarkData*) sortData.pSmd; - pSmd->pairs.clear(); - pSmd->frags.clear(); - pSmd->unpairedDic.clear(); - pSmd->unpairedPosArr.clear(); - + genREData.unpairedDic.clear(); + genREData.unpairedPosArr.clear(); vector &pairs = genREData.pairsArr[g_gArg.num_threads]; pairs.clear(); for (int i = 0; i < g_gArg.num_threads; ++i) { for (auto &val : genREData.unpairedDicArr[i]) { const string &key = val.first; const ReadEnds &fragEnd = val.second.unpairedRE; - if (pSmd->unpairedDic.find(key) == pSmd->unpairedDic.end()) { - pSmd->unpairedDic[key] = {(int64_t)pipeArg.sortOrder, fragEnd}; + if (genREData.unpairedDic.find(key) == genREData.unpairedDic.end()) { + genREData.unpairedDic[key] = {readData.taskSeq, fragEnd}; } else { // 找到了pairend - auto &pairedEnds = pSmd->unpairedDic.at(key).unpairedRE; + auto &pairedEnds = genREData.unpairedDic.at(key).unpairedRE; modifyPairedEnds(fragEnd, &pairedEnds); pairs.push_back(pairedEnds); - pSmd->unpairedDic.erase(key); // 删除找到的pairend + genREData.unpairedDic.erase(key); // 删除找到的pairend } } } sort(pairs.begin(), pairs.end()); // create unpaired info - for (auto &e : pSmd->unpairedDic) { + for (auto &e : genREData.unpairedDic) { auto posKey = e.second.unpairedRE.posKey; - auto &unpairArrInfo = pSmd->unpairedPosArr[posKey]; + auto &unpairArrInfo = genREData.unpairedPosArr[posKey]; + unpairArrInfo.unpairedNum++; + unpairArrInfo.taskSeq = readData.taskSeq; + unpairArrInfo.readNameSet.insert(e.first); + } +#endif + +} +// end for step 2 generate read ends + +// for step-3 sort +static void doSort(PipelineArg &pipeArg) { +// return; + GenREData &genREData = pipeArg.genREData[pipeArg.sortOrder % pipeArg.GENBUFNUM]; + SortData &sortData = pipeArg.sortData[pipeArg.sortOrder % pipeArg.SORTBUFNUM]; +// cout << "dosort-gen buf : " << pipeArg.sortOrder % pipeArg.GENBUFNUM << endl; +// cout << "dosort-sort buf:" << pipeArg.sortOrder % pipeArg.SORTBUFNUM << endl; + +#if 1 + sortData.CopyGenREData(genREData); +#else + sortData.unpairedDic.clear(); + sortData.unpairedPosArr.clear(); + const int numThread = pipeArg.genREThreadNum; + // / 4; + + vector &pairs = genREData.pairsArr[numThread]; + pairs.clear(); + for (int i = 0; i < numThread; ++i) { + for (auto &val : genREData.unpairedDicArr[i]) { + const string &key = val.first; + const ReadEnds &fragEnd = val.second.unpairedRE; + if (sortData.unpairedDic.find(key) == sortData.unpairedDic.end()) { + sortData.unpairedDic[key] = {(int64_t)pipeArg.sortOrder, fragEnd}; + } else { // 找到了pairend + auto &pairedEnds = sortData.unpairedDic.at(key).unpairedRE; + modifyPairedEnds(fragEnd, &pairedEnds); + pairs.push_back(pairedEnds); + sortData.unpairedDic.erase(key); // 删除找到的pairend + } + } + } + sort(pairs.begin(), pairs.end()); + + // create unpaired info + for (auto &e : sortData.unpairedDic) { + auto posKey = e.second.unpairedRE.posKey; + auto &unpairArrInfo = sortData.unpairedPosArr[posKey]; unpairArrInfo.unpairedNum++; unpairArrInfo.taskSeq = pipeArg.sortOrder; unpairArrInfo.readNameSet.insert(e.first); } +#endif + + sortData.pairs.clear(); + sortData.frags.clear(); const ReadEnds *pRE; ReadEndsHeap pairsHeap, fragsHeap; pairsHeap.Init(&genREData.pairsArr); while ((pRE = pairsHeap.Pop()) != nullptr) { - pSmd->pairs.push_back(*pRE); + sortData.pairs.push_back(*pRE); } fragsHeap.Init(&genREData.fragsArr); while ((pRE = fragsHeap.Pop()) != nullptr) { - pSmd->frags.push_back(*pRE); + sortData.frags.push_back(*pRE); } } // for step-4 sort static void doMarkDup(PipelineArg &pipeArg) { +// return; MarkDupData &mdData = pipeArg.markDupData[pipeArg.markDupOrder % pipeArg.MARKBUFNUM]; SortData &sortData = pipeArg.sortData[pipeArg.markDupOrder % pipeArg.SORTBUFNUM]; mdData.taskSeq = pipeArg.markDupOrder; @@ -1106,29 +1123,15 @@ static void doMarkDup(PipelineArg &pipeArg) { mdData.pairRepIdx.clear(); mdData.pairSingletonIdx.clear(); - //cout << "doMarkDup-mark buf : " << pipeArg.markDupOrder % pipeArg.MARKBUFNUM << endl; - //cout << "doMarkDup-sort buf:" << pipeArg.markDupOrder % pipeArg.SORTBUFNUM << endl; - tm_arr[5].acc_start(); - // auto tmpPt = sortData.pSmd; - // sortData.pSmd = mdData.pSmd; - // mdData.pSmd = tmpPt; - - auto mdpSmd = (SortMarkData *)mdData.pSmd; - auto stpSmd = (SortMarkData *)sortData.pSmd; - mdpSmd->frags = stpSmd->frags; - mdpSmd->pairs = stpSmd->pairs; - mdpSmd->unpairedDic = stpSmd->unpairedDic; - mdpSmd->unpairedPosArr = stpSmd->unpairedPosArr; - //cout << mdData.pSmd->frags.size() << "\t" << mdData.pSmd->pairs.size() << "\t" << mdData.pSmd->unpairedDic.size() - // << "\t" << mdData.pSmd->unpairedPosArr.size() << endl; + mdData.CopySortData(sortData); tm_arr[5].acc_end(); // 先处理 pair - processPairs(mdpSmd->pairs, &mdData.pairDupIdx, &mdData.pairOpticalDupIdx, &mdData.pairRepIdx, + processPairs(mdData.pairs, &mdData.pairDupIdx, &mdData.pairOpticalDupIdx, &mdData.pairRepIdx, &mdData.pairSingletonIdx); // 再处理frag - processFrags(mdpSmd->frags, &mdData.fragDupIdx); + processFrags(mdData.frags, &mdData.fragDupIdx); } template @@ -1151,28 +1154,25 @@ static void refreshMarkDupData(DPSet &dupIdx, MDSet &opticalDu MDSet ¬OpticalDupIdx, MDSet ¬RepIdx, MDSet ¬SingletonIdx, MarkDupData &lp, MarkDupData &p) { refreshDupIdx(dupIdx, lp.pairDupIdx, p.pairDupIdx); - refreshNotDupIdx(notDupIdx, lp.pairDupIdx, p.pairDupIdx); refreshDupIdx(opticalDupIdx, lp.pairOpticalDupIdx, p.pairOpticalDupIdx); - refreshNotDupIdx(notOpticalDupIdx, lp.pairOpticalDupIdx, p.pairOpticalDupIdx); refreshDupIdx(repIdx, lp.pairRepIdx, p.pairRepIdx); - refreshNotDupIdx(notRepIdx, lp.pairRepIdx, p.pairRepIdx); refreshDupIdx(singletonIdx, lp.pairSingletonIdx, p.pairSingletonIdx); + + refreshNotDupIdx(notDupIdx, lp.pairDupIdx, p.pairDupIdx); + refreshNotDupIdx(notOpticalDupIdx, lp.pairOpticalDupIdx, p.pairOpticalDupIdx); + refreshNotDupIdx(notRepIdx, lp.pairRepIdx, p.pairRepIdx); refreshNotDupIdx(notSingletonIdx, lp.pairSingletonIdx, p.pairSingletonIdx); } // for step-5 sort static void doIntersect(PipelineArg &pipeArg) { -// return; + // return; IntersectData &g = pipeArg.intersectData; if (pipeArg.intersectOrder == 0) return; // 要等第二部分数据才能进行计算 MarkDupData &lp = pipeArg.markDupData[(pipeArg.intersectOrder - 1) % pipeArg.MARKBUFNUM]; MarkDupData &p = pipeArg.markDupData[pipeArg.intersectOrder % pipeArg.MARKBUFNUM]; -// cout << "dointer-mark buf : " << (pipeArg.intersectOrder - 1) % pipeArg.MARKBUFNUM << "\t" -// << pipeArg.intersectOrder % pipeArg.MARKBUFNUM << endl; - // cout << "sort buf:" << pipeArg.sortOrder % pipeArg.SORTBUFNUM << endl; - vector reArr; DPSet dupIdx; MDSet opticalDupIdx; @@ -1182,12 +1182,10 @@ static void doIntersect(PipelineArg &pipeArg) { MDSet notDupIdx; MDSet notRepIdx; MDSet notSingletonIdx; - auto lppSmd = (SortMarkData *)lp.pSmd; - auto ppSmd = (SortMarkData *)p.pSmd; // 先处理重叠的frags // cout << "frags: " << lp.pSmd->frags.size() << "\t" << p.pSmd->frags.size() << endl; - getIntersectData(lppSmd->frags, ppSmd->frags, &reArr); + getIntersectData(lp.frags, p.frags, &reArr); processFrags(reArr, &dupIdx, ¬DupIdx); refreshDupIdx(dupIdx, lp.fragDupIdx, p.fragDupIdx); refreshNotDupIdx(notDupIdx, lp.fragDupIdx, p.fragDupIdx); @@ -1197,7 +1195,7 @@ static void doIntersect(PipelineArg &pipeArg) { reArr.clear(); dupIdx.clear(); notDupIdx.clear(); - getIntersectData(lppSmd->pairs, ppSmd->pairs, &reArr, true); + getIntersectData(lp.pairs, p.pairs, &reArr, true); // cout << "pairs size: " << lp.pSmd->pairs.size() << "\t" << p.pSmd->pairs.size() << endl; // cout << "range: " << lp.pSmd->pairs.front().posKey << "\t" << lp.pSmd->pairs.back().posKey << "\t" // << p.pSmd->pairs.front().posKey << "\t" << p.pSmd->pairs.back().posKey << endl; @@ -1206,24 +1204,23 @@ static void doIntersect(PipelineArg &pipeArg) { ¬SingletonIdx); refreshMarkDupData(dupIdx, opticalDupIdx, repIdx, singletonIdx, notDupIdx, notOpticalDupIdx, notRepIdx, notSingletonIdx, lp, p); -/* + // return; // 处理之前未匹配的部分 map recalcPos; CalcSet alreadyAdd; // 与该位点相同的pair都添加到数组里了 MDSet addToGlobal; int64_t prevLastPos = 0, nextFirstPos = 0; - if (lppSmd->frags.size() > 0) - prevLastPos = lppSmd->frags.back().posKey; - if (ppSmd->frags.size() > 0) - nextFirstPos = ppSmd->frags[0].posKey; - for (auto &prevUnpair : lppSmd->unpairedDic) { // 遍历上一个任务中的每个未匹配的read + if (lp.frags.size() > 0) prevLastPos = lp.frags.back().posKey; + if (p.frags.size() > 0) nextFirstPos = p.frags[0].posKey; + //cout << "dic size: " << lp.unpairedDic.size() << "\t" << p.unpairedDic.size() << "\t" << g.unpairedDic.size() + // << endl; + for (auto &prevUnpair : lp.unpairedDic) { // 遍历上一个任务中的每个未匹配的read auto &readName = prevUnpair.first; auto &prevPosInfo = prevUnpair.second; auto prevFragEnd = prevPosInfo.unpairedRE; // 未匹配的read end - - if (ppSmd->unpairedDic.find(readName) != ppSmd->unpairedDic.end()) { // 在当前这个任务里找到了这个未匹配的read - auto &nextPosInfo = ppSmd->unpairedDic[readName]; + if (p.unpairedDic.find(readName) != p.unpairedDic.end()) { // 在当前这个任务里找到了这个未匹配的read + auto &nextPosInfo = p.unpairedDic[readName]; auto &nextFragEnd = nextPosInfo.unpairedRE; int64_t prevPosKey = prevFragEnd.posKey; modifyPairedEnds(nextFragEnd, &prevFragEnd); // 在某些clip情况下,poskey可能是后面的read @@ -1231,11 +1228,11 @@ static void doIntersect(PipelineArg &pipeArg) { CalcKey ck = {prevPosKey, nextPosKey}; UnpairedPosInfo *prevUnpairInfoP = nullptr; UnpairedPosInfo *nextUnpairInfoP = nullptr; - if (lppSmd->unpairedPosArr.find(prevPosKey) != lppSmd->unpairedPosArr.end()) - prevUnpairInfoP = &lppSmd->unpairedPosArr[prevPosKey]; - if (ppSmd->unpairedPosArr.find(prevPosKey) != ppSmd->unpairedPosArr.end()) - nextUnpairInfoP = &ppSmd->unpairedPosArr[prevPosKey]; + if (lp.unpairedPosArr.find(prevPosKey) != lp.unpairedPosArr.end()) + prevUnpairInfoP = &lp.unpairedPosArr[prevPosKey]; + if (p.unpairedPosArr.find(prevPosKey) != p.unpairedPosArr.end()) + nextUnpairInfoP = &p.unpairedPosArr[prevPosKey]; // pos分为两种情况,根据poskey(pair中两个read分别的pos)的位置确定 // 1. // prevpos在交叉部分之前,nextpos在交叉部分之后,这种情况不需要获取pairarr中的数据; @@ -1252,30 +1249,29 @@ static void doIntersect(PipelineArg &pipeArg) { if (alreadyAdd.find(ck) != alreadyAdd.end()) { // 之前已经添加过了,后面就不用再添加数据了,因为同一个位置可能找到两个及以上的unpair数据,处理之前的数据时候可能已经添加了这些数据 addDataToPos = false; - } else - alreadyAdd.insert(ck); + } else alreadyAdd.insert(ck); if (prevPosKey < nextFirstPos) { // prevpos在交叉部分之前 auto &prevPairArr = prevUnpairInfoP->pairArr; // prevUnpairInfoP肯定不是nullptr prevPairArr.push_back(prevFragEnd); if (nextPosKey <= prevLastPos && addDataToPos) { // 第二种情况 - getEqualRE(prevFragEnd, lppSmd->pairs, &prevPairArr); + getEqualRE(prevFragEnd, lp.pairs, &prevPairArr); } // 第一种情况,第二种情况下都会出现,复杂情况一 auto gPosInfo = g.unpairedPosArr.find(prevPosKey); if (gPosInfo != g.unpairedPosArr.end()) { // 可能g和p有匹配的,刚好和该位点一致 auto &gUnpairInfo = gPosInfo->second; - auto pPosInfo = ppSmd->unpairedPosArr.find(nextPosKey); - if (pPosInfo != ppSmd->unpairedPosArr.end()) { + auto pPosInfo = p.unpairedPosArr.find(nextPosKey); + if (pPosInfo != p.unpairedPosArr.end()) { auto &pUnpairInfo = pPosInfo->second; for (auto &rn : gUnpairInfo.readNameSet) { // 遍历每一个readname,看是否有匹配的 if (pUnpairInfo.readNameSet.find(rn) != pUnpairInfo.readNameSet.end()) { auto pe = g.unpairedDic[rn].unpairedRE; - auto fe = ppSmd->unpairedDic[rn].unpairedRE; + auto fe = p.unpairedDic[rn].unpairedRE; modifyPairedEnds(fe, &pe); prevPairArr.push_back(pe); g.unpairedDic.erase(rn); - ppSmd->unpairedDic.erase(rn); + p.unpairedDic.erase(rn); } } } @@ -1290,8 +1286,8 @@ static void doIntersect(PipelineArg &pipeArg) { auto &prevPairArr = prevUnpairInfoP->pairArr; prevPairArr.push_back(prevFragEnd); if (addDataToPos) { - getEqualRE(prevFragEnd, ppSmd->pairs, &prevPairArr); - getEqualRE(prevFragEnd, ppSmd->pairs, &nextPairArr); + getEqualRE(prevFragEnd, p.pairs, &prevPairArr); + getEqualRE(prevFragEnd, p.pairs, &nextPairArr); } // 将数据放到next task里,(这个位点以后会可能还会计算到,目前方案是都计算,只是把冗余剔除) recalcPos[ck] = nextPosInfo.taskSeq; @@ -1302,26 +1298,28 @@ static void doIntersect(PipelineArg &pipeArg) { auto &prevPairArr = prevUnpairInfoP->pairArr; // prevUnpairInfoP肯定不是nullptr prevPairArr.push_back(prevFragEnd); if (addDataToPos) // 第二种情况 - getEqualRE(prevFragEnd, ppSmd->pairs, &prevPairArr); + getEqualRE(prevFragEnd, p.pairs, &prevPairArr); recalcPos[ck] = prevPosInfo.taskSeq; std::sort(prevPairArr.begin(), prevPairArr.end()); } } else { // 第四种情况 if (prevUnpairInfoP == nullptr) { - prevUnpairInfoP = &lppSmd->unpairedPosArr[prevPosKey]; + prevUnpairInfoP = &lp.unpairedPosArr[prevPosKey]; prevUnpairInfoP->taskSeq = lp.taskSeq; } auto &prevPairArr = prevUnpairInfoP->pairArr; prevPairArr.push_back(prevFragEnd); if (addDataToPos) { - getEqualRE(prevFragEnd, lppSmd->pairs, &prevPairArr); - getEqualRE(prevFragEnd, ppSmd->pairs, &prevPairArr); + getEqualRE(prevFragEnd, lp.pairs, &prevPairArr); + getEqualRE(prevFragEnd, p.pairs, &prevPairArr); } recalcPos[ck] = prevPosInfo.taskSeq; std::sort(prevPairArr.begin(), prevPairArr.end()); } } - ppSmd->unpairedDic.erase(readName); // 在next task里删除该read + p.unpairedDic.erase(readName); // 在next task里删除该read + // p.unpairedPosArr.erase(nextPosKey); + // p.unpairedPosArr[nextPosKey]. } else if (g.unpairedDic.find(readName) != g.unpairedDic.end()) { // 在遗留数据中找到了匹配的read auto &remainPosInfo = g.unpairedDic[readName]; auto remainFragEnd = remainPosInfo.unpairedRE; @@ -1341,6 +1339,7 @@ static void doIntersect(PipelineArg &pipeArg) { addToGlobal.insert(prevPosKey); } } + // return; map taskChanged; MDSet posProcessed; for (auto &e : recalcPos) { @@ -1355,7 +1354,7 @@ static void doIntersect(PipelineArg &pipeArg) { if (taskSeq < lp.taskSeq) pairArrP = &g.unpairedPosArr[posKey].pairArr; else - pairArrP = &lppSmd->unpairedPosArr[posKey].pairArr; + pairArrP = &lp.unpairedPosArr[posKey].pairArr; processPairs(*pairArrP, &t.dupIdx, &t.opticalDupIdx, &t.repIdx, &t.singletonIdx, &t.notDupIdx, &t.notOpticalDupIdx, &t.notRepIdx, &t.notSingletonIdx); if (taskSeq < lp.taskSeq) @@ -1363,30 +1362,36 @@ static void doIntersect(PipelineArg &pipeArg) { } // 最后再添加,以防开始赋值,后来这个位置要是又添加了新的数据 // 放在这里,因为lp中的unpairedPosArr中的readends可能会被修改(比如optical duplicate) - for (auto posKey : addToGlobal) { - g.unpairedPosArr[posKey] = lppSmd->unpairedPosArr[posKey]; - } +// for (auto posKey : addToGlobal) { +// g.unpairedPosArr[posKey] = lp.unpairedPosArr[posKey]; +// } + // return; // 更新结果 -// for (auto &e : taskChanged) { -// auto taskSeq = e.first; -// auto &t = e.second; -// if (taskSeq < lp.taskSeq) { -// refeshTaskDupInfo(t.dupIdx, t.opticalDupIdx, t.repIdx, t.singletonIdx, t.notDupIdx, t.notOpticalDupIdx, -// t.notRepIdx, t.notSingletonIdx, g.latterDupIdxArr[taskSeq], -// g.latterOpticalDupIdxArr[taskSeq], g.latterRepIdxArr[taskSeq], -// g.latterSingletonIdxArr[taskSeq], g.latterNotDupIdxArr[taskSeq], -// g.latterNotOpticalDupIdxArr[taskSeq], g.latterNotRepIdxArr[taskSeq], -// g.latterNotSingletonIdxArr[taskSeq]); -// } else if (taskSeq == lp.taskSeq) { -// refreshMarkDupData(t.dupIdx, t.opticalDupIdx, t.repIdx, t.singletonIdx, t.notDupIdx, t.notOpticalDupIdx, -// t.notRepIdx, t.notSingletonIdx, lp, p); -// } else { -// refreshMarkDupData(t.dupIdx, t.opticalDupIdx, t.repIdx, t.singletonIdx, t.notDupIdx, t.notOpticalDupIdx, -// t.notRepIdx, t.notSingletonIdx, p, lp); // 把结果放到p中 -// } + for (auto &e : taskChanged) { + auto taskSeq = e.first; + auto &t = e.second; + if (taskSeq < lp.taskSeq) { + refeshTaskDupInfo(t.dupIdx, t.opticalDupIdx, t.repIdx, t.singletonIdx, t.notDupIdx, t.notOpticalDupIdx, + t.notRepIdx, t.notSingletonIdx, g.latterDupIdxArr[taskSeq], + g.latterOpticalDupIdxArr[taskSeq], g.latterRepIdxArr[taskSeq], + g.latterSingletonIdxArr[taskSeq], g.latterNotDupIdxArr[taskSeq], + g.latterNotOpticalDupIdxArr[taskSeq], g.latterNotRepIdxArr[taskSeq], + g.latterNotSingletonIdxArr[taskSeq]); + } else if (taskSeq == lp.taskSeq) { + refreshMarkDupData(t.dupIdx, t.opticalDupIdx, t.repIdx, t.singletonIdx, t.notDupIdx, t.notOpticalDupIdx, + t.notRepIdx, t.notSingletonIdx, lp, p); + } else { + refreshMarkDupData(t.dupIdx, t.opticalDupIdx, t.repIdx, t.singletonIdx, t.notDupIdx, t.notOpticalDupIdx, + t.notRepIdx, t.notSingletonIdx, p, lp); // 把结果放到p中 + } + } +// for (auto posKey : addToGlobal) { +// g.unpairedPosArr[posKey] = lp.unpairedPosArr[posKey]; // } -*/ + +// return; + // 将dupidx放进全局数据 g.latterDupIdxArr.push_back(DPSet()); g.latterOpticalDupIdxArr.push_back(MDSet()); @@ -1417,7 +1422,7 @@ static void doIntersect(PipelineArg &pipeArg) { auto &vSingletonIdx = g.singletonIdxArr.back(); vSingletonIdx.insert(vSingletonIdx.end(), lp.pairSingletonIdx.begin(), lp.pairSingletonIdx.end()); std::sort(vSingletonIdx.begin(), vSingletonIdx.end()); - gSingletonNum += lp.pairSingletonIdx.size(); +// gSingletonNum += lp.pairSingletonIdx.size(); } static void *pipeRead(void *data) { @@ -1426,8 +1431,10 @@ static void *pipeRead(void *data) { inBamBuf.Init(g_inBamFp, g_inBamHeader, g_gArg.max_mem); int64_t readNumSum = 0; while (1) { + tm_arr[10].acc_start(); POSSESS(pipeArg.readSig); WAIT_FOR(pipeArg.readSig, NOT_TO_BE, 1); // 只有一个坑,因为在bambuf内部支持异步读取 + tm_arr[10].acc_end(); size_t readNum = 0; tm_arr[0].acc_start(); if (inBamBuf.ReadStat() >= 0) readNum = inBamBuf.ReadBam(); // 读入新一轮的数据 @@ -1456,17 +1463,23 @@ static void *pipeGenRE(void *data) { PipelineArg &pipeArg = *(PipelineArg *)data; auto &genREData = pipeArg.genREData; // init generate read ends data by num_thread + int genREThread = pipeArg.genREThreadNum; + // / 4; for (int i = 0; i < pipeArg.GENBUFNUM; ++i) { - genREData[i].Init(g_gArg.num_threads); + genREData[i].Init(genREThread); } while(1) { + tm_arr[11].acc_start(); POSSESS(pipeArg.readSig); WAIT_FOR(pipeArg.readSig, NOT_TO_BE, 0); // 等待有数据 POSSESS(pipeArg.genRESig); + tm_arr[11].acc_end(); // cout << "gre: " << PEEK_LOCK(pipeArg.genRESig) << endl; + tm_arr[21].acc_start(); WAIT_FOR(pipeArg.genRESig, NOT_TO_BE, pipeArg.GENBUFNUM); // 有BUFNUM个坑 RELEASE(pipeArg.genRESig); // 因为有不止一个位置,所以要释放 + tm_arr[21].acc_end(); if (pipeArg.readFinish) { POSSESS(pipeArg.genRESig); @@ -1492,13 +1505,17 @@ static void *pipeSort(void *data) { PipelineArg &pipeArg = *(PipelineArg *)data; while (1) { + tm_arr[12].acc_start(); POSSESS(pipeArg.genRESig); WAIT_FOR(pipeArg.genRESig, NOT_TO_BE, 0); // 等待有数据 RELEASE(pipeArg.genRESig); + tm_arr[12].acc_end(); + tm_arr[22].acc_start(); POSSESS(pipeArg.sortSig); WAIT_FOR(pipeArg.sortSig, NOT_TO_BE, pipeArg.SORTBUFNUM); // 有BUFNUM个位置 RELEASE(pipeArg.sortSig); + tm_arr[22].acc_end(); if (pipeArg.genREFinish) { // 处理完剩余数据 @@ -1532,13 +1549,17 @@ static void *pipeMarkDup(void *data) { PipelineArg &pipeArg = *(PipelineArg *)data; while (1) { + tm_arr[13].acc_start(); POSSESS(pipeArg.sortSig); WAIT_FOR(pipeArg.sortSig, NOT_TO_BE, 0); // 等待有数据 RELEASE(pipeArg.sortSig); + tm_arr[13].acc_end(); + tm_arr[23].acc_start(); POSSESS(pipeArg.markDupSig); WAIT_FOR(pipeArg.markDupSig, NOT_TO_BE, pipeArg.MARKBUFNUM); RELEASE(pipeArg.markDupSig); + tm_arr[23].acc_end(); if (pipeArg.sortFinish) { // 应该还得处理剩余的数据 @@ -1554,7 +1575,7 @@ static void *pipeMarkDup(void *data) { break; } /* 冗余检测 readends */ -// cout << "markdup order: " << pipeArg.markDupOrder << endl; + cout << "markdup order: " << pipeArg.markDupOrder << endl; tm_arr[3].acc_start(); doMarkDup(pipeArg); tm_arr[3].acc_end(); @@ -1569,11 +1590,13 @@ static void *pipeMarkDup(void *data) { } static void *pipeIntersect(void *data) { PipelineArg &pipeArg = *(PipelineArg *)data; - + pipeArg.intersectOrder = 1; while (1) { + tm_arr[14].acc_start(); POSSESS(pipeArg.markDupSig); - WAIT_FOR(pipeArg.markDupSig, NOT_TO_BE, 0); // 等待有数据 + WAIT_FOR(pipeArg.markDupSig, TO_BE_MORE_THAN, 1); // 等待有数据 RELEASE(pipeArg.markDupSig); + tm_arr[14].acc_end(); if (pipeArg.markDupFinish) { while (pipeArg.intersectOrder < pipeArg.markDupOrder) { @@ -1587,11 +1610,13 @@ static void *pipeIntersect(void *data) { /* 交叉数据处理 readends */ // cout << "intersect order: " << pipeArg.intersectOrder << endl; tm_arr[4].acc_start(); +// cout << "intersect markdup size: " << PEEK_LOCK(pipeArg.markDupSig) << endl; doIntersect(pipeArg); tm_arr[4].acc_end(); POSSESS(pipeArg.markDupSig); TWIST(pipeArg.markDupSig, BY, -1); + pipeArg.intersectOrder += 1; } return 0; @@ -1600,9 +1625,8 @@ static void *pipeIntersect(void *data) { /* 当所有任务结束后,global data里还有未处理的数据 */ static void mergeAllTask(PipelineArg &pipeArg) { // cout << "last task start" << endl; - MarkDupData &mdData = pipeArg.markDupData[(pipeArg.markDupOrder - 1) % pipeArg.MARKBUFNUM]; + MarkDupData &lp = pipeArg.markDupData[(pipeArg.markDupOrder - 1) % pipeArg.MARKBUFNUM]; IntersectData &g = pipeArg.intersectData; - auto &lp = *(SortMarkData *)mdData.pSmd; // 遗留的未匹配的pair for (auto &prevUnpair : lp.unpairedDic) { // 遍历上一个任务中的每个未匹配的read @@ -1673,30 +1697,33 @@ static void mergeAllTask(PipelineArg &pipeArg) { g.dupIdxArr.push_back(vector()); auto &vIdx = g.dupIdxArr.back(); - mdData.pairDupIdx.insert(mdData.fragDupIdx.begin(), mdData.fragDupIdx.end()); - vIdx.insert(vIdx.end(), mdData.pairDupIdx.begin(), mdData.pairDupIdx.end()); + lp.pairDupIdx.insert(lp.fragDupIdx.begin(), lp.fragDupIdx.end()); + vIdx.insert(vIdx.end(), lp.pairDupIdx.begin(), lp.pairDupIdx.end()); std::sort(vIdx.begin(), vIdx.end()); g.opticalDupIdxArr.push_back(vector()); auto &vOpticalIdx = g.opticalDupIdxArr.back(); - vOpticalIdx.insert(vOpticalIdx.end(), mdData.pairOpticalDupIdx.begin(), mdData.pairOpticalDupIdx.end()); + vOpticalIdx.insert(vOpticalIdx.end(), lp.pairOpticalDupIdx.begin(), lp.pairOpticalDupIdx.end()); std::sort(vOpticalIdx.begin(), vOpticalIdx.end()); g.repIdxArr.push_back(vector()); auto &vRepIdx = g.repIdxArr.back(); - vRepIdx.insert(vRepIdx.end(), mdData.pairRepIdx.begin(), mdData.pairRepIdx.end()); + vRepIdx.insert(vRepIdx.end(), lp.pairRepIdx.begin(), lp.pairRepIdx.end()); std::sort(vRepIdx.begin(), vRepIdx.end()); g.singletonIdxArr.push_back(vector()); auto &vSingletonIdx = g.singletonIdxArr.back(); - vSingletonIdx.insert(vSingletonIdx.end(), mdData.pairSingletonIdx.begin(), mdData.pairSingletonIdx.end()); + vSingletonIdx.insert(vSingletonIdx.end(), lp.pairSingletonIdx.begin(), lp.pairSingletonIdx.end()); std::sort(vSingletonIdx.begin(), vSingletonIdx.end()); - gSingletonNum += mdData.pairSingletonIdx.size(); +// gSingletonNum += lp.pairSingletonIdx.size(); } static void startPipeline() { Timer::log_time("pipeline start"); PipelineArg pipeArg; + pipeArg.genREThreadNum = g_gArg.num_threads; + // / 2; + // g_gArg.num_threads / 2; pthread_t tidArr[5]; pthread_create(&tidArr[0], 0, pipeRead, &pipeArg); @@ -1716,21 +1743,48 @@ static void startPipeline() { for (auto &arr : pipeArg.intersectData.opticalDupIdxArr) opticalDupNum += arr.size(); for (auto &arr : pipeArg.intersectData.singletonIdxArr) singletonNum += arr.size(); + map dup; + #if 0 + int taskSeq = 0; + for (auto &arr : pipeArg.intersectData.dupIdxArr) { + for (auto idx : arr) { + if (dup.find(idx.idx) != dup.end()) { + //if (taskSeq - 1 > dup[idx]) + cout << "dup index: " << dup[idx] << '\t' << taskSeq << '\t' << idx << endl; + } + dup[idx.idx] = taskSeq; + } + // cout << taskSeq << "\t" << arr.size() << endl; + taskSeq++; + } + #endif + cout << "Final read order: " << pipeArg.readOrder << endl; cout << "Final gen order: " << pipeArg.genREOrder << endl; cout << "Final sort order: " << pipeArg.sortOrder << endl; cout << "Final mark order: " << pipeArg.markDupOrder << endl; cout << "Final inter order: " << pipeArg.intersectOrder << endl; + cout << "w read time : " << tm_arr[10].acc_seconds_elapsed() << endl; + cout << "w gen time : " << tm_arr[11].acc_seconds_elapsed() << endl; + cout << "w sort time : " << tm_arr[12].acc_seconds_elapsed() << endl; + cout << "w markdup time : " << tm_arr[13].acc_seconds_elapsed() << endl; + cout << "w intersect time: " << tm_arr[14].acc_seconds_elapsed() << endl; + + cout << "w1 gen time : " << tm_arr[21].acc_seconds_elapsed() << endl; + cout << "w1 sort time : " << tm_arr[22].acc_seconds_elapsed() << endl; + cout << "w1 markdup time : " << tm_arr[23].acc_seconds_elapsed() << endl; + cout << "read time : " << tm_arr[0].acc_seconds_elapsed() << endl; cout << "gen time : " << tm_arr[1].acc_seconds_elapsed() << endl; cout << "sort time : " << tm_arr[2].acc_seconds_elapsed() << endl; cout << "markdup time : " << tm_arr[3].acc_seconds_elapsed() << endl; cout << "intersect time: " << tm_arr[4].acc_seconds_elapsed() << endl; + cout << "copy time: " << tm_arr[5].acc_seconds_elapsed() << endl; cout << "merge al6 time: " << tm_arr[6].acc_seconds_elapsed() << endl; - cout << "dup num : " << dupNum << endl; + cout << "dup num : " << dupNum << "\t" << dup.size() << endl; cout << "optical dup num : " << opticalDupNum / 2 << "\t" << opticalDupNum << endl; cout << "singleton num : " << singletonNum << endl; @@ -1779,20 +1833,20 @@ void pipelineMarkDups() { tm_arr[0].acc_start(); Timer t1; - generateReadEnds(pdArg, curArgP); +// generateReadEnds(pdArg, curArgP); // cout << "calc read end time: " << t1.seconds_elapsed() << " s" << endl; tm_arr[0].acc_end(); tm_arr[1].acc_start(); t1.reinit(); - markdups(curArgP); +// markdups(curArgP); // cout << "markdups time: " << t1.seconds_elapsed() << " s" << endl; tm_arr[1].acc_end(); if (!isFirstRound) { tm_arr[2].acc_start(); t1.reinit(); - handleIntersectData(lastArgP, curArgP, &gData); +// handleIntersectData(lastArgP, curArgP, &gData); // cout << "intersect time: " << t1.seconds_elapsed() << " s" << endl; // addTaskIdxToSet(lastArgP, &gData); tm_arr[2].acc_end(); @@ -1815,7 +1869,7 @@ void pipelineMarkDups() { } tm_arr[3].acc_start(); // 处理剩下的全局数据 - handleLastTask(lastArgP, &gData); +// handleLastTask(lastArgP, &gData); // cout << "here 2" << endl; tm_arr[3].acc_end(); diff --git a/src/sam/markdups/pipeline_md.h b/src/sam/markdups/pipeline_md.h index eb6d97b..d2b3d60 100644 --- a/src/sam/markdups/pipeline_md.h +++ b/src/sam/markdups/pipeline_md.h @@ -10,7 +10,6 @@ struct ReadData { int64_t taskSeq = 0; // 任务序号 }; -class SortData; struct GenREData { vector> pairsArr; // 成对的reads vector> fragsArr; // 暂未找到配对的reads @@ -22,17 +21,23 @@ struct GenREData { unpairedDicArr.push_back(UnpairedNameMap()); } } + UnpairedNameMap unpairedDic; // 代替sort step中一部分计算 + UnpairedPositionMap unpairedPosArr; // }; -struct SortMarkData { +struct SortData { vector pairs; // 成对的reads vector frags; // 暂未找到配对的reads UnpairedNameMap unpairedDic; // 用来寻找pair end UnpairedPositionMap unpairedPosArr; // 存放未匹配的ReadEnd对应位点的所有ReadEnd,为了避免重复存储 -}; - -struct SortData { - volatile void *pSmd; + void CopyGenREData(GenREData &genREData) { + // unpairedDic.clear(); + // unpairedPosArr.clear(); + // for (auto &val : genREData.unpairedDic) unpairedDic.insert(val); + // for (auto &val : genREData.unpairedPosArr) unpairedPosArr.insert(val); + unpairedDic = genREData.unpairedDic; + unpairedPosArr = genREData.unpairedPosArr; + } }; struct MarkDupData { @@ -42,7 +47,27 @@ struct MarkDupData { DPSet fragDupIdx; // frag的冗余read的索引 DPSet pairRepIdx; // pair的dupset代表read的索引 MDSet pairSingletonIdx; // 某位置只有一对read的所有read pair个数 - volatile void *pSmd; + + // 从sort step拷贝过来的数据 + vector pairs; // 成对的reads + vector frags; // 暂未找到配对的reads + UnpairedNameMap unpairedDic; // 用来寻找pair end + UnpairedPositionMap unpairedPosArr; // 存放未匹配的ReadEnd对应位点的所有ReadEnd,为了避免重复存储 + + void CopySortData(SortData &sortData) { + // pairs.clear(); + // frags.clear(); + // unpairedDic.clear(); + // unpairedPosArr.clear(); + // for (auto &val : sortData.pairs) pairs.push_back(val); + // for (auto &val : sortData.frags) frags.push_back(val); + // for (auto &val : sortData.unpairedDic) unpairedDic.insert(val); + // for (auto &val : sortData.unpairedPosArr) unpairedPosArr.insert(val); + pairs = sortData.pairs; + frags = sortData.frags; + unpairedDic = sortData.unpairedDic; + unpairedPosArr = sortData.unpairedPosArr; + } }; struct IntersectData { @@ -64,6 +89,27 @@ struct IntersectData { vector> latterNotOpticalDupIdxArr; vector> latterNotRepIdxArr; vector> latterNotSingletonIdxArr; + + MarkDupData lp; + MarkDupData p; + void copyOneMarkdupData(MarkDupData &src, MarkDupData &dst) { + dst.taskSeq = src.taskSeq; + dst.pairDupIdx = src.pairDupIdx; + dst.pairOpticalDupIdx = src.pairOpticalDupIdx; + dst.fragDupIdx = src.fragDupIdx; + dst.pairRepIdx = src.pairRepIdx; + dst.pairSingletonIdx = src.pairSingletonIdx; + + // 从sort step拷贝过来的数据 + dst.pairs = src.pairs; + dst.frags = src.frags; + dst.unpairedDic = src.unpairedDic; + dst.unpairedPosArr = src.unpairedPosArr; + } + void CopyMarkDupData(MarkDupData &_lp, MarkDupData &_p) { + copyOneMarkdupData(_lp, lp); + copyOneMarkdupData(_p, p); + } }; // 记录流水线状态,task的序号,以及某阶段是否结束 @@ -76,6 +122,7 @@ struct PipelineArg { uint64_t sortOrder = 0; uint64_t markDupOrder = 0; uint64_t intersectOrder = 0; + int genREThreadNum = 0; volatile int readFinish = 0; volatile int genREFinish = 0; @@ -92,13 +139,8 @@ struct PipelineArg { genRESig = NEW_LOCK(0); // 最大值2, 双buffer sortSig = NEW_LOCK(0); markDupSig = NEW_LOCK(0); - for (int i = 0; i < SORTBUFNUM; ++i) sortData[i].pSmd = &smData[i]; - for (int i = 0; i < MARKBUFNUM; ++i) markDupData[i].pSmd = &smData[i + SORTBUFNUM]; } - // sort mark shared data - SortMarkData smData[SORTBUFNUM + MARKBUFNUM]; - // for step-1 read ReadData readData; // for step-2 generate readends diff --git a/src/sam/markdups/shared_args.h b/src/sam/markdups/shared_args.h index d0c51cc..c2479f1 100644 --- a/src/sam/markdups/shared_args.h +++ b/src/sam/markdups/shared_args.h @@ -11,7 +11,7 @@ #include using std::set; -extern Timer tm_arr[20]; // 用来测试性能 +extern Timer tm_arr[30]; // 用来测试性能 /* 全局本地变量 */ extern vector g_vRnParser; // 每个线程一个read name parser extern samFile *g_inBamFp; // 输入的bam文件