diff --git a/run.sh b/run.sh index 72839c2..8f3b6bc 100755 --- a/run.sh +++ b/run.sh @@ -6,8 +6,8 @@ nthread=32 #nthread=64 #nthread=128 -input=/home/zzh/data/bam/zy_normal.bam -#input=/home/zzh/data/bam/zy_tumor.bam +#input=/home/zzh/data/bam/zy_normal.bam +input=/home/zzh/data/bam/zy_tumor.bam #input=/home/zzh/data/wgs_na12878.bam #input=~/data/bam/100w.bam #input=~/data/bam/t100w.sam diff --git a/src/sam/markdups/pipeline_md.cpp b/src/sam/markdups/pipeline_md.cpp index d731f76..818f6e6 100644 --- a/src/sam/markdups/pipeline_md.cpp +++ b/src/sam/markdups/pipeline_md.cpp @@ -58,9 +58,8 @@ static inline void sortReadEndsArr(vector &arr) { /* 处理一组pairend的readends,标记冗余, 这个函数需要串行运行,因为需要做一些统计*/ static void markDupsForPairs(vector &vpRe, DPSet *dupIdx, MDSet *opticalDupIdx, - DPSet *repIdx, MDSet *singletonIdx, MDSet *notDupIdx = nullptr, - MDSet *notOpticalDupIdx = nullptr, MDSet *notRepIdx = nullptr, - MDSet *notSingletonIdx = nullptr) { + DPSet *repIdx, MDSet *notDupIdx = nullptr, + MDSet *notOpticalDupIdx = nullptr, MDSet *notRepIdx = nullptr ) { if (vpRe.size() < 2) { return; } @@ -171,9 +170,8 @@ static void getEqualRE(const ReadEnds &re, vector &src, vector &readEnds, DPSet *dupIdx, MDSet *opticalDupIdx, - DPSet *repIdx, MDSet *singletonIdx, MDSet *notDupIdx = nullptr, - MDSet *notOpticalDupIdx = nullptr, MDSet *notRepIdx = nullptr, - MDSet *notSingletonIdx = nullptr) { + DPSet *repIdx, MDSet *notDupIdx = nullptr, + MDSet *notOpticalDupIdx = nullptr, MDSet *notRepIdx = nullptr) { // return; vector vpCache; // 有可能是冗余的reads const ReadEnds *pReadEnd = nullptr; @@ -181,15 +179,14 @@ static void processPairs(vector &readEnds, DPSet *dupIdx, MDS if (pReadEnd != nullptr && ReadEnds::AreComparableForDuplicates(*pReadEnd, re, true)) // 跟前一个一样 vpCache.push_back(&re); // 处理一个潜在的冗余组 else { - markDupsForPairs(vpCache, dupIdx, opticalDupIdx, repIdx, singletonIdx, notDupIdx, notOpticalDupIdx, - notRepIdx, notSingletonIdx); // 不一样 + markDupsForPairs(vpCache, dupIdx, opticalDupIdx, repIdx, notDupIdx, notOpticalDupIdx, + notRepIdx); // 不一样 vpCache.clear(); vpCache.push_back(&re); pReadEnd = &re; } } - markDupsForPairs(vpCache, dupIdx, opticalDupIdx, repIdx, singletonIdx, notDupIdx, notOpticalDupIdx, notRepIdx, - notSingletonIdx); + markDupsForPairs(vpCache, dupIdx, opticalDupIdx, repIdx, notDupIdx, notOpticalDupIdx, notRepIdx); } /* 处理frags */ @@ -279,54 +276,12 @@ static inline void refreshFragDupIdx(DPSet &dupIdx, MDSet ¬ } } -/* 将pairs重叠部分的dup idx放进数据中 */ -static inline void refreshPairDupIdx(DPSet &dupIdx, MDSet &opticalDupIdx, DPSet &repIdx, - MDSet &singletonIdx, MDSet ¬DupIdx, - MDSet ¬OpticalDupIdx, MDSet ¬RepIdx, - MDSet ¬SingletonIdx, MarkDupDataArg *lastArg, MarkDupDataArg *curArg) { - auto &lp = *lastArg; - auto &p = *curArg; - for (auto idx : dupIdx) { - lp.pairDupIdx.insert(idx); - p.pairDupIdx.erase(idx); - } - for (auto idx : opticalDupIdx) { - lp.pairOpticalDupIdx.insert(idx); - p.pairOpticalDupIdx.erase(idx); - } - for (auto idx : repIdx) { - lp.pairRepIdx.insert(idx); - p.pairRepIdx.erase(idx); - } - for (auto idx : singletonIdx) { - lp.pairSingletonIdx.insert(idx); - p.pairSingletonIdx.erase(idx); - } - for (auto idx : notDupIdx) { - lp.pairDupIdx.erase(idx); - p.pairDupIdx.erase(idx); - } - for (auto idx : notOpticalDupIdx) { - lp.pairOpticalDupIdx.erase(idx); - p.pairOpticalDupIdx.erase(idx); - } - for (auto idx : notRepIdx) { - lp.pairRepIdx.erase(idx); - p.pairRepIdx.erase(idx); - } - for (auto idx : notSingletonIdx) { - lp.pairSingletonIdx.erase(idx); - p.pairSingletonIdx.erase(idx); - } -} - // 用来分别处理dup和optical dup static void refeshTaskDupInfo(DPSet &dupIdx, MDSet &opticalDupIdx, DPSet &repIdx, MDSet ¬DupIdx, MDSet ¬OpticalDupIdx, MDSet ¬RepIdx, - MDSet ¬SingletonIdx, DPSet &latterDupIdx, - MDSet &latterOpticalDupIdx, DPSet &latterRepIdx, - MDSet &latterNotDupIdx, MDSet &latterNotOpticalDupIdx, - MDSet &latterNotRepIdx) { + DPSet &latterDupIdx, MDSet &latterOpticalDupIdx, + DPSet &latterRepIdx, MDSet &latterNotDupIdx, + MDSet &latterNotOpticalDupIdx, MDSet &latterNotRepIdx) { for (auto idx : dupIdx) latterDupIdx.insert(idx); for (auto idx : opticalDupIdx) latterOpticalDupIdx.insert(idx); for (auto idx : repIdx) latterRepIdx.insert(idx); @@ -511,8 +466,7 @@ static void doMarkDup(PipelineArg &pipeArg) { tm_arr[5].acc_end(); SortMarkData &smd = *(SortMarkData *)mdData.dataPtr; // 先处理 pair - processPairs(smd.pairs, &mdData.pairDupIdx, &mdData.pairOpticalDupIdx, &mdData.pairRepIdx, - nullptr); + processPairs(smd.pairs, &mdData.pairDupIdx, &mdData.pairOpticalDupIdx, &mdData.pairRepIdx); // 再处理frag processFrags(smd.frags, &mdData.fragDupIdx); } @@ -533,9 +487,8 @@ static void refreshNotDupIdx(T1 &srcArr, T2 &delArr1, T2 &delArr2) { } static void refreshMarkDupData(DPSet &dupIdx, MDSet &opticalDupIdx, DPSet &repIdx, - MDSet &singletonIdx, MDSet ¬DupIdx, - MDSet ¬OpticalDupIdx, MDSet ¬RepIdx, - MDSet ¬SingletonIdx, MarkDupData &lp, MarkDupData &p) { + MDSet ¬DupIdx, MDSet ¬OpticalDupIdx, MDSet ¬RepIdx, + MarkDupData &lp, MarkDupData &p) { refreshDupIdx(dupIdx, lp.pairDupIdx, p.pairDupIdx); refreshDupIdx(opticalDupIdx, lp.pairOpticalDupIdx, p.pairOpticalDupIdx); refreshDupIdx(repIdx, lp.pairRepIdx, p.pairRepIdx); @@ -558,11 +511,9 @@ static void doIntersect(PipelineArg &pipeArg) { DPSet dupIdx; MDSet opticalDupIdx; DPSet repIdx; - MDSet singletonIdx; MDSet notOpticalDupIdx; MDSet notDupIdx; MDSet notRepIdx; - MDSet notSingletonIdx; // 先处理重叠的frags getIntersectData(lpSM.frags, pSM.frags, &reArr); @@ -576,10 +527,8 @@ static void doIntersect(PipelineArg &pipeArg) { notDupIdx.clear(); getIntersectData(lpSM.pairs, pSM.pairs, &reArr, true); - processPairs(reArr, &dupIdx, &opticalDupIdx, &repIdx, &singletonIdx, ¬DupIdx, ¬OpticalDupIdx, ¬RepIdx, - ¬SingletonIdx); - refreshMarkDupData(dupIdx, opticalDupIdx, repIdx, singletonIdx, notDupIdx, notOpticalDupIdx, notRepIdx, - notSingletonIdx, lp, p); + processPairs(reArr, &dupIdx, &opticalDupIdx, &repIdx, ¬DupIdx, ¬OpticalDupIdx, ¬RepIdx); + refreshMarkDupData(dupIdx, opticalDupIdx, repIdx, notDupIdx, notOpticalDupIdx, notRepIdx, lp, p); // 处理之前未匹配的部分 map recalcPos; @@ -727,8 +676,8 @@ static void doIntersect(PipelineArg &pipeArg) { pairArrP = &g.unpairedPosArr[posKey].pairArr; else pairArrP = &lpSM.unpairedPosArr[posKey].pairArr; - processPairs(*pairArrP, &t.dupIdx, &t.opticalDupIdx, &t.repIdx, &t.singletonIdx, &t.notDupIdx, - &t.notOpticalDupIdx, &t.notRepIdx, &t.notSingletonIdx); + processPairs(*pairArrP, &t.dupIdx, &t.opticalDupIdx, &t.repIdx, &t.notDupIdx, &t.notOpticalDupIdx, + &t.notRepIdx); if (taskSeq < lp.taskSeq) g.unpairedPosArr.erase(posKey); } @@ -746,15 +695,15 @@ static void doIntersect(PipelineArg &pipeArg) { auto &t = e.second; if (taskSeq < lp.taskSeq) { refeshTaskDupInfo(t.dupIdx, t.opticalDupIdx, t.repIdx, t.notDupIdx, t.notOpticalDupIdx, t.notRepIdx, - t.notSingletonIdx, g.latterDupIdxArr[taskSeq], g.latterOpticalDupIdxArr[taskSeq], - g.latterRepIdxArr[taskSeq], g.latterNotDupIdxArr[taskSeq], - g.latterNotOpticalDupIdxArr[taskSeq], g.latterNotRepIdxArr[taskSeq]); + g.latterDupIdxArr[taskSeq], g.latterOpticalDupIdxArr[taskSeq], g.latterRepIdxArr[taskSeq], + g.latterNotDupIdxArr[taskSeq], g.latterNotOpticalDupIdxArr[taskSeq], + g.latterNotRepIdxArr[taskSeq]); } else if (taskSeq == lp.taskSeq) { - refreshMarkDupData(t.dupIdx, t.opticalDupIdx, t.repIdx, t.singletonIdx, t.notDupIdx, t.notOpticalDupIdx, - t.notRepIdx, t.notSingletonIdx, lp, p); + refreshMarkDupData(t.dupIdx, t.opticalDupIdx, t.repIdx, t.notDupIdx, t.notOpticalDupIdx, + t.notRepIdx, lp, p); } else { - refreshMarkDupData(t.dupIdx, t.opticalDupIdx, t.repIdx, t.singletonIdx, t.notDupIdx, t.notOpticalDupIdx, - t.notRepIdx, t.notSingletonIdx, p, lp); // 把结果放到p中 + refreshMarkDupData(t.dupIdx, t.opticalDupIdx, t.repIdx, t.notDupIdx, t.notOpticalDupIdx, + t.notRepIdx, p, lp); // 把结果放到p中 } } @@ -1015,8 +964,7 @@ static void mergeAllTask(PipelineArg &pipeArg) { if (arr.size() > 1) { std::sort(arr.begin(), arr.end()); - processPairs(arr, &t.dupIdx, &t.opticalDupIdx, &t.repIdx, &t.singletonIdx, &t.notDupIdx, - &t.notOpticalDupIdx, &t.notRepIdx, &t.notSingletonIdx); + processPairs(arr, &t.dupIdx, &t.opticalDupIdx, &t.repIdx, &t.notDupIdx, &t.notOpticalDupIdx, &t.notRepIdx); } } // 更新结果 @@ -1026,9 +974,9 @@ static void mergeAllTask(PipelineArg &pipeArg) { auto taskSeq = e.first; auto &t = e.second; refeshTaskDupInfo(t.dupIdx, t.opticalDupIdx, t.repIdx, t.notDupIdx, t.notOpticalDupIdx, t.notRepIdx, - t.notSingletonIdx, g.latterDupIdxArr[taskSeq], g.latterOpticalDupIdxArr[taskSeq], - g.latterRepIdxArr[taskSeq], g.latterNotDupIdxArr[taskSeq], - g.latterNotOpticalDupIdxArr[taskSeq], g.latterNotRepIdxArr[taskSeq]); + g.latterDupIdxArr[taskSeq], g.latterOpticalDupIdxArr[taskSeq], g.latterRepIdxArr[taskSeq], + g.latterNotDupIdxArr[taskSeq], g.latterNotOpticalDupIdxArr[taskSeq], + g.latterNotRepIdxArr[taskSeq]); } g.unpairedPosArr.clear();