又去除了一部分singleton代码

This commit is contained in:
zzh 2024-11-21 23:16:11 +08:00
parent d37ea0afcc
commit 153e05399d
2 changed files with 13 additions and 26 deletions

View File

@ -322,20 +322,17 @@ static inline void refreshPairDupIdx(DPSet<DupInfo> &dupIdx, MDSet<int64_t> &opt
// Used to handle dup and optical dup separately.
// Inserts every element of each input index set into the latter* set named in
// the corresponding loop below; the input sets are only iterated here.
// NOTE(review): this listing looks like a diff rendering with the +/- markers
// stripped, so the parameter list and the loops below appear to interleave
// pre-change and post-change lines — confirm against the real file before
// relying on the exact signature.
static void refeshTaskDupInfo(DPSet<DupInfo> &dupIdx, MDSet<int64_t> &opticalDupIdx, DPSet<DupInfo> &repIdx,
MDSet<int64_t> &singletonIdx, MDSet<int64_t> &notDupIdx, MDSet<int64_t> &notOpticalDupIdx,
MDSet<int64_t> &notRepIdx, MDSet<int64_t> &notSingletonIdx, DPSet<DupInfo> &latterDupIdx,
MDSet<int64_t> &notDupIdx, MDSet<int64_t> &notOpticalDupIdx, MDSet<int64_t> &notRepIdx,
MDSet<int64_t> &notSingletonIdx, DPSet<DupInfo> &latterDupIdx,
MDSet<int64_t> &latterOpticalDupIdx, DPSet<DupInfo> &latterRepIdx,
MDSet<int64_t> &latterSingletonIdx, MDSet<int64_t> &latterNotDupIdx,
MDSet<int64_t> &latterNotOpticalDupIdx, MDSet<int64_t> &latterNotRepIdx,
MDSet<int64_t> &latterNotSingletonIdx) {
MDSet<int64_t> &latterNotDupIdx, MDSet<int64_t> &latterNotOpticalDupIdx,
MDSet<int64_t> &latterNotRepIdx) {
for (auto idx : dupIdx) latterDupIdx.insert(idx);
for (auto idx : opticalDupIdx) latterOpticalDupIdx.insert(idx);
for (auto idx : repIdx) latterRepIdx.insert(idx);
for (auto idx : singletonIdx) latterSingletonIdx.insert(idx);
for (auto idx : notDupIdx) latterNotDupIdx.insert(idx);
for (auto idx : notOpticalDupIdx) latterNotOpticalDupIdx.insert(idx);
for (auto idx : notRepIdx) latterNotRepIdx.insert(idx);
for (auto idx : notSingletonIdx) latterNotSingletonIdx.insert(idx);
}
/* 最后合并数据并排序 */
@ -506,7 +503,6 @@ static void doMarkDup(PipelineArg &pipeArg) {
mdData.pairDupIdx.clear();
mdData.pairOpticalDupIdx.clear();
mdData.pairRepIdx.clear();
mdData.pairSingletonIdx.clear();
tm_arr[5].acc_start();
auto tmpPtr = mdData.dataPtr;
@ -516,7 +512,7 @@ static void doMarkDup(PipelineArg &pipeArg) {
SortMarkData &smd = *(SortMarkData *)mdData.dataPtr;
// 先处理 pair
processPairs(smd.pairs, &mdData.pairDupIdx, &mdData.pairOpticalDupIdx, &mdData.pairRepIdx,
&mdData.pairSingletonIdx);
nullptr);
// 再处理frag
processFrags(smd.frags, &mdData.fragDupIdx);
}
@ -543,12 +539,10 @@ static void refreshMarkDupData(DPSet<DupInfo> &dupIdx, MDSet<int64_t> &opticalDu
refreshDupIdx(dupIdx, lp.pairDupIdx, p.pairDupIdx);
refreshDupIdx(opticalDupIdx, lp.pairOpticalDupIdx, p.pairOpticalDupIdx);
refreshDupIdx(repIdx, lp.pairRepIdx, p.pairRepIdx);
refreshDupIdx(singletonIdx, lp.pairSingletonIdx, p.pairSingletonIdx);
refreshNotDupIdx(notDupIdx, lp.pairDupIdx, p.pairDupIdx);
refreshNotDupIdx(notOpticalDupIdx, lp.pairOpticalDupIdx, p.pairOpticalDupIdx);
refreshNotDupIdx(notRepIdx, lp.pairRepIdx, p.pairRepIdx);
refreshNotDupIdx(notSingletonIdx, lp.pairSingletonIdx, p.pairSingletonIdx);
}
// for step-5 sort
@ -751,12 +745,10 @@ static void doIntersect(PipelineArg &pipeArg) {
auto taskSeq = e.first;
auto &t = e.second;
if (taskSeq < lp.taskSeq) {
refeshTaskDupInfo(t.dupIdx, t.opticalDupIdx, t.repIdx, t.singletonIdx, t.notDupIdx, t.notOpticalDupIdx,
t.notRepIdx, t.notSingletonIdx, g.latterDupIdxArr[taskSeq],
g.latterOpticalDupIdxArr[taskSeq], g.latterRepIdxArr[taskSeq],
g.latterSingletonIdxArr[taskSeq], g.latterNotDupIdxArr[taskSeq],
g.latterNotOpticalDupIdxArr[taskSeq], g.latterNotRepIdxArr[taskSeq],
g.latterNotSingletonIdxArr[taskSeq]);
refeshTaskDupInfo(t.dupIdx, t.opticalDupIdx, t.repIdx, t.notDupIdx, t.notOpticalDupIdx, t.notRepIdx,
t.notSingletonIdx, g.latterDupIdxArr[taskSeq], g.latterOpticalDupIdxArr[taskSeq],
g.latterRepIdxArr[taskSeq], g.latterNotDupIdxArr[taskSeq],
g.latterNotOpticalDupIdxArr[taskSeq], g.latterNotRepIdxArr[taskSeq]);
} else if (taskSeq == lp.taskSeq) {
refreshMarkDupData(t.dupIdx, t.opticalDupIdx, t.repIdx, t.singletonIdx, t.notDupIdx, t.notOpticalDupIdx,
t.notRepIdx, t.notSingletonIdx, lp, p);
@ -1033,11 +1025,10 @@ static void mergeAllTask(PipelineArg &pipeArg) {
for (auto &e : taskChanged) {
auto taskSeq = e.first;
auto &t = e.second;
refeshTaskDupInfo(t.dupIdx, t.opticalDupIdx, t.repIdx, t.singletonIdx, t.notDupIdx, t.notOpticalDupIdx,
t.notRepIdx, t.notSingletonIdx, g.latterDupIdxArr[taskSeq], g.latterOpticalDupIdxArr[taskSeq],
g.latterRepIdxArr[taskSeq], g.latterSingletonIdxArr[taskSeq], g.latterNotDupIdxArr[taskSeq],
g.latterNotOpticalDupIdxArr[taskSeq], g.latterNotRepIdxArr[taskSeq],
g.latterNotSingletonIdxArr[taskSeq]);
refeshTaskDupInfo(t.dupIdx, t.opticalDupIdx, t.repIdx, t.notDupIdx, t.notOpticalDupIdx, t.notRepIdx,
t.notSingletonIdx, g.latterDupIdxArr[taskSeq], g.latterOpticalDupIdxArr[taskSeq],
g.latterRepIdxArr[taskSeq], g.latterNotDupIdxArr[taskSeq],
g.latterNotOpticalDupIdxArr[taskSeq], g.latterNotRepIdxArr[taskSeq]);
}
g.unpairedPosArr.clear();

View File

@ -42,7 +42,6 @@ struct MarkDupData {
MDSet<int64_t> pairOpticalDupIdx; // optical冗余read的索引
DPSet<DupInfo> fragDupIdx; // frag的冗余read的索引
DPSet<DupInfo> pairRepIdx; // pair的dupset代表read的索引
MDSet<int64_t> pairSingletonIdx; // 某位置只有一对read的所有read pair个数
volatile void *dataPtr; // SortMarkData pointer
};
@ -55,17 +54,14 @@ struct IntersectData {
vector<vector<DupInfo>> dupIdxArr;
vector<vector<int64_t>> opticalDupIdxArr;
vector<vector<DupInfo>> repIdxArr;
vector<vector<int64_t>> singletonIdxArr;
// 用来存放后续计算的数据
vector<DPSet<DupInfo>> latterDupIdxArr;
vector<MDSet<int64_t>> latterOpticalDupIdxArr;
vector<DPSet<DupInfo>> latterRepIdxArr;
vector<MDSet<int64_t>> latterSingletonIdxArr;
vector<MDSet<int64_t>> latterNotDupIdxArr;
vector<MDSet<int64_t>> latterNotOpticalDupIdxArr;
vector<MDSet<int64_t>> latterNotRepIdxArr;
vector<MDSet<int64_t>> latterNotSingletonIdxArr;
};
// 记录流水线状态task的序号以及某阶段是否结束