完成去除singleton相关的代码
This commit is contained in:
parent
153e05399d
commit
6de538670f
4
run.sh
4
run.sh
|
|
@ -6,8 +6,8 @@
|
|||
nthread=32
|
||||
#nthread=64
|
||||
#nthread=128
|
||||
input=/home/zzh/data/bam/zy_normal.bam
|
||||
#input=/home/zzh/data/bam/zy_tumor.bam
|
||||
#input=/home/zzh/data/bam/zy_normal.bam
|
||||
input=/home/zzh/data/bam/zy_tumor.bam
|
||||
#input=/home/zzh/data/wgs_na12878.bam
|
||||
#input=~/data/bam/100w.bam
|
||||
#input=~/data/bam/t100w.sam
|
||||
|
|
|
|||
|
|
@ -58,9 +58,8 @@ static inline void sortReadEndsArr(vector<ReadEnds> &arr) {
|
|||
|
||||
/* 处理一组pairend的readends,标记冗余, 这个函数需要串行运行,因为需要做一些统计*/
|
||||
static void markDupsForPairs(vector<const ReadEnds *> &vpRe, DPSet<DupInfo> *dupIdx, MDSet<int64_t> *opticalDupIdx,
|
||||
DPSet<DupInfo> *repIdx, MDSet<int64_t> *singletonIdx, MDSet<int64_t> *notDupIdx = nullptr,
|
||||
MDSet<int64_t> *notOpticalDupIdx = nullptr, MDSet<int64_t> *notRepIdx = nullptr,
|
||||
MDSet<int64_t> *notSingletonIdx = nullptr) {
|
||||
DPSet<DupInfo> *repIdx, MDSet<int64_t> *notDupIdx = nullptr,
|
||||
MDSet<int64_t> *notOpticalDupIdx = nullptr, MDSet<int64_t> *notRepIdx = nullptr ) {
|
||||
if (vpRe.size() < 2) {
|
||||
return;
|
||||
}
|
||||
|
|
@ -171,9 +170,8 @@ static void getEqualRE(const ReadEnds &re, vector<ReadEnds> &src, vector<ReadEnd
|
|||
|
||||
/* 处理pairs */
|
||||
static void processPairs(vector<ReadEnds> &readEnds, DPSet<DupInfo> *dupIdx, MDSet<int64_t> *opticalDupIdx,
|
||||
DPSet<DupInfo> *repIdx, MDSet<int64_t> *singletonIdx, MDSet<int64_t> *notDupIdx = nullptr,
|
||||
MDSet<int64_t> *notOpticalDupIdx = nullptr, MDSet<int64_t> *notRepIdx = nullptr,
|
||||
MDSet<int64_t> *notSingletonIdx = nullptr) {
|
||||
DPSet<DupInfo> *repIdx, MDSet<int64_t> *notDupIdx = nullptr,
|
||||
MDSet<int64_t> *notOpticalDupIdx = nullptr, MDSet<int64_t> *notRepIdx = nullptr) {
|
||||
// return;
|
||||
vector<const ReadEnds *> vpCache; // 有可能是冗余的reads
|
||||
const ReadEnds *pReadEnd = nullptr;
|
||||
|
|
@ -181,15 +179,14 @@ static void processPairs(vector<ReadEnds> &readEnds, DPSet<DupInfo> *dupIdx, MDS
|
|||
if (pReadEnd != nullptr && ReadEnds::AreComparableForDuplicates(*pReadEnd, re, true)) // 跟前一个一样
|
||||
vpCache.push_back(&re); // 处理一个潜在的冗余组
|
||||
else {
|
||||
markDupsForPairs(vpCache, dupIdx, opticalDupIdx, repIdx, singletonIdx, notDupIdx, notOpticalDupIdx,
|
||||
notRepIdx, notSingletonIdx); // 不一样
|
||||
markDupsForPairs(vpCache, dupIdx, opticalDupIdx, repIdx, notDupIdx, notOpticalDupIdx,
|
||||
notRepIdx); // 不一样
|
||||
vpCache.clear();
|
||||
vpCache.push_back(&re);
|
||||
pReadEnd = &re;
|
||||
}
|
||||
}
|
||||
markDupsForPairs(vpCache, dupIdx, opticalDupIdx, repIdx, singletonIdx, notDupIdx, notOpticalDupIdx, notRepIdx,
|
||||
notSingletonIdx);
|
||||
markDupsForPairs(vpCache, dupIdx, opticalDupIdx, repIdx, notDupIdx, notOpticalDupIdx, notRepIdx);
|
||||
}
|
||||
|
||||
/* 处理frags */
|
||||
|
|
@ -279,54 +276,12 @@ static inline void refreshFragDupIdx(DPSet<DupInfo> &dupIdx, MDSet<int64_t> ¬
|
|||
}
|
||||
}
|
||||
|
||||
/* 将pairs重叠部分的dup idx放进数据中 */
|
||||
static inline void refreshPairDupIdx(DPSet<DupInfo> &dupIdx, MDSet<int64_t> &opticalDupIdx, DPSet<DupInfo> &repIdx,
|
||||
MDSet<int64_t> &singletonIdx, MDSet<int64_t> ¬DupIdx,
|
||||
MDSet<int64_t> ¬OpticalDupIdx, MDSet<int64_t> ¬RepIdx,
|
||||
MDSet<int64_t> ¬SingletonIdx, MarkDupDataArg *lastArg, MarkDupDataArg *curArg) {
|
||||
auto &lp = *lastArg;
|
||||
auto &p = *curArg;
|
||||
for (auto idx : dupIdx) {
|
||||
lp.pairDupIdx.insert(idx);
|
||||
p.pairDupIdx.erase(idx);
|
||||
}
|
||||
for (auto idx : opticalDupIdx) {
|
||||
lp.pairOpticalDupIdx.insert(idx);
|
||||
p.pairOpticalDupIdx.erase(idx);
|
||||
}
|
||||
for (auto idx : repIdx) {
|
||||
lp.pairRepIdx.insert(idx);
|
||||
p.pairRepIdx.erase(idx);
|
||||
}
|
||||
for (auto idx : singletonIdx) {
|
||||
lp.pairSingletonIdx.insert(idx);
|
||||
p.pairSingletonIdx.erase(idx);
|
||||
}
|
||||
for (auto idx : notDupIdx) {
|
||||
lp.pairDupIdx.erase(idx);
|
||||
p.pairDupIdx.erase(idx);
|
||||
}
|
||||
for (auto idx : notOpticalDupIdx) {
|
||||
lp.pairOpticalDupIdx.erase(idx);
|
||||
p.pairOpticalDupIdx.erase(idx);
|
||||
}
|
||||
for (auto idx : notRepIdx) {
|
||||
lp.pairRepIdx.erase(idx);
|
||||
p.pairRepIdx.erase(idx);
|
||||
}
|
||||
for (auto idx : notSingletonIdx) {
|
||||
lp.pairSingletonIdx.erase(idx);
|
||||
p.pairSingletonIdx.erase(idx);
|
||||
}
|
||||
}
|
||||
|
||||
// 用来分别处理dup和optical dup
|
||||
static void refeshTaskDupInfo(DPSet<DupInfo> &dupIdx, MDSet<int64_t> &opticalDupIdx, DPSet<DupInfo> &repIdx,
|
||||
MDSet<int64_t> ¬DupIdx, MDSet<int64_t> ¬OpticalDupIdx, MDSet<int64_t> ¬RepIdx,
|
||||
MDSet<int64_t> ¬SingletonIdx, DPSet<DupInfo> &latterDupIdx,
|
||||
MDSet<int64_t> &latterOpticalDupIdx, DPSet<DupInfo> &latterRepIdx,
|
||||
MDSet<int64_t> &latterNotDupIdx, MDSet<int64_t> &latterNotOpticalDupIdx,
|
||||
MDSet<int64_t> &latterNotRepIdx) {
|
||||
DPSet<DupInfo> &latterDupIdx, MDSet<int64_t> &latterOpticalDupIdx,
|
||||
DPSet<DupInfo> &latterRepIdx, MDSet<int64_t> &latterNotDupIdx,
|
||||
MDSet<int64_t> &latterNotOpticalDupIdx, MDSet<int64_t> &latterNotRepIdx) {
|
||||
for (auto idx : dupIdx) latterDupIdx.insert(idx);
|
||||
for (auto idx : opticalDupIdx) latterOpticalDupIdx.insert(idx);
|
||||
for (auto idx : repIdx) latterRepIdx.insert(idx);
|
||||
|
|
@ -511,8 +466,7 @@ static void doMarkDup(PipelineArg &pipeArg) {
|
|||
tm_arr[5].acc_end();
|
||||
SortMarkData &smd = *(SortMarkData *)mdData.dataPtr;
|
||||
// 先处理 pair
|
||||
processPairs(smd.pairs, &mdData.pairDupIdx, &mdData.pairOpticalDupIdx, &mdData.pairRepIdx,
|
||||
nullptr);
|
||||
processPairs(smd.pairs, &mdData.pairDupIdx, &mdData.pairOpticalDupIdx, &mdData.pairRepIdx);
|
||||
// 再处理frag
|
||||
processFrags(smd.frags, &mdData.fragDupIdx);
|
||||
}
|
||||
|
|
@ -533,9 +487,8 @@ static void refreshNotDupIdx(T1 &srcArr, T2 &delArr1, T2 &delArr2) {
|
|||
}
|
||||
|
||||
static void refreshMarkDupData(DPSet<DupInfo> &dupIdx, MDSet<int64_t> &opticalDupIdx, DPSet<DupInfo> &repIdx,
|
||||
MDSet<int64_t> &singletonIdx, MDSet<int64_t> ¬DupIdx,
|
||||
MDSet<int64_t> ¬OpticalDupIdx, MDSet<int64_t> ¬RepIdx,
|
||||
MDSet<int64_t> ¬SingletonIdx, MarkDupData &lp, MarkDupData &p) {
|
||||
MDSet<int64_t> ¬DupIdx, MDSet<int64_t> ¬OpticalDupIdx, MDSet<int64_t> ¬RepIdx,
|
||||
MarkDupData &lp, MarkDupData &p) {
|
||||
refreshDupIdx(dupIdx, lp.pairDupIdx, p.pairDupIdx);
|
||||
refreshDupIdx(opticalDupIdx, lp.pairOpticalDupIdx, p.pairOpticalDupIdx);
|
||||
refreshDupIdx(repIdx, lp.pairRepIdx, p.pairRepIdx);
|
||||
|
|
@ -558,11 +511,9 @@ static void doIntersect(PipelineArg &pipeArg) {
|
|||
DPSet<DupInfo> dupIdx;
|
||||
MDSet<int64_t> opticalDupIdx;
|
||||
DPSet<DupInfo> repIdx;
|
||||
MDSet<int64_t> singletonIdx;
|
||||
MDSet<int64_t> notOpticalDupIdx;
|
||||
MDSet<int64_t> notDupIdx;
|
||||
MDSet<int64_t> notRepIdx;
|
||||
MDSet<int64_t> notSingletonIdx;
|
||||
|
||||
// 先处理重叠的frags
|
||||
getIntersectData(lpSM.frags, pSM.frags, &reArr);
|
||||
|
|
@ -576,10 +527,8 @@ static void doIntersect(PipelineArg &pipeArg) {
|
|||
notDupIdx.clear();
|
||||
getIntersectData(lpSM.pairs, pSM.pairs, &reArr, true);
|
||||
|
||||
processPairs(reArr, &dupIdx, &opticalDupIdx, &repIdx, &singletonIdx, ¬DupIdx, ¬OpticalDupIdx, ¬RepIdx,
|
||||
¬SingletonIdx);
|
||||
refreshMarkDupData(dupIdx, opticalDupIdx, repIdx, singletonIdx, notDupIdx, notOpticalDupIdx, notRepIdx,
|
||||
notSingletonIdx, lp, p);
|
||||
processPairs(reArr, &dupIdx, &opticalDupIdx, &repIdx, ¬DupIdx, ¬OpticalDupIdx, ¬RepIdx);
|
||||
refreshMarkDupData(dupIdx, opticalDupIdx, repIdx, notDupIdx, notOpticalDupIdx, notRepIdx, lp, p);
|
||||
|
||||
// 处理之前未匹配的部分
|
||||
map<CalcKey, int64_t> recalcPos;
|
||||
|
|
@ -727,8 +676,8 @@ static void doIntersect(PipelineArg &pipeArg) {
|
|||
pairArrP = &g.unpairedPosArr[posKey].pairArr;
|
||||
else
|
||||
pairArrP = &lpSM.unpairedPosArr[posKey].pairArr;
|
||||
processPairs(*pairArrP, &t.dupIdx, &t.opticalDupIdx, &t.repIdx, &t.singletonIdx, &t.notDupIdx,
|
||||
&t.notOpticalDupIdx, &t.notRepIdx, &t.notSingletonIdx);
|
||||
processPairs(*pairArrP, &t.dupIdx, &t.opticalDupIdx, &t.repIdx, &t.notDupIdx, &t.notOpticalDupIdx,
|
||||
&t.notRepIdx);
|
||||
if (taskSeq < lp.taskSeq)
|
||||
g.unpairedPosArr.erase(posKey);
|
||||
}
|
||||
|
|
@ -746,15 +695,15 @@ static void doIntersect(PipelineArg &pipeArg) {
|
|||
auto &t = e.second;
|
||||
if (taskSeq < lp.taskSeq) {
|
||||
refeshTaskDupInfo(t.dupIdx, t.opticalDupIdx, t.repIdx, t.notDupIdx, t.notOpticalDupIdx, t.notRepIdx,
|
||||
t.notSingletonIdx, g.latterDupIdxArr[taskSeq], g.latterOpticalDupIdxArr[taskSeq],
|
||||
g.latterRepIdxArr[taskSeq], g.latterNotDupIdxArr[taskSeq],
|
||||
g.latterNotOpticalDupIdxArr[taskSeq], g.latterNotRepIdxArr[taskSeq]);
|
||||
g.latterDupIdxArr[taskSeq], g.latterOpticalDupIdxArr[taskSeq], g.latterRepIdxArr[taskSeq],
|
||||
g.latterNotDupIdxArr[taskSeq], g.latterNotOpticalDupIdxArr[taskSeq],
|
||||
g.latterNotRepIdxArr[taskSeq]);
|
||||
} else if (taskSeq == lp.taskSeq) {
|
||||
refreshMarkDupData(t.dupIdx, t.opticalDupIdx, t.repIdx, t.singletonIdx, t.notDupIdx, t.notOpticalDupIdx,
|
||||
t.notRepIdx, t.notSingletonIdx, lp, p);
|
||||
refreshMarkDupData(t.dupIdx, t.opticalDupIdx, t.repIdx, t.notDupIdx, t.notOpticalDupIdx,
|
||||
t.notRepIdx, lp, p);
|
||||
} else {
|
||||
refreshMarkDupData(t.dupIdx, t.opticalDupIdx, t.repIdx, t.singletonIdx, t.notDupIdx, t.notOpticalDupIdx,
|
||||
t.notRepIdx, t.notSingletonIdx, p, lp); // 把结果放到p中
|
||||
refreshMarkDupData(t.dupIdx, t.opticalDupIdx, t.repIdx, t.notDupIdx, t.notOpticalDupIdx,
|
||||
t.notRepIdx, p, lp); // 把结果放到p中
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -1015,8 +964,7 @@ static void mergeAllTask(PipelineArg &pipeArg) {
|
|||
|
||||
if (arr.size() > 1) {
|
||||
std::sort(arr.begin(), arr.end());
|
||||
processPairs(arr, &t.dupIdx, &t.opticalDupIdx, &t.repIdx, &t.singletonIdx, &t.notDupIdx,
|
||||
&t.notOpticalDupIdx, &t.notRepIdx, &t.notSingletonIdx);
|
||||
processPairs(arr, &t.dupIdx, &t.opticalDupIdx, &t.repIdx, &t.notDupIdx, &t.notOpticalDupIdx, &t.notRepIdx);
|
||||
}
|
||||
}
|
||||
// 更新结果
|
||||
|
|
@ -1026,9 +974,9 @@ static void mergeAllTask(PipelineArg &pipeArg) {
|
|||
auto taskSeq = e.first;
|
||||
auto &t = e.second;
|
||||
refeshTaskDupInfo(t.dupIdx, t.opticalDupIdx, t.repIdx, t.notDupIdx, t.notOpticalDupIdx, t.notRepIdx,
|
||||
t.notSingletonIdx, g.latterDupIdxArr[taskSeq], g.latterOpticalDupIdxArr[taskSeq],
|
||||
g.latterRepIdxArr[taskSeq], g.latterNotDupIdxArr[taskSeq],
|
||||
g.latterNotOpticalDupIdxArr[taskSeq], g.latterNotRepIdxArr[taskSeq]);
|
||||
g.latterDupIdxArr[taskSeq], g.latterOpticalDupIdxArr[taskSeq], g.latterRepIdxArr[taskSeq],
|
||||
g.latterNotDupIdxArr[taskSeq], g.latterNotOpticalDupIdxArr[taskSeq],
|
||||
g.latterNotRepIdxArr[taskSeq]);
|
||||
}
|
||||
g.unpairedPosArr.clear();
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue