完成去除singleton相关的代码
This commit is contained in:
parent
153e05399d
commit
6de538670f
4
run.sh
4
run.sh
|
|
@ -6,8 +6,8 @@
|
||||||
nthread=32
|
nthread=32
|
||||||
#nthread=64
|
#nthread=64
|
||||||
#nthread=128
|
#nthread=128
|
||||||
input=/home/zzh/data/bam/zy_normal.bam
|
#input=/home/zzh/data/bam/zy_normal.bam
|
||||||
#input=/home/zzh/data/bam/zy_tumor.bam
|
input=/home/zzh/data/bam/zy_tumor.bam
|
||||||
#input=/home/zzh/data/wgs_na12878.bam
|
#input=/home/zzh/data/wgs_na12878.bam
|
||||||
#input=~/data/bam/100w.bam
|
#input=~/data/bam/100w.bam
|
||||||
#input=~/data/bam/t100w.sam
|
#input=~/data/bam/t100w.sam
|
||||||
|
|
|
||||||
|
|
@ -58,9 +58,8 @@ static inline void sortReadEndsArr(vector<ReadEnds> &arr) {
|
||||||
|
|
||||||
/* 处理一组pairend的readends,标记冗余, 这个函数需要串行运行,因为需要做一些统计*/
|
/* 处理一组pairend的readends,标记冗余, 这个函数需要串行运行,因为需要做一些统计*/
|
||||||
static void markDupsForPairs(vector<const ReadEnds *> &vpRe, DPSet<DupInfo> *dupIdx, MDSet<int64_t> *opticalDupIdx,
|
static void markDupsForPairs(vector<const ReadEnds *> &vpRe, DPSet<DupInfo> *dupIdx, MDSet<int64_t> *opticalDupIdx,
|
||||||
DPSet<DupInfo> *repIdx, MDSet<int64_t> *singletonIdx, MDSet<int64_t> *notDupIdx = nullptr,
|
DPSet<DupInfo> *repIdx, MDSet<int64_t> *notDupIdx = nullptr,
|
||||||
MDSet<int64_t> *notOpticalDupIdx = nullptr, MDSet<int64_t> *notRepIdx = nullptr,
|
MDSet<int64_t> *notOpticalDupIdx = nullptr, MDSet<int64_t> *notRepIdx = nullptr ) {
|
||||||
MDSet<int64_t> *notSingletonIdx = nullptr) {
|
|
||||||
if (vpRe.size() < 2) {
|
if (vpRe.size() < 2) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
@ -171,9 +170,8 @@ static void getEqualRE(const ReadEnds &re, vector<ReadEnds> &src, vector<ReadEnd
|
||||||
|
|
||||||
/* 处理pairs */
|
/* 处理pairs */
|
||||||
static void processPairs(vector<ReadEnds> &readEnds, DPSet<DupInfo> *dupIdx, MDSet<int64_t> *opticalDupIdx,
|
static void processPairs(vector<ReadEnds> &readEnds, DPSet<DupInfo> *dupIdx, MDSet<int64_t> *opticalDupIdx,
|
||||||
DPSet<DupInfo> *repIdx, MDSet<int64_t> *singletonIdx, MDSet<int64_t> *notDupIdx = nullptr,
|
DPSet<DupInfo> *repIdx, MDSet<int64_t> *notDupIdx = nullptr,
|
||||||
MDSet<int64_t> *notOpticalDupIdx = nullptr, MDSet<int64_t> *notRepIdx = nullptr,
|
MDSet<int64_t> *notOpticalDupIdx = nullptr, MDSet<int64_t> *notRepIdx = nullptr) {
|
||||||
MDSet<int64_t> *notSingletonIdx = nullptr) {
|
|
||||||
// return;
|
// return;
|
||||||
vector<const ReadEnds *> vpCache; // 有可能是冗余的reads
|
vector<const ReadEnds *> vpCache; // 有可能是冗余的reads
|
||||||
const ReadEnds *pReadEnd = nullptr;
|
const ReadEnds *pReadEnd = nullptr;
|
||||||
|
|
@ -181,15 +179,14 @@ static void processPairs(vector<ReadEnds> &readEnds, DPSet<DupInfo> *dupIdx, MDS
|
||||||
if (pReadEnd != nullptr && ReadEnds::AreComparableForDuplicates(*pReadEnd, re, true)) // 跟前一个一样
|
if (pReadEnd != nullptr && ReadEnds::AreComparableForDuplicates(*pReadEnd, re, true)) // 跟前一个一样
|
||||||
vpCache.push_back(&re); // 处理一个潜在的冗余组
|
vpCache.push_back(&re); // 处理一个潜在的冗余组
|
||||||
else {
|
else {
|
||||||
markDupsForPairs(vpCache, dupIdx, opticalDupIdx, repIdx, singletonIdx, notDupIdx, notOpticalDupIdx,
|
markDupsForPairs(vpCache, dupIdx, opticalDupIdx, repIdx, notDupIdx, notOpticalDupIdx,
|
||||||
notRepIdx, notSingletonIdx); // 不一样
|
notRepIdx); // 不一样
|
||||||
vpCache.clear();
|
vpCache.clear();
|
||||||
vpCache.push_back(&re);
|
vpCache.push_back(&re);
|
||||||
pReadEnd = &re;
|
pReadEnd = &re;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
markDupsForPairs(vpCache, dupIdx, opticalDupIdx, repIdx, singletonIdx, notDupIdx, notOpticalDupIdx, notRepIdx,
|
markDupsForPairs(vpCache, dupIdx, opticalDupIdx, repIdx, notDupIdx, notOpticalDupIdx, notRepIdx);
|
||||||
notSingletonIdx);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* 处理frags */
|
/* 处理frags */
|
||||||
|
|
@ -279,54 +276,12 @@ static inline void refreshFragDupIdx(DPSet<DupInfo> &dupIdx, MDSet<int64_t> ¬
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* 将pairs重叠部分的dup idx放进数据中 */
|
|
||||||
static inline void refreshPairDupIdx(DPSet<DupInfo> &dupIdx, MDSet<int64_t> &opticalDupIdx, DPSet<DupInfo> &repIdx,
|
|
||||||
MDSet<int64_t> &singletonIdx, MDSet<int64_t> ¬DupIdx,
|
|
||||||
MDSet<int64_t> ¬OpticalDupIdx, MDSet<int64_t> ¬RepIdx,
|
|
||||||
MDSet<int64_t> ¬SingletonIdx, MarkDupDataArg *lastArg, MarkDupDataArg *curArg) {
|
|
||||||
auto &lp = *lastArg;
|
|
||||||
auto &p = *curArg;
|
|
||||||
for (auto idx : dupIdx) {
|
|
||||||
lp.pairDupIdx.insert(idx);
|
|
||||||
p.pairDupIdx.erase(idx);
|
|
||||||
}
|
|
||||||
for (auto idx : opticalDupIdx) {
|
|
||||||
lp.pairOpticalDupIdx.insert(idx);
|
|
||||||
p.pairOpticalDupIdx.erase(idx);
|
|
||||||
}
|
|
||||||
for (auto idx : repIdx) {
|
|
||||||
lp.pairRepIdx.insert(idx);
|
|
||||||
p.pairRepIdx.erase(idx);
|
|
||||||
}
|
|
||||||
for (auto idx : singletonIdx) {
|
|
||||||
lp.pairSingletonIdx.insert(idx);
|
|
||||||
p.pairSingletonIdx.erase(idx);
|
|
||||||
}
|
|
||||||
for (auto idx : notDupIdx) {
|
|
||||||
lp.pairDupIdx.erase(idx);
|
|
||||||
p.pairDupIdx.erase(idx);
|
|
||||||
}
|
|
||||||
for (auto idx : notOpticalDupIdx) {
|
|
||||||
lp.pairOpticalDupIdx.erase(idx);
|
|
||||||
p.pairOpticalDupIdx.erase(idx);
|
|
||||||
}
|
|
||||||
for (auto idx : notRepIdx) {
|
|
||||||
lp.pairRepIdx.erase(idx);
|
|
||||||
p.pairRepIdx.erase(idx);
|
|
||||||
}
|
|
||||||
for (auto idx : notSingletonIdx) {
|
|
||||||
lp.pairSingletonIdx.erase(idx);
|
|
||||||
p.pairSingletonIdx.erase(idx);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// 用来分别处理dup和optical dup
|
// 用来分别处理dup和optical dup
|
||||||
static void refeshTaskDupInfo(DPSet<DupInfo> &dupIdx, MDSet<int64_t> &opticalDupIdx, DPSet<DupInfo> &repIdx,
|
static void refeshTaskDupInfo(DPSet<DupInfo> &dupIdx, MDSet<int64_t> &opticalDupIdx, DPSet<DupInfo> &repIdx,
|
||||||
MDSet<int64_t> ¬DupIdx, MDSet<int64_t> ¬OpticalDupIdx, MDSet<int64_t> ¬RepIdx,
|
MDSet<int64_t> ¬DupIdx, MDSet<int64_t> ¬OpticalDupIdx, MDSet<int64_t> ¬RepIdx,
|
||||||
MDSet<int64_t> ¬SingletonIdx, DPSet<DupInfo> &latterDupIdx,
|
DPSet<DupInfo> &latterDupIdx, MDSet<int64_t> &latterOpticalDupIdx,
|
||||||
MDSet<int64_t> &latterOpticalDupIdx, DPSet<DupInfo> &latterRepIdx,
|
DPSet<DupInfo> &latterRepIdx, MDSet<int64_t> &latterNotDupIdx,
|
||||||
MDSet<int64_t> &latterNotDupIdx, MDSet<int64_t> &latterNotOpticalDupIdx,
|
MDSet<int64_t> &latterNotOpticalDupIdx, MDSet<int64_t> &latterNotRepIdx) {
|
||||||
MDSet<int64_t> &latterNotRepIdx) {
|
|
||||||
for (auto idx : dupIdx) latterDupIdx.insert(idx);
|
for (auto idx : dupIdx) latterDupIdx.insert(idx);
|
||||||
for (auto idx : opticalDupIdx) latterOpticalDupIdx.insert(idx);
|
for (auto idx : opticalDupIdx) latterOpticalDupIdx.insert(idx);
|
||||||
for (auto idx : repIdx) latterRepIdx.insert(idx);
|
for (auto idx : repIdx) latterRepIdx.insert(idx);
|
||||||
|
|
@ -511,8 +466,7 @@ static void doMarkDup(PipelineArg &pipeArg) {
|
||||||
tm_arr[5].acc_end();
|
tm_arr[5].acc_end();
|
||||||
SortMarkData &smd = *(SortMarkData *)mdData.dataPtr;
|
SortMarkData &smd = *(SortMarkData *)mdData.dataPtr;
|
||||||
// 先处理 pair
|
// 先处理 pair
|
||||||
processPairs(smd.pairs, &mdData.pairDupIdx, &mdData.pairOpticalDupIdx, &mdData.pairRepIdx,
|
processPairs(smd.pairs, &mdData.pairDupIdx, &mdData.pairOpticalDupIdx, &mdData.pairRepIdx);
|
||||||
nullptr);
|
|
||||||
// 再处理frag
|
// 再处理frag
|
||||||
processFrags(smd.frags, &mdData.fragDupIdx);
|
processFrags(smd.frags, &mdData.fragDupIdx);
|
||||||
}
|
}
|
||||||
|
|
@ -533,9 +487,8 @@ static void refreshNotDupIdx(T1 &srcArr, T2 &delArr1, T2 &delArr2) {
|
||||||
}
|
}
|
||||||
|
|
||||||
static void refreshMarkDupData(DPSet<DupInfo> &dupIdx, MDSet<int64_t> &opticalDupIdx, DPSet<DupInfo> &repIdx,
|
static void refreshMarkDupData(DPSet<DupInfo> &dupIdx, MDSet<int64_t> &opticalDupIdx, DPSet<DupInfo> &repIdx,
|
||||||
MDSet<int64_t> &singletonIdx, MDSet<int64_t> ¬DupIdx,
|
MDSet<int64_t> ¬DupIdx, MDSet<int64_t> ¬OpticalDupIdx, MDSet<int64_t> ¬RepIdx,
|
||||||
MDSet<int64_t> ¬OpticalDupIdx, MDSet<int64_t> ¬RepIdx,
|
MarkDupData &lp, MarkDupData &p) {
|
||||||
MDSet<int64_t> ¬SingletonIdx, MarkDupData &lp, MarkDupData &p) {
|
|
||||||
refreshDupIdx(dupIdx, lp.pairDupIdx, p.pairDupIdx);
|
refreshDupIdx(dupIdx, lp.pairDupIdx, p.pairDupIdx);
|
||||||
refreshDupIdx(opticalDupIdx, lp.pairOpticalDupIdx, p.pairOpticalDupIdx);
|
refreshDupIdx(opticalDupIdx, lp.pairOpticalDupIdx, p.pairOpticalDupIdx);
|
||||||
refreshDupIdx(repIdx, lp.pairRepIdx, p.pairRepIdx);
|
refreshDupIdx(repIdx, lp.pairRepIdx, p.pairRepIdx);
|
||||||
|
|
@ -558,11 +511,9 @@ static void doIntersect(PipelineArg &pipeArg) {
|
||||||
DPSet<DupInfo> dupIdx;
|
DPSet<DupInfo> dupIdx;
|
||||||
MDSet<int64_t> opticalDupIdx;
|
MDSet<int64_t> opticalDupIdx;
|
||||||
DPSet<DupInfo> repIdx;
|
DPSet<DupInfo> repIdx;
|
||||||
MDSet<int64_t> singletonIdx;
|
|
||||||
MDSet<int64_t> notOpticalDupIdx;
|
MDSet<int64_t> notOpticalDupIdx;
|
||||||
MDSet<int64_t> notDupIdx;
|
MDSet<int64_t> notDupIdx;
|
||||||
MDSet<int64_t> notRepIdx;
|
MDSet<int64_t> notRepIdx;
|
||||||
MDSet<int64_t> notSingletonIdx;
|
|
||||||
|
|
||||||
// 先处理重叠的frags
|
// 先处理重叠的frags
|
||||||
getIntersectData(lpSM.frags, pSM.frags, &reArr);
|
getIntersectData(lpSM.frags, pSM.frags, &reArr);
|
||||||
|
|
@ -576,10 +527,8 @@ static void doIntersect(PipelineArg &pipeArg) {
|
||||||
notDupIdx.clear();
|
notDupIdx.clear();
|
||||||
getIntersectData(lpSM.pairs, pSM.pairs, &reArr, true);
|
getIntersectData(lpSM.pairs, pSM.pairs, &reArr, true);
|
||||||
|
|
||||||
processPairs(reArr, &dupIdx, &opticalDupIdx, &repIdx, &singletonIdx, ¬DupIdx, ¬OpticalDupIdx, ¬RepIdx,
|
processPairs(reArr, &dupIdx, &opticalDupIdx, &repIdx, ¬DupIdx, ¬OpticalDupIdx, ¬RepIdx);
|
||||||
¬SingletonIdx);
|
refreshMarkDupData(dupIdx, opticalDupIdx, repIdx, notDupIdx, notOpticalDupIdx, notRepIdx, lp, p);
|
||||||
refreshMarkDupData(dupIdx, opticalDupIdx, repIdx, singletonIdx, notDupIdx, notOpticalDupIdx, notRepIdx,
|
|
||||||
notSingletonIdx, lp, p);
|
|
||||||
|
|
||||||
// 处理之前未匹配的部分
|
// 处理之前未匹配的部分
|
||||||
map<CalcKey, int64_t> recalcPos;
|
map<CalcKey, int64_t> recalcPos;
|
||||||
|
|
@ -727,8 +676,8 @@ static void doIntersect(PipelineArg &pipeArg) {
|
||||||
pairArrP = &g.unpairedPosArr[posKey].pairArr;
|
pairArrP = &g.unpairedPosArr[posKey].pairArr;
|
||||||
else
|
else
|
||||||
pairArrP = &lpSM.unpairedPosArr[posKey].pairArr;
|
pairArrP = &lpSM.unpairedPosArr[posKey].pairArr;
|
||||||
processPairs(*pairArrP, &t.dupIdx, &t.opticalDupIdx, &t.repIdx, &t.singletonIdx, &t.notDupIdx,
|
processPairs(*pairArrP, &t.dupIdx, &t.opticalDupIdx, &t.repIdx, &t.notDupIdx, &t.notOpticalDupIdx,
|
||||||
&t.notOpticalDupIdx, &t.notRepIdx, &t.notSingletonIdx);
|
&t.notRepIdx);
|
||||||
if (taskSeq < lp.taskSeq)
|
if (taskSeq < lp.taskSeq)
|
||||||
g.unpairedPosArr.erase(posKey);
|
g.unpairedPosArr.erase(posKey);
|
||||||
}
|
}
|
||||||
|
|
@ -746,15 +695,15 @@ static void doIntersect(PipelineArg &pipeArg) {
|
||||||
auto &t = e.second;
|
auto &t = e.second;
|
||||||
if (taskSeq < lp.taskSeq) {
|
if (taskSeq < lp.taskSeq) {
|
||||||
refeshTaskDupInfo(t.dupIdx, t.opticalDupIdx, t.repIdx, t.notDupIdx, t.notOpticalDupIdx, t.notRepIdx,
|
refeshTaskDupInfo(t.dupIdx, t.opticalDupIdx, t.repIdx, t.notDupIdx, t.notOpticalDupIdx, t.notRepIdx,
|
||||||
t.notSingletonIdx, g.latterDupIdxArr[taskSeq], g.latterOpticalDupIdxArr[taskSeq],
|
g.latterDupIdxArr[taskSeq], g.latterOpticalDupIdxArr[taskSeq], g.latterRepIdxArr[taskSeq],
|
||||||
g.latterRepIdxArr[taskSeq], g.latterNotDupIdxArr[taskSeq],
|
g.latterNotDupIdxArr[taskSeq], g.latterNotOpticalDupIdxArr[taskSeq],
|
||||||
g.latterNotOpticalDupIdxArr[taskSeq], g.latterNotRepIdxArr[taskSeq]);
|
g.latterNotRepIdxArr[taskSeq]);
|
||||||
} else if (taskSeq == lp.taskSeq) {
|
} else if (taskSeq == lp.taskSeq) {
|
||||||
refreshMarkDupData(t.dupIdx, t.opticalDupIdx, t.repIdx, t.singletonIdx, t.notDupIdx, t.notOpticalDupIdx,
|
refreshMarkDupData(t.dupIdx, t.opticalDupIdx, t.repIdx, t.notDupIdx, t.notOpticalDupIdx,
|
||||||
t.notRepIdx, t.notSingletonIdx, lp, p);
|
t.notRepIdx, lp, p);
|
||||||
} else {
|
} else {
|
||||||
refreshMarkDupData(t.dupIdx, t.opticalDupIdx, t.repIdx, t.singletonIdx, t.notDupIdx, t.notOpticalDupIdx,
|
refreshMarkDupData(t.dupIdx, t.opticalDupIdx, t.repIdx, t.notDupIdx, t.notOpticalDupIdx,
|
||||||
t.notRepIdx, t.notSingletonIdx, p, lp); // 把结果放到p中
|
t.notRepIdx, p, lp); // 把结果放到p中
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -1015,8 +964,7 @@ static void mergeAllTask(PipelineArg &pipeArg) {
|
||||||
|
|
||||||
if (arr.size() > 1) {
|
if (arr.size() > 1) {
|
||||||
std::sort(arr.begin(), arr.end());
|
std::sort(arr.begin(), arr.end());
|
||||||
processPairs(arr, &t.dupIdx, &t.opticalDupIdx, &t.repIdx, &t.singletonIdx, &t.notDupIdx,
|
processPairs(arr, &t.dupIdx, &t.opticalDupIdx, &t.repIdx, &t.notDupIdx, &t.notOpticalDupIdx, &t.notRepIdx);
|
||||||
&t.notOpticalDupIdx, &t.notRepIdx, &t.notSingletonIdx);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// 更新结果
|
// 更新结果
|
||||||
|
|
@ -1026,9 +974,9 @@ static void mergeAllTask(PipelineArg &pipeArg) {
|
||||||
auto taskSeq = e.first;
|
auto taskSeq = e.first;
|
||||||
auto &t = e.second;
|
auto &t = e.second;
|
||||||
refeshTaskDupInfo(t.dupIdx, t.opticalDupIdx, t.repIdx, t.notDupIdx, t.notOpticalDupIdx, t.notRepIdx,
|
refeshTaskDupInfo(t.dupIdx, t.opticalDupIdx, t.repIdx, t.notDupIdx, t.notOpticalDupIdx, t.notRepIdx,
|
||||||
t.notSingletonIdx, g.latterDupIdxArr[taskSeq], g.latterOpticalDupIdxArr[taskSeq],
|
g.latterDupIdxArr[taskSeq], g.latterOpticalDupIdxArr[taskSeq], g.latterRepIdxArr[taskSeq],
|
||||||
g.latterRepIdxArr[taskSeq], g.latterNotDupIdxArr[taskSeq],
|
g.latterNotDupIdxArr[taskSeq], g.latterNotOpticalDupIdxArr[taskSeq],
|
||||||
g.latterNotOpticalDupIdxArr[taskSeq], g.latterNotRepIdxArr[taskSeq]);
|
g.latterNotRepIdxArr[taskSeq]);
|
||||||
}
|
}
|
||||||
g.unpairedPosArr.clear();
|
g.unpairedPosArr.clear();
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue