418 lines
16 KiB
C
418 lines
16 KiB
C
|
|
/* 单线程处理冗余参数结构体 */
|
|||
|
|
struct SerailMarkDupArg
|
|||
|
|
{
|
|||
|
|
int64_t bamStartIdx; // 当前vBam数组中第一个bam记录在整体bam中所处的位置
|
|||
|
|
vector<BamWrap *> vBam; // 存放待处理的bam read
|
|||
|
|
map<int64_t, set<ReadEnds>> msPair; // 以冗余位置为索引,保存所有pairend reads
|
|||
|
|
map<int64_t, set<ReadEnds>> msFrag; // 保存所有reads,包括pairend
|
|||
|
|
map<int64_t, set<int64_t>> msPairDupIdx; // pair的冗余read的索引
|
|||
|
|
map<int64_t, set<int64_t>> msPairOpticalDupIdx; // optical冗余read的索引
|
|||
|
|
map<int64_t, set<int64_t>> msFragDupIdx; // frag的冗余read的索引
|
|||
|
|
unordered_map<string, ReadEnds> unpairedDic; // 用来寻找pair end
|
|||
|
|
};
|
|||
|
|
|
|||
|
|
/* 全局保留的数据,因为有些paired数据比对到了不同的染色体,相距甚远 */
|
|||
|
|
struct GlobalDataArg
|
|||
|
|
{
|
|||
|
|
map<int64_t, set<ReadEnds>> msPair; // 以冗余位置为索引,保存所有pairend reads
|
|||
|
|
map<int64_t, set<int64_t>> msPairDupIdx; // pair的冗余read的索引
|
|||
|
|
map<int64_t, set<int64_t>> msPairOpticalDupIdx; // optical冗余read的索引
|
|||
|
|
unordered_map<string, ReadEnds> unpairedDic; // 用来寻找pair end
|
|||
|
|
set<int64_t> dupIdx;
|
|||
|
|
unordered_set<int64_t> opticalDupIdx;
|
|||
|
|
map<int64_t, set<int64_t>> test;
|
|||
|
|
};
|
|||
|
|
|
|||
|
|
// File-scope state shared by all tasks: cross-task pair data and the accumulated duplicate index sets.
static GlobalDataArg gData;
|
|||
|
|
|
|||
|
|
/* 删除某个位点的pairend的冗余idx */
|
|||
|
|
static void rmPairIdxAtPos(const int64_t pos, SerailMarkDupArg *lastArg, SerailMarkDupArg *curArg)
|
|||
|
|
{
|
|||
|
|
delIdxAtPos(pos, &curArg->msPairDupIdx); // 删除该位点的冗余结果
|
|||
|
|
delIdxAtPos(pos, &curArg->msPairOpticalDupIdx);
|
|||
|
|
clearIdxAtPos(pos, &lastArg->msPairDupIdx); // 清除该位点的冗余结果
|
|||
|
|
clearIdxAtPos(pos, &lastArg->msPairOpticalDupIdx);
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
static void clearPairIdxAtPos(const int64_t pos, SerailMarkDupArg *task)
|
|||
|
|
{
|
|||
|
|
clearIdxAtPos(pos, &task->msPairDupIdx); // 删除该位点的冗余结果
|
|||
|
|
clearIdxAtPos(pos, &task->msPairOpticalDupIdx);
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
static void clearPairIdxAtPos(const int64_t pos,
|
|||
|
|
map<int64_t, set<int64_t>> *dupIdx,
|
|||
|
|
map<int64_t, set<int64_t>> *opticalDupIdx)
|
|||
|
|
{
|
|||
|
|
clearIdxAtPos(pos, dupIdx); // 删除该位点的冗余结果
|
|||
|
|
clearIdxAtPos(pos, opticalDupIdx);
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
/* 删除某个位点的frag的冗余idx */
|
|||
|
|
static void rmFragIdxAtPos(const int64_t pos, SerailMarkDupArg *lastArg, SerailMarkDupArg *curArg)
|
|||
|
|
{
|
|||
|
|
delIdxAtPos(pos, &curArg->msFragDupIdx); // 删除该位点的冗余结果
|
|||
|
|
clearIdxAtPos(pos, &lastArg->msFragDupIdx); // 清除该位点的冗余结果
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
/* 单线程生成readends (第一步)*/
|
|||
|
|
static void generateReadEnds(SerailMarkDupArg *arg)
|
|||
|
|
{
|
|||
|
|
auto &p = *arg;
|
|||
|
|
auto &rnParser = g_vRnParser[0];
|
|||
|
|
/* 处理每个read,创建ReadEnd,并放入frag和pair中 */
|
|||
|
|
for (int i = 0; i < p.vBam.size(); ++i) // 循环处理每个read
|
|||
|
|
{
|
|||
|
|
BamWrap *bw = p.vBam[i];
|
|||
|
|
const int64_t bamIdx = p.bamStartIdx + i;
|
|||
|
|
if (bw->GetReadUnmappedFlag())
|
|||
|
|
{
|
|||
|
|
if (bw->b->core.tid == -1)
|
|||
|
|
// When we hit the unmapped reads with no coordinate, no reason to continue (only in coordinate sort).
|
|||
|
|
break;
|
|||
|
|
}
|
|||
|
|
else if (!bw->IsSecondaryOrSupplementary()) // 是主要比对
|
|||
|
|
{
|
|||
|
|
ReadEnds fragEnd;
|
|||
|
|
buildReadEnds(*bw, bamIdx, rnParser, &fragEnd);
|
|||
|
|
// if (fragEnd.posKey == 3547574 || fragEnd.posKey == 3547930)
|
|||
|
|
// {
|
|||
|
|
// cout << fragEnd.posKey << '\t' << bw->query_name() << endl;
|
|||
|
|
// }
|
|||
|
|
p.msFrag[fragEnd.posKey].insert(fragEnd); // 添加进frag集合
|
|||
|
|
if (bw->GetReadPairedFlag() && !bw->GetMateUnmappedFlag()) // 是pairend而且互补的read也比对上了
|
|||
|
|
{
|
|||
|
|
string key = bw->query_name();
|
|||
|
|
if (p.unpairedDic.find(key) == p.unpairedDic.end())
|
|||
|
|
{
|
|||
|
|
p.unpairedDic[key] = fragEnd;
|
|||
|
|
}
|
|||
|
|
else // 找到了pairend
|
|||
|
|
{
|
|||
|
|
auto &pairedEnds = p.unpairedDic.at(key);
|
|||
|
|
modifyPairedEnds(fragEnd, &pairedEnds);
|
|||
|
|
p.msPair[pairedEnds.posKey].insert(pairedEnds);
|
|||
|
|
p.unpairedDic.erase(key); // 删除找到的pairend
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
/* 单线程markdup (第二步)*/
|
|||
|
|
static void markdups(SerailMarkDupArg *arg)
|
|||
|
|
{
|
|||
|
|
auto &p = *arg;
|
|||
|
|
/* generateDuplicateIndexes,计算冗余read在所有read中的位置索引 */
|
|||
|
|
unordered_set<int64_t> usUnpairedPos; // 该位置有还未找到pair的read
|
|||
|
|
for (auto &ele : p.unpairedDic)
|
|||
|
|
{
|
|||
|
|
usUnpairedPos.insert(ele.second.posKey);
|
|||
|
|
}
|
|||
|
|
// 先处理 pair
|
|||
|
|
vector<const ReadEnds *> vRePotentialDup; // 有可能是冗余的reads
|
|||
|
|
for (auto &e : p.msPair) // 按比对的位置先后进行遍历
|
|||
|
|
{
|
|||
|
|
handlePairs(e.first, e.second, vRePotentialDup, &p.msPairDupIdx, &p.msPairOpticalDupIdx);
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// 再处理frag
|
|||
|
|
for (auto &e : p.msFrag)
|
|||
|
|
{
|
|||
|
|
handleFrags(e.first, e.second, vRePotentialDup, &p.msFragDupIdx);
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// 将pair set中的pair添加进另一个pair set
|
|||
|
|
static void mergeToPairSet(int64_t pos, map<int64_t, set<ReadEnds>> &src, set<ReadEnds> *dst)
|
|||
|
|
{
|
|||
|
|
if (src.find(pos) != src.end())
|
|||
|
|
{
|
|||
|
|
for (auto &pe : src[pos])
|
|||
|
|
{
|
|||
|
|
dst->insert(pe);
|
|||
|
|
}
|
|||
|
|
// src.erase(pos);
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
/* 处理相邻的两个任务,有相交叉的数据 */
|
|||
|
|
static void handleIntersectData(SerailMarkDupArg *lastArg, SerailMarkDupArg *curArg, GlobalDataArg *gDataArg)
|
|||
|
|
{
|
|||
|
|
auto &lp = *lastArg;
|
|||
|
|
auto &p = *curArg;
|
|||
|
|
auto &g = *gDataArg;
|
|||
|
|
|
|||
|
|
vector<const ReadEnds *> vRePotentialDup; // 有可能是冗余的reads
|
|||
|
|
|
|||
|
|
int64_t lastPairPos = 0;
|
|||
|
|
if (lp.msPair.size() > 0)
|
|||
|
|
lastPairPos = lp.msPair.rbegin()->first; // 上一轮read最后到达的坐标
|
|||
|
|
for (auto &pair : p.msPair) // 重叠的pair
|
|||
|
|
{
|
|||
|
|
int64_t pos = pair.first;
|
|||
|
|
if (pos > lastPairPos) // 超过了上一个任务最大的位点坐标,那么就不再继续检查了
|
|||
|
|
break;
|
|||
|
|
if (lp.msPair.find(pos) != lp.msPair.end()) // 上一个任务里也有这个位点,两个任务在相同的点位上都有数据,则需要重新计算该点位
|
|||
|
|
{
|
|||
|
|
auto &pairedSet = lp.msPair[pos];
|
|||
|
|
rmPairIdxAtPos(pos, &lp, &p);
|
|||
|
|
for (auto &curPair : pair.second) // 改变了lp当前位点的paired set
|
|||
|
|
pairedSet.insert(curPair);
|
|||
|
|
handlePairs(pos, pairedSet, vRePotentialDup, &lp.msPairDupIdx, &lp.msPairOpticalDupIdx); // 把结果放在上个任务里
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
for (auto &unpair : lp.unpairedDic) // 上一个任务中没有找到匹配的pair
|
|||
|
|
{
|
|||
|
|
auto &readName = unpair.first;
|
|||
|
|
auto &unpairEnd = unpair.second;
|
|||
|
|
|
|||
|
|
if (p.unpairedDic.find(readName) != p.unpairedDic.end()) // 在当前任务中找到了匹配的pairend
|
|||
|
|
{
|
|||
|
|
auto &fe = p.unpairedDic.at(readName);
|
|||
|
|
modifyPairedEnds(fe, &unpairEnd);
|
|||
|
|
int64_t pos = unpairEnd.posKey;
|
|||
|
|
auto &pairedSet = lp.msPair[pos];
|
|||
|
|
rmPairIdxAtPos(pos, &lp, &p);
|
|||
|
|
pairedSet.insert(unpairEnd); // 改变了lp当前位点的paired set
|
|||
|
|
mergeToPairSet(pos, p.msPair, &pairedSet); // 将当前任务在该位点的数据合并进pairset
|
|||
|
|
handlePairs(pos, pairedSet, vRePotentialDup, &lp.msPairDupIdx, &lp.msPairOpticalDupIdx);
|
|||
|
|
p.unpairedDic.erase(readName);
|
|||
|
|
}
|
|||
|
|
else if (g.unpairedDic.find(readName) != g.unpairedDic.end()) // 在全局中找
|
|||
|
|
{
|
|||
|
|
auto &prePe = g.unpairedDic.at(readName);
|
|||
|
|
modifyPairedEnds(unpairEnd, &prePe);
|
|||
|
|
int64_t pos = prePe.posKey;
|
|||
|
|
rmPairIdxAtPos(pos, &lp, &p);
|
|||
|
|
clearPairIdxAtPos(pos, &g.msPairDupIdx, &g.msPairOpticalDupIdx);
|
|||
|
|
auto &prePairSet = g.msPair[pos];
|
|||
|
|
prePairSet.insert(prePe);
|
|||
|
|
mergeToPairSet(pos, lp.msPair, &prePairSet);
|
|||
|
|
mergeToPairSet(pos, p.msPair, &prePairSet);
|
|||
|
|
|
|||
|
|
// if (pos == 3547574)
|
|||
|
|
// {
|
|||
|
|
// cout <<"here-1: " << pos << '\t' << prePairSet.size() << '\t' << readName << '\t'
|
|||
|
|
// << (p.msPair.find(pos) != p.msPair.end()) << '\t'
|
|||
|
|
// << (p.unpairedDic.find(readName) != p.unpairedDic.end()) << '\t'
|
|||
|
|
// << (g.unpairedDic.find(readName) != g.unpairedDic.end()) << endl;
|
|||
|
|
// }
|
|||
|
|
|
|||
|
|
handlePairs(pos, prePairSet, vRePotentialDup, &g.msPairDupIdx, &g.msPairOpticalDupIdx);
|
|||
|
|
g.unpairedDic.erase(readName);
|
|||
|
|
// g.msPair.erase(pos);
|
|||
|
|
}
|
|||
|
|
else // 插入全局数据中
|
|||
|
|
{
|
|||
|
|
int64_t pos = unpairEnd.posKey;
|
|||
|
|
rmPairIdxAtPos(pos, &lp, &p);
|
|||
|
|
g.unpairedDic.insert(unpair);
|
|||
|
|
mergeToPairSet(pos, lp.msPair, &g.msPair[pos]);
|
|||
|
|
mergeToPairSet(pos, p.msPair, &g.msPair[pos]);
|
|||
|
|
// if (pos == 3547574)
|
|||
|
|
// {
|
|||
|
|
// cout << "here-3: " << pos << '\t' << g.msPair[pos].size() << '\t' << readName << '\t'
|
|||
|
|
// << lp.msPair[pos].size() << '\t'
|
|||
|
|
// << p.msPair[pos].size() << '\t'
|
|||
|
|
// << (g.unpairedDic.find(readName) != g.unpairedDic.end()) << endl;
|
|||
|
|
// }
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
int64_t lastFragPos = 0;
|
|||
|
|
if (lp.msFrag.size() > 0)
|
|||
|
|
lastFragPos = lp.msFrag.rbegin()->first; // 上一轮read最后到达的坐标, frag
|
|||
|
|
for (auto &frag : p.msFrag) // 重叠的frag
|
|||
|
|
{
|
|||
|
|
const int64_t pos = frag.first;
|
|||
|
|
if (pos > lastFragPos)
|
|||
|
|
break;
|
|||
|
|
if (lp.msFrag.find(pos) != lp.msFrag.end()) // 上一个任务里也有这个位点,两个任务在相同的点位上都有数据,则需要重新计算该点位
|
|||
|
|
{
|
|||
|
|
auto &fragSet = lp.msFrag[pos];
|
|||
|
|
rmFragIdxAtPos(pos, &lp, &p);
|
|||
|
|
for (auto &curFrag : frag.second) // 改变了lp当前位点的paired set
|
|||
|
|
fragSet.insert(curFrag);
|
|||
|
|
handleFrags(pos, fragSet, vRePotentialDup, &lp.msFragDupIdx); // 把结果放在上个任务里
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
/* 当所有任务结束后,global data里还有未处理的数据 */
|
|||
|
|
static void handleLastTask(SerailMarkDupArg *task, GlobalDataArg *gDataArg)
|
|||
|
|
{
|
|||
|
|
auto &p = *task;
|
|||
|
|
auto &g = *gDataArg;
|
|||
|
|
|
|||
|
|
vector<const ReadEnds *> vRePotentialDup;
|
|||
|
|
for (auto &unpair : p.unpairedDic) // 最后一个任务中没有找到匹配的pair
|
|||
|
|
{
|
|||
|
|
auto &readName = unpair.first;
|
|||
|
|
auto &unpairEnd = unpair.second;
|
|||
|
|
if (g.unpairedDic.find(readName) != g.unpairedDic.end()) // 在全局中找
|
|||
|
|
{
|
|||
|
|
|
|||
|
|
auto &prePe = g.unpairedDic.at(readName);
|
|||
|
|
modifyPairedEnds(unpairEnd, &prePe);
|
|||
|
|
int64_t pos = prePe.posKey;
|
|||
|
|
clearPairIdxAtPos(pos, &p);
|
|||
|
|
auto &prePairSet = g.msPair[pos];
|
|||
|
|
prePairSet.insert(prePe);
|
|||
|
|
mergeToPairSet(pos, p.msPair, &prePairSet);
|
|||
|
|
handlePairs(pos, prePairSet, vRePotentialDup, &g.msPairDupIdx, &g.msPairOpticalDupIdx);
|
|||
|
|
g.unpairedDic.erase(readName);
|
|||
|
|
|
|||
|
|
// if (pos == 3547574)
|
|||
|
|
// {
|
|||
|
|
// cout << "here-2: " << pos << '\t' << prePairSet.size() << '\t' << readName << '\t'
|
|||
|
|
// << (p.msPair.find(pos) != p.msPair.end()) << '\t'
|
|||
|
|
// << (p.unpairedDic.find(readName) != p.unpairedDic.end()) << '\t'
|
|||
|
|
// << (g.unpairedDic.find(readName) != g.unpairedDic.end()) << endl;
|
|||
|
|
// }
|
|||
|
|
}
|
|||
|
|
else // 插入全局数据中
|
|||
|
|
{
|
|||
|
|
int64_t pos = unpairEnd.posKey;
|
|||
|
|
g.unpairedDic.insert(unpair);
|
|||
|
|
mergeToPairSet(pos, p.msPair, &g.msPair[pos]);
|
|||
|
|
// if (pos == 3547574)
|
|||
|
|
// {
|
|||
|
|
// cout << "here-4: " << pos << '\t' << g.msPair[pos].size() << '\t' << readName << '\t'
|
|||
|
|
// << (p.msPair.find(pos) != p.msPair.end()) << '\t'
|
|||
|
|
// << (p.unpairedDic.find(readName) != p.unpairedDic.end()) << '\t'
|
|||
|
|
// << (g.unpairedDic.find(readName) != g.unpairedDic.end()) << endl;
|
|||
|
|
// }
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// 处理剩余的
|
|||
|
|
for (auto &pe : g.msPair)
|
|||
|
|
{
|
|||
|
|
// if (pe.first == 3547574)
|
|||
|
|
// {
|
|||
|
|
// cout << "here-4: " << pe.first << '\t' << g.msPair[pe.first].size() << endl;
|
|||
|
|
// }
|
|||
|
|
handlePairs(pe.first, pe.second, vRePotentialDup, &g.msPairDupIdx, &g.msPairOpticalDupIdx);
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
/* 功能函数,将冗余索引添加进结果中 */
|
|||
|
|
static void addIdxToSet(map<int64_t, set<int64_t>> &taskDupIdx, set<int64_t> *resultSet)
|
|||
|
|
{
|
|||
|
|
for (auto &idxSet : taskDupIdx)
|
|||
|
|
{
|
|||
|
|
// cout << idxSet.first << '\t' << idxSet.second.size() << endl;
|
|||
|
|
for (auto idx : idxSet.second)
|
|||
|
|
{
|
|||
|
|
resultSet->insert(idx);
|
|||
|
|
gData.test[idxSet.first].insert(idx);
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
/* 功能函数,将冗余(包括光学冗余)索引添加进结果中 */
|
|||
|
|
static void addOpticalIdxToSet(map<int64_t, set<int64_t>> &taskDupIdx, set<int64_t> *resultSet, unordered_set<int64_t> *opticalResult)
|
|||
|
|
{
|
|||
|
|
for (auto &idxSet : taskDupIdx)
|
|||
|
|
{
|
|||
|
|
// cout << idxSet.first << '\t' << idxSet.second.size() << endl;
|
|||
|
|
for (auto idx : idxSet.second)
|
|||
|
|
{
|
|||
|
|
resultSet->insert(idx);
|
|||
|
|
opticalResult->insert(idx);
|
|||
|
|
gData.test[idxSet.first].insert(idx);
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
/* 将每轮任务得到的冗余index添加到全局结果里 */
|
|||
|
|
static void addTaskIdxToSet(SerailMarkDupArg *task, GlobalDataArg *gDataArg)
|
|||
|
|
{
|
|||
|
|
auto &p = *task;
|
|||
|
|
auto &g = *gDataArg;
|
|||
|
|
addIdxToSet(p.msPairDupIdx, &g.dupIdx);
|
|||
|
|
addOpticalIdxToSet(p.msPairOpticalDupIdx, &g.dupIdx, &g.opticalDupIdx);
|
|||
|
|
addIdxToSet(p.msFragDupIdx, &g.dupIdx);
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
/* 将所有任务结束后,剩余的冗余index添加到全局结果里 */
|
|||
|
|
static void addGlobalIdxToSet(GlobalDataArg *gDataArg)
|
|||
|
|
{
|
|||
|
|
auto &g = *gDataArg;
|
|||
|
|
addIdxToSet(g.msPairDupIdx, &g.dupIdx);
|
|||
|
|
addOpticalIdxToSet(g.msPairOpticalDupIdx, &g.dupIdx, &g.opticalDupIdx);
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
/* 串行处理数据,标记冗余 */
|
|||
|
|
static void serialMarkDups()
|
|||
|
|
{
|
|||
|
|
tm_arr[5].acc_start();
|
|||
|
|
Timer::log_time("serial start");
|
|||
|
|
// 读取缓存初始化
|
|||
|
|
BamBufType inBamBuf(g_gArg.use_asyncio);
|
|||
|
|
inBamBuf.Init(g_inBamFp, g_inBamHeader, g_gArg.max_mem);
|
|||
|
|
// BamBufType inBamBuf(false);
|
|||
|
|
//inBamBuf.Init(g_inBamFp, g_inBamHeader, 10 * 1024 * 1024);
|
|||
|
|
int64_t processedBamNum = 0;
|
|||
|
|
SerailMarkDupArg *lastArgP = nullptr;
|
|||
|
|
SerailMarkDupArg *sMdArg = nullptr;
|
|||
|
|
while (inBamBuf.ReadStat() >= 0)
|
|||
|
|
{
|
|||
|
|
// 读取bam文件中的read
|
|||
|
|
tm_arr[4].acc_start();
|
|||
|
|
size_t readNum = inBamBuf.ReadBam();
|
|||
|
|
tm_arr[4].acc_end();
|
|||
|
|
cout << "read num: " << readNum << endl;
|
|||
|
|
lastArgP = sMdArg;
|
|||
|
|
tm_arr[6].acc_start();
|
|||
|
|
sMdArg = new SerailMarkDupArg({processedBamNum,
|
|||
|
|
inBamBuf.GetBamArr()});
|
|||
|
|
tm_arr[6].acc_end();
|
|||
|
|
tm_arr[0].acc_start();
|
|||
|
|
generateReadEnds(sMdArg);
|
|||
|
|
tm_arr[0].acc_end();
|
|||
|
|
|
|||
|
|
tm_arr[1].acc_start();
|
|||
|
|
markdups(sMdArg);
|
|||
|
|
tm_arr[1].acc_end();
|
|||
|
|
|
|||
|
|
if (lastArgP != nullptr)
|
|||
|
|
{
|
|||
|
|
tm_arr[2].acc_start();
|
|||
|
|
handleIntersectData(lastArgP, sMdArg, &gData);
|
|||
|
|
addTaskIdxToSet(lastArgP, &gData);
|
|||
|
|
tm_arr[2].acc_end();
|
|||
|
|
tm_arr[7].acc_start();
|
|||
|
|
delete lastArgP; // 清除用过的数据
|
|||
|
|
tm_arr[7].acc_end();
|
|||
|
|
}
|
|||
|
|
inBamBuf.ClearAll(); // 清理上一轮读入的数据
|
|||
|
|
processedBamNum += readNum;
|
|||
|
|
}
|
|||
|
|
tm_arr[3].acc_start();
|
|||
|
|
// 处理剩下的全局数据
|
|||
|
|
handleLastTask(sMdArg, &gData);
|
|||
|
|
addTaskIdxToSet(sMdArg, &gData);
|
|||
|
|
addGlobalIdxToSet(&gData);
|
|||
|
|
tm_arr[3].acc_end();
|
|||
|
|
|
|||
|
|
// for (auto &e : gData.test)
|
|||
|
|
// {
|
|||
|
|
// cout << e.first << '\t' << e.second.size() << endl;
|
|||
|
|
// }
|
|||
|
|
tm_arr[5].acc_end();
|
|||
|
|
// 统计所有冗余index数量
|
|||
|
|
cout << "dup num : " << gData.dupIdx.size() << endl;
|
|||
|
|
|
|||
|
|
cout << "calc readend: " << tm_arr[0].acc_seconds_elapsed() << endl;
|
|||
|
|
cout << "markdup : " << tm_arr[1].acc_seconds_elapsed() << endl;
|
|||
|
|
cout << "handle tail : " << tm_arr[2].acc_seconds_elapsed() << endl;
|
|||
|
|
cout << "handle last : " << tm_arr[3].acc_seconds_elapsed() << endl;
|
|||
|
|
cout << "read bam : " << tm_arr[4].acc_seconds_elapsed() << endl;
|
|||
|
|
cout << "new arg : " << tm_arr[6].acc_seconds_elapsed() << endl;
|
|||
|
|
cout << "del arg : " << tm_arr[7].acc_seconds_elapsed() << endl;
|
|||
|
|
cout << "all : " << tm_arr[5].acc_seconds_elapsed() << endl;
|
|||
|
|
|
|||
|
|
Timer::log_time("serial end ");
|
|||
|
|
}
|