需要完善optical dup idx计算
This commit is contained in:
parent
86954ffa85
commit
7352a00f2c
|
|
@ -1,5 +1,6 @@
|
|||
# for fast-markdup
|
||||
*.sam
|
||||
*.bam
|
||||
*.log
|
||||
# ---> C++
|
||||
# Prerequisites
|
||||
|
|
@ -36,4 +37,4 @@
|
|||
*.app
|
||||
|
||||
lib/
|
||||
build/
|
||||
build/
|
||||
|
|
|
|||
|
|
@ -13,11 +13,11 @@
|
|||
"program": "${workspaceRoot}/build/bin/picard_cpp",
|
||||
"args": [
|
||||
"MarkDuplicates",
|
||||
"--INPUT", "~/data/bam/100w.bam",
|
||||
"--INPUT", "~/data/bam/zy_normal.bam",
|
||||
"--OUTPUT", "out.bam",
|
||||
"--METRICS_FILE", "metrics.txt",
|
||||
"--num_threads", "1",
|
||||
"--max_mem", "4G",
|
||||
"--max_mem", "100G",
|
||||
"--verbosity", "DEBUG",
|
||||
"--asyncio", "true",
|
||||
],
|
||||
|
|
|
|||
8
run.sh
8
run.sh
|
|
@ -1,6 +1,8 @@
|
|||
#input=~/data/bam/zy_normal.bam
|
||||
input=~/data/bam/zy_tumor.bam
|
||||
input=~/data/bam/zy_normal.bam
|
||||
#input=~/data/bam/zy_tumor.bam
|
||||
#input=~/data/bam/100w.bam
|
||||
#input=~/data/bam/1kw.sam
|
||||
#input=~/data/bam/n1kw.sam
|
||||
|
||||
time /home/zzh/work/ngs/picard_cpp/build/bin/picard_cpp \
|
||||
MarkDuplicates \
|
||||
|
|
@ -8,7 +10,7 @@ time /home/zzh/work/ngs/picard_cpp/build/bin/picard_cpp \
|
|||
--OUTPUT ~/data/bam/out.bam \
|
||||
--INDEX_FORMAT BAI \
|
||||
--num_threads 1 \
|
||||
--max_mem 2G \
|
||||
--max_mem 1G \
|
||||
--verbosity DEBUG \
|
||||
--asyncio true #\
|
||||
#--READ_NAME_REGEX ".*?([0-9]+):([0-9]+):([0-9]+)$"
|
||||
|
|
|
|||
|
|
@ -71,7 +71,8 @@ void GlobalArg::parseArgument(int argNum) {
|
|||
mem_arg <<= 20;
|
||||
else if (*q == 'g' || *q == 'G')
|
||||
mem_arg <<= 30;
|
||||
if (mem_arg >= max_mem)
|
||||
//if (mem_arg >= max_mem)
|
||||
if (true)
|
||||
max_mem = mem_arg;
|
||||
else {
|
||||
std::cerr << "[Warn] Too small mem size, use default" << std::endl;
|
||||
|
|
|
|||
|
|
@ -57,6 +57,10 @@ static GlobalDataArg gData_;
|
|||
GlobalDataArg &gData = gData_;
|
||||
DuplicationMetrics gMetrics_;
|
||||
DuplicationMetrics &gMetrics = gMetrics_;
|
||||
|
||||
int zzhtestnum = 0;
|
||||
set<int64_t> zzhopticalSet;
|
||||
vector<int64_t> zzhopticalArr;
|
||||
/*
|
||||
* mark duplicate
|
||||
* 入口,假定bam是按照比对后的坐标排序的,同一个样本的话不需要考虑barcode的问题
|
||||
|
|
@ -161,11 +165,11 @@ int MarkDuplicates(int argc, char *argv[]) {
|
|||
while (inBuf.ReadStat() >= 0) {
|
||||
Timer tw1;
|
||||
size_t readNum = inBuf.ReadBam();
|
||||
cout << "read: " << readNum << endl;
|
||||
// cout << "read: " << readNum << endl;
|
||||
for (size_t i = 0; i < inBuf.Size(); ++i) {
|
||||
/* 判断是否冗余 */
|
||||
if (bamIdx == dupIdx) {
|
||||
// cout << "冗余" << bamIdx << endl;
|
||||
// cerr << bamIdx << endl;
|
||||
dupIdx = idxQue.Pop();
|
||||
}
|
||||
if (sam_write1(g_outBamFp, g_outBamHeader, inBuf[i]->b) < 0) {
|
||||
|
|
|
|||
|
|
@ -213,7 +213,7 @@ struct MarkDupsArg
|
|||
ns_md::SortOrder ASSUME_SORT_ORDER = ns_md::SortOrder::unsorted;
|
||||
|
||||
/* "The scoring strategy for choosing the non-duplicate among candidates." */
|
||||
ns_md::ScoringStrategy DUPLICATE_SCORING_STRATEGY = ns_md::ScoringStrategy::TOTAL_MAPPED_REFERENCE_LENGTH;
|
||||
ns_md::ScoringStrategy DUPLICATE_SCORING_STRATEGY = ns_md::ScoringStrategy::SUM_OF_BASE_QUALITIES;
|
||||
|
||||
/* "The program record ID for the @PG record(s) created by this program. Set to null to disable " +
|
||||
"PG record creation. This string may have a suffix appended to avoid collision with other " +
|
||||
|
|
|
|||
|
|
@ -28,8 +28,6 @@ using std::set;
|
|||
using std::unordered_map;
|
||||
using std::vector;
|
||||
|
||||
static int zzhtestnum = 0;
|
||||
|
||||
/* 清除key位置的数据 */
|
||||
void clearIdxAtPos(int64_t key, map<int64_t, set<int64_t>> *pmsIdx) {
|
||||
auto &msIdx = *pmsIdx;
|
||||
|
|
@ -247,6 +245,7 @@ void handleFrags(int64_t posKey, vector<ReadEnds> &readEnds,
|
|||
/* 对找到的pairend read end添加一些信息 */
|
||||
void modifyPairedEnds(const ReadEnds &fragEnd, ReadEnds *pPairedEnds) {
|
||||
auto &pairedEnds = *pPairedEnds;
|
||||
|
||||
int64_t bamIdx = fragEnd.read1IndexInFile;
|
||||
const int matesRefIndex = fragEnd.read1ReferenceIndex;
|
||||
const int matesCoordinate = fragEnd.read1Coordinate;
|
||||
|
|
@ -459,14 +458,23 @@ static int checkOpticalDuplicates(vector<const ReadEnds *> &readEndsArr, const R
|
|||
findOpticalDuplicates(readEndsArr, pBestRe, &opticalDuplicateFlags);
|
||||
int opticalDuplicates = 0;
|
||||
for (int i = 0; i < opticalDuplicateFlags.size(); ++i) {
|
||||
ReadEnds *pRe = const_cast<ReadEnds *>(readEndsArr[i]);
|
||||
if (opticalDuplicateFlags[i]) {
|
||||
++opticalDuplicates;
|
||||
ReadEnds *pRe = const_cast<ReadEnds *>(readEndsArr[i]);
|
||||
// if (zzhopticalSet.find(pRe->read1IndexInFile) != zzhopticalSet.end()) {
|
||||
// cout << "val: " << pRe->isOpticalDuplicate << endl;
|
||||
// }
|
||||
pRe->isOpticalDuplicate = true;
|
||||
zzhopticalSet.insert(pRe->read1IndexInFile);
|
||||
zzhopticalSet.insert(pRe->read2IndexInFile);
|
||||
zzhopticalArr.push_back(pRe->read1IndexInFile);
|
||||
zzhopticalArr.push_back(pRe->read2IndexInFile);
|
||||
} else {
|
||||
pRe->isOpticalDuplicate = false;
|
||||
zzhopticalSet.erase(pRe->read1IndexInFile);
|
||||
zzhopticalSet.erase(pRe->read2IndexInFile);
|
||||
}
|
||||
}
|
||||
if (opticalDuplicates > 0)
|
||||
gMetrics.OpticalDuplicatesByLibraryId += opticalDuplicates;
|
||||
return opticalDuplicates;
|
||||
}
|
||||
|
||||
|
|
@ -475,8 +483,11 @@ static int checkOpticalDuplicates(vector<const ReadEnds *> &readEndsArr, const R
|
|||
*/
|
||||
void trackOpticalDuplicates(vector<const ReadEnds *> &readEndsArr, const ReadEnds *pBestRe) {
|
||||
bool hasFR = false, hasRF = false;
|
||||
int prevOpticalDupNum = 0;
|
||||
// Check to see if we have a mixture of FR/RF
|
||||
for (auto pRe : readEndsArr) {
|
||||
if (pRe->isOpticalDuplicate)
|
||||
++prevOpticalDupNum;
|
||||
if (ReadEnds::FR == pRe->orientationForOpticalDuplicates)
|
||||
hasFR = true;
|
||||
else if (ReadEnds::RF == pRe->orientationForOpticalDuplicates)
|
||||
|
|
@ -513,5 +524,10 @@ void trackOpticalDuplicates(vector<const ReadEnds *> &readEndsArr, const ReadEnd
|
|||
if (nOpticalDup)
|
||||
gMetrics.OpticalDuplicatesCountHist += nOpticalDup + 1;
|
||||
|
||||
|
||||
gMetrics.OpticalDuplicatesByLibraryId += nOpticalDup - prevOpticalDupNum;
|
||||
//gMetrics.OpticalDuplicatesByLibraryId += nOpticalDup;
|
||||
|
||||
// cout << "zzh optical:" << (++zzhtestnum) << "\t" << readEndsArr.size() << "\t" << nOpticalDup << endl;
|
||||
// cerr << (zzhtestnum++) << " " << readEndsArr.size() << ":" << nOpticalDup << endl;
|
||||
}
|
||||
|
|
@ -13,10 +13,10 @@
|
|||
#include <set>
|
||||
#include <vector>
|
||||
|
||||
#include "dup_metrics.h"
|
||||
#include "markdups_arg.h"
|
||||
#include "md_funcs.h"
|
||||
#include "shared_args.h"
|
||||
#include "dup_metrics.h"
|
||||
|
||||
using std::cout;
|
||||
using std::set;
|
||||
|
|
@ -78,25 +78,28 @@ static void markDupsForPairs(vector<const ReadEnds *> &vpRe, set<int64_t> *dupId
|
|||
}
|
||||
int maxScore = 0;
|
||||
const ReadEnds *pBest = nullptr;
|
||||
int maxOperateTime = 0;
|
||||
/** All read ends should have orientation FF, FR, RF, or RR **/
|
||||
for (auto pe : vpRe) { // 找分数最高的readend
|
||||
for (auto pe : vpRe) { // 找分数最高的readend
|
||||
maxOperateTime = max(maxOperateTime, pe->oprateTime);
|
||||
(const_cast<ReadEnds *>(pe))->oprateTime ++;
|
||||
if (pe->score > maxScore || pBest == nullptr) {
|
||||
maxScore = pe->score;
|
||||
pBest = pe;
|
||||
}
|
||||
}
|
||||
// cerr << zzhtestnum << " best: " << vpRe.size() << " " << pBest->read1IndexInFile << "-" << pBest->read2IndexInFile << endl;
|
||||
// if (maxOperateTime == 0) ++zzhtestnum;
|
||||
if (notDupIdx != nullptr) {
|
||||
notDupIdx->insert(pBest->read1IndexInFile);
|
||||
notDupIdx->insert(pBest->read2IndexInFile);
|
||||
}
|
||||
if (!g_mdArg.READ_NAME_REGEX.empty()) { // 检查光学冗余
|
||||
if (!g_mdArg.READ_NAME_REGEX.empty()) { // 检查光学冗余
|
||||
// trackOpticalDuplicates
|
||||
trackOpticalDuplicates(vpRe, pBest);
|
||||
}
|
||||
for (auto pe : vpRe) // 对非best read标记冗余
|
||||
{
|
||||
if (pe != pBest) // 非best
|
||||
{
|
||||
for (auto pe : vpRe) { // 对非best read标记冗余
|
||||
if (pe != pBest) { // 非best
|
||||
dupIdx->insert(pe->read1IndexInFile); // 添加read1
|
||||
if (pe->read2IndexInFile != pe->read1IndexInFile)
|
||||
dupIdx->insert(pe->read2IndexInFile); // 添加read2
|
||||
|
|
@ -139,8 +142,7 @@ static void markDupsForFrags(vector<const ReadEnds *> &vpRe, bool containsPairs,
|
|||
|
||||
/* 找到与readend pos相等的所有readend */
|
||||
static void getEqualRE(const ReadEnds &re, vector<ReadEnds> &src, vector<ReadEnds> *dst) {
|
||||
auto range = std::equal_range(src.begin(), src.end(), re,
|
||||
ReadEnds::PairsLittleThan); // 只比对位点
|
||||
auto range = std::equal_range(src.begin(), src.end(), re, ReadEnds::PairsLittleThan); // 只比对位点
|
||||
dst->insert(dst->end(), range.first, range.second);
|
||||
}
|
||||
|
||||
|
|
@ -158,24 +160,24 @@ static void generateReadEnds(SerailMarkDupArg *arg) {
|
|||
// set<ReadEnds> reSet;
|
||||
// ReadEnds lastRe;
|
||||
|
||||
for (int i = 0; i < p.bams.size(); ++i) { // 循环处理每个read
|
||||
for (int i = 0; i < p.bams.size(); ++i) { // 循环处理每个read
|
||||
BamWrap *bw = p.bams[i];
|
||||
const int64_t bamIdx = p.bamStartIdx + i;
|
||||
if (bw->GetReadUnmappedFlag()) {
|
||||
if (bw->b->core.tid == -1)
|
||||
// When we hit the unmapped reads with no coordinate, no reason to continue (only in coordinate sort).
|
||||
break;
|
||||
} else if (!bw->IsSecondaryOrSupplementary()) { // 是主要比对
|
||||
} else if (!bw->IsSecondaryOrSupplementary()) { // 是主要比对
|
||||
ReadEnds fragEnd;
|
||||
tm_arr[8].acc_start();
|
||||
buildReadEnds(*bw, bamIdx, rnParser, &fragEnd);
|
||||
tm_arr[8].acc_end();
|
||||
p.frags.push_back(fragEnd); // 添加进frag集合
|
||||
if (bw->GetReadPairedFlag() && !bw->GetMateUnmappedFlag()) { // 是pairend而且互补的read也比对上了
|
||||
p.frags.push_back(fragEnd); // 添加进frag集合
|
||||
if (bw->GetReadPairedFlag() && !bw->GetMateUnmappedFlag()) { // 是pairend而且互补的read也比对上了
|
||||
string key = bw->query_name();
|
||||
if (p.unpairedDic.find(key) == p.unpairedDic.end()) {
|
||||
p.unpairedDic[key] = {p.taskSeq, fragEnd};
|
||||
} else { // 找到了pairend
|
||||
} else { // 找到了pairend
|
||||
auto &pairedEnds = p.unpairedDic.at(key).unpairedRE;
|
||||
modifyPairedEnds(fragEnd, &pairedEnds);
|
||||
p.pairs.push_back(pairedEnds);
|
||||
|
|
@ -185,8 +187,8 @@ static void generateReadEnds(SerailMarkDupArg *arg) {
|
|||
}
|
||||
}
|
||||
tm_arr[9].acc_start();
|
||||
//sortReadEndsArr(p.frags);
|
||||
sort(p.frags.begin(), p.frags.end());
|
||||
sortReadEndsArr(p.frags);
|
||||
// sort(p.frags.begin(), p.frags.end());
|
||||
tm_arr[9].acc_end();
|
||||
// cout << "sort pairs" << endl;
|
||||
tm_arr[10].acc_start();
|
||||
|
|
@ -277,7 +279,7 @@ static inline void getIntersectData(vector<ReadEnds> &leftArr, vector<ReadEnds>
|
|||
|
||||
while (!ReadEnds::ReadLittleThan(leftArr[leftEndIdx - leftSpan], rightArr[rightStartIdx], isPairCmp)) {
|
||||
leftSpan += 1;
|
||||
if (leftSpan > leftEndIdx) { // 上一个的范围被下一个全部包围了(可能会有bug,上上个也与下一个有交集呢?)
|
||||
if (leftSpan > leftEndIdx) { // 上一个的范围被下一个全部包围了(可能会有bug,上上个也与下一个有交集呢?)
|
||||
leftSpan = leftArr.size() - 1;
|
||||
break;
|
||||
}
|
||||
|
|
@ -285,7 +287,7 @@ static inline void getIntersectData(vector<ReadEnds> &leftArr, vector<ReadEnds>
|
|||
|
||||
while (!ReadEnds::ReadLittleThan(leftArr[leftEndIdx], rightArr[rightSpan], isPairCmp)) {
|
||||
rightSpan += 1;
|
||||
if (rightSpan == rightArr.size() - 1) // 同上,可能会有bug
|
||||
if (rightSpan == rightArr.size() - 1) // 同上,可能会有bug
|
||||
break;
|
||||
}
|
||||
dst->insert(dst->end(), leftArr.end() - leftSpan, leftArr.end());
|
||||
|
|
@ -432,21 +434,18 @@ static void handleIntersectData(SerailMarkDupArg *lastArg, SerailMarkDupArg *cur
|
|||
// 1.
|
||||
// prevpos在交叉部分之前,nextpos在交叉部分之后,这种情况不需要获取pairarr中的数据;
|
||||
// 2.
|
||||
// prevpos在交叉部分之前,nextpos在交叉部分,需要获取lp中的相等read
|
||||
// pair进行重新计算
|
||||
// 复杂情况1.
|
||||
// g中包含prevPosKey对应的unpair,p中有对应的pair,此时应该把这些pair考虑进去
|
||||
// prevpos在交叉部分之前,nextpos在交叉部分,需要获取lp中的相等read pair进行重新计算
|
||||
// 复杂情况1. g中包含prevPosKey对应的unpair,p中有对应的pair,此时应该把这些pair考虑进去
|
||||
// 3.
|
||||
// prevpos在交叉部分,nextpos在交叉部分之后,需要获取p中的相等read
|
||||
// pair进行重新计算
|
||||
// 复杂情况2. p中是否包含prevPosKey对应的unpair
|
||||
// prevpos在交叉部分,nextpos在交叉部分之后,需要获取p中的相等read pair进行重新计算
|
||||
// 复杂情况2. p中是否包含prevPosKey对应的unpair
|
||||
// 4.
|
||||
// prevpos在交叉部分,nextpos在交叉部分,需要获取lp和p中的相等read
|
||||
// pair进行重新计算
|
||||
// prevpos在交叉部分,nextpos在交叉部分,需要获取lp和p中的相等read pair进行重新计算
|
||||
|
||||
bool addDataToPos = true;
|
||||
if (alreadyAdd.find(ck) != alreadyAdd.end()) {
|
||||
addDataToPos = false; // 之前已经添加过了,后面就不用再添加数据了
|
||||
// 之前已经添加过了,后面就不用再添加数据了,因为同一个位置可能找到两个及以上的unpair数据,处理之前的数据时候可能已经添加了这些数据
|
||||
addDataToPos = false;
|
||||
} else
|
||||
alreadyAdd.insert(ck);
|
||||
|
||||
|
|
@ -478,9 +477,9 @@ static void handleIntersectData(SerailMarkDupArg *lastArg, SerailMarkDupArg *cur
|
|||
}
|
||||
recalcPos[ck] = prevPosInfo.taskSeq;
|
||||
std::sort(prevPairArr.begin(), prevPairArr.end());
|
||||
} else { // prevpos在交叉部分
|
||||
if (nextPosKey > prevLastPos) { // nextpos在交叉部分之后 第三种情况
|
||||
if (nextUnpairInfoP != nullptr) { // 且在pos点,next task有unpair,这样才把这些数据放到next task里
|
||||
} else { // prevpos在交叉部分
|
||||
if (nextPosKey > prevLastPos) { // nextpos在交叉部分之后 第三种情况
|
||||
if (nextUnpairInfoP != nullptr) { // 且在pos点,next task有unpair,这样才把这些数据放到next task里
|
||||
auto &nextPairArr = nextUnpairInfoP->pairArr;
|
||||
nextPairArr.push_back(prevFragEnd);
|
||||
auto &prevPairArr = prevUnpairInfoP->pairArr;
|
||||
|
|
@ -490,9 +489,9 @@ static void handleIntersectData(SerailMarkDupArg *lastArg, SerailMarkDupArg *cur
|
|||
}
|
||||
// 将数据放到next task里,(这个位点以后会可能还会计算到,目前方案是都计算,只是把冗余剔除)
|
||||
recalcPos[ck] = nextPosInfo.taskSeq;
|
||||
|
||||
|
||||
std::sort(prevPairArr.begin(), prevPairArr.end());
|
||||
} else { // next task在该位点没有unpair,那就把数据放到prev task里
|
||||
} else { // next task在该位点没有unpair,那就把数据放到prev task里
|
||||
auto &prevPairArr = prevUnpairInfoP->pairArr; // prevUnpairInfoP肯定不是nullptr
|
||||
prevPairArr.push_back(prevFragEnd);
|
||||
if (addDataToPos) // 第二种情况
|
||||
|
|
@ -515,8 +514,8 @@ static void handleIntersectData(SerailMarkDupArg *lastArg, SerailMarkDupArg *cur
|
|||
std::sort(prevPairArr.begin(), prevPairArr.end());
|
||||
}
|
||||
}
|
||||
p.unpairedDic.erase(readName); // 在next task里删除该read
|
||||
} else if (g.unpairedDic.find(readName) != g.unpairedDic.end()) { // 在遗留数据中找到了匹配的read
|
||||
p.unpairedDic.erase(readName); // 在next task里删除该read
|
||||
} else if (g.unpairedDic.find(readName) != g.unpairedDic.end()) { // 在遗留数据中找到了匹配的read
|
||||
auto &remainPosInfo = g.unpairedDic[readName];
|
||||
auto remainFragEnd = remainPosInfo.unpairedRE;
|
||||
int64_t remainPosKey = remainFragEnd.posKey;
|
||||
|
|
@ -529,14 +528,12 @@ static void handleIntersectData(SerailMarkDupArg *lastArg, SerailMarkDupArg *cur
|
|||
std::sort(remainPairArr.begin(), remainPairArr.end());
|
||||
|
||||
g.unpairedDic.erase(readName);
|
||||
} else { // 都没找到,那就保存到遗留数据里
|
||||
} else { // 都没找到,那就保存到遗留数据里
|
||||
int64_t prevPosKey = prevFragEnd.posKey;
|
||||
g.unpairedDic.insert(prevUnpair);
|
||||
addToGlobal.insert(prevPosKey);
|
||||
}
|
||||
}
|
||||
// 最后再添加,以防开始赋值,后来这个位置要是又添加了新的数据
|
||||
for (auto posKey : addToGlobal) g.unpairedPosArr[posKey] = lp.unpairedPosArr[posKey];
|
||||
|
||||
map<int64_t, TaskSeqDupInfo> taskChanged;
|
||||
set<int64_t> posProcessed;
|
||||
|
|
@ -557,8 +554,12 @@ static void handleIntersectData(SerailMarkDupArg *lastArg, SerailMarkDupArg *cur
|
|||
if (taskSeq < lp.taskSeq)
|
||||
g.unpairedPosArr.erase(posKey);
|
||||
}
|
||||
// 更新结果
|
||||
|
||||
// 最后再添加,以防开始赋值,后来这个位置要是又添加了新的数据
|
||||
// 放在这里,因为lp中的unpairedPosArr中的readends可能会被修改(比如optical duplicate)
|
||||
for (auto posKey : addToGlobal) g.unpairedPosArr[posKey] = lp.unpairedPosArr[posKey];
|
||||
|
||||
// 更新结果
|
||||
for (auto &e : taskChanged) {
|
||||
auto taskSeq = e.first;
|
||||
auto &t = e.second;
|
||||
|
|
@ -594,12 +595,12 @@ static void handleLastTask(SerailMarkDupArg *task, GlobalDataArg *gDataArg) {
|
|||
auto &lp = *task;
|
||||
auto &g = *gDataArg;
|
||||
// 遗留的未匹配的pair
|
||||
for (auto &prevUnpair : lp.unpairedDic) { // 遍历上一个任务中的每个未匹配的read
|
||||
for (auto &prevUnpair : lp.unpairedDic) { // 遍历上一个任务中的每个未匹配的read
|
||||
auto &readName = prevUnpair.first;
|
||||
auto &prevPosInfo = prevUnpair.second;
|
||||
auto prevFragEnd = prevPosInfo.unpairedRE; // 未匹配的read end
|
||||
|
||||
if (g.unpairedDic.find(readName) != g.unpairedDic.end()) { // 在遗留数据中找到了匹配的read
|
||||
if (g.unpairedDic.find(readName) != g.unpairedDic.end()) { // 在遗留数据中找到了匹配的read
|
||||
auto &remainPosInfo = g.unpairedDic[readName];
|
||||
auto remainFragEnd = remainPosInfo.unpairedRE;
|
||||
int64_t remainPosKey = remainFragEnd.posKey;
|
||||
|
|
@ -719,8 +720,8 @@ void serialMarkDups() {
|
|||
// cout << "round time: " << t_round.seconds_elapsed() << endl;
|
||||
roundNum++;
|
||||
if (roundNum % 100 == 0) {
|
||||
cout << "read sum: " << readNumSum << endl;
|
||||
cout << "round time: " << t_round.seconds_elapsed() * 100 << " s" << endl;
|
||||
//cout << "read sum: " << readNumSum << endl;
|
||||
//cout << "round time: " << t_round.seconds_elapsed() * 100 << " s" << endl;
|
||||
}
|
||||
}
|
||||
// cout << "here" << endl;
|
||||
|
|
@ -769,10 +770,10 @@ void serialMarkDups() {
|
|||
cout << "sort frags : " << tm_arr[9].acc_seconds_elapsed() << endl;
|
||||
cout << "sort pairs : " << tm_arr[10].acc_seconds_elapsed() << endl;
|
||||
cout << "all : " << tm_arr[5].acc_seconds_elapsed() << endl;
|
||||
cout << "metrics: " << gMetrics.DuplicateCountHist << "\t"
|
||||
<< gMetrics.NonOpticalDuplicateCountHist << "\t"
|
||||
<< gMetrics.OpticalDuplicatesCountHist << "\t"
|
||||
<< gMetrics.OpticalDuplicatesByLibraryId << endl;
|
||||
cout << "metrics: " << gMetrics.DuplicateCountHist << "\t" << gMetrics.NonOpticalDuplicateCountHist << "\t"
|
||||
<< gMetrics.OpticalDuplicatesCountHist << "\t" << gMetrics.OpticalDuplicatesByLibraryId << endl;
|
||||
cout << "optical dup: " << zzhopticalSet.size() << endl;
|
||||
cout << "optical arr dup: " << zzhopticalArr.size() << endl;
|
||||
|
||||
Timer::log_time("serial end ");
|
||||
|
||||
|
|
|
|||
|
|
@ -6,6 +6,8 @@
|
|||
#include <htslib/thread_pool.h>
|
||||
#include <sam/utils/read_ends.h>
|
||||
#include <sam/utils/read_name_parser.h>
|
||||
#include <set>
|
||||
using std::set;
|
||||
|
||||
extern Timer tm_arr[20]; // 用来测试性能
|
||||
/* 全局本地变量 */
|
||||
|
|
@ -23,4 +25,8 @@ extern MarkDupsArg &g_mdArg;
|
|||
class GlobalDataArg;
|
||||
extern GlobalDataArg &gData;
|
||||
class DuplicationMetrics;
|
||||
extern DuplicationMetrics &gMetrics;
|
||||
extern DuplicationMetrics &gMetrics;
|
||||
|
||||
extern int zzhtestnum;
|
||||
extern set<int64_t> zzhopticalSet;
|
||||
extern vector<int64_t> zzhopticalArr;
|
||||
|
|
@ -77,6 +77,9 @@ struct ReadEnds : PhysicalLocation {
|
|||
int64_t posKey = -1; // 根据位置信息生成的关键字 return (int64_t)tid <<
|
||||
// MAX_CONTIG_LEN_SHIFT | (int64_t)pos;
|
||||
|
||||
/* 用来做一些判断,因为一些readends会做多次操作,比如task之间有重叠等等 */
|
||||
int oprateTime = 0;
|
||||
|
||||
/* 根据pairend read的比对方向,来确定整体的比对方向 */
|
||||
static int8_t GetOrientationByte(bool read1NegativeStrand,
|
||||
bool read2NegativeStrand) {
|
||||
|
|
|
|||
Loading…
Reference in New Issue