修改了一些格式
This commit is contained in:
parent
f84d7bb0dc
commit
899d40cbda
4
run.sh
4
run.sh
|
|
@ -1,7 +1,7 @@
|
||||||
#input=~/data/bam/zy_normal.bam
|
#input=~/data/bam/zy_normal.bam
|
||||||
input=~/data/bam/zy_tumor.bam
|
#input=~/data/bam/zy_tumor.bam
|
||||||
#input=~/data/bam/100w.bam
|
#input=~/data/bam/100w.bam
|
||||||
#input=~/data/bam/1kw.sam
|
input=~/data/bam/1kw.sam
|
||||||
#input=~/data/bam/n1kw.sam
|
#input=~/data/bam/n1kw.sam
|
||||||
|
|
||||||
time /home/zzh/work/ngs/picard_cpp/build/bin/picard_cpp \
|
time /home/zzh/work/ngs/picard_cpp/build/bin/picard_cpp \
|
||||||
|
|
|
||||||
28
src/main.cpp
28
src/main.cpp
|
|
@ -1,17 +1,15 @@
|
||||||
|
#include <htslib/sam.h>
|
||||||
|
|
||||||
#include <cstdio>
|
#include <cstdio>
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
#include <htslib/sam.h>
|
|
||||||
#include "module.h"
|
#include "module.h"
|
||||||
|
|
||||||
/* 版本信息 */
|
/* 版本信息 */
|
||||||
const char *version()
|
const char *version() { return PICARD_CPP_VERSION; }
|
||||||
{
|
|
||||||
return PICARD_CPP_VERSION;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* 使用说明 */
|
/* 使用说明 */
|
||||||
static void usage(FILE *fp)
|
static void usage(FILE *fp) {
|
||||||
{
|
|
||||||
fprintf(fp,
|
fprintf(fp,
|
||||||
"\n"
|
"\n"
|
||||||
"Program: picard_cpp (A cpp implementation for picard.)\n"
|
"Program: picard_cpp (A cpp implementation for picard.)\n"
|
||||||
|
|
@ -25,19 +23,14 @@ static void usage(FILE *fp)
|
||||||
"\n");
|
"\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
int main(int argc, char *argv[])
|
int main(int argc, char *argv[]) {
|
||||||
{
|
if (argc < 2) {
|
||||||
|
|
||||||
if (argc < 2)
|
|
||||||
{
|
|
||||||
usage(stderr);
|
usage(stderr);
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (strcmp(argv[1], "help") == 0 || strcmp(argv[1], "--help") == 0)
|
if (strcmp(argv[1], "help") == 0 || strcmp(argv[1], "--help") == 0) {
|
||||||
{
|
if (argc == 2) {
|
||||||
if (argc == 2)
|
|
||||||
{
|
|
||||||
usage(stdout);
|
usage(stdout);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
@ -49,8 +42,7 @@ int main(int argc, char *argv[])
|
||||||
|
|
||||||
if (strcmp(argv[1], "MarkDuplicates") == 0)
|
if (strcmp(argv[1], "MarkDuplicates") == 0)
|
||||||
ret = MarkDuplicates(argc - 1, argv + 1);
|
ret = MarkDuplicates(argc - 1, argv + 1);
|
||||||
else
|
else {
|
||||||
{
|
|
||||||
fprintf(stderr, "\n[Error]: unrecognized command '%s'\n\n", argv[1]);
|
fprintf(stderr, "\n[Error]: unrecognized command '%s'\n\n", argv[1]);
|
||||||
usage(stdout);
|
usage(stdout);
|
||||||
return 1;
|
return 1;
|
||||||
|
|
|
||||||
|
|
@ -26,12 +26,12 @@ Date : 2023/10/23
|
||||||
#include <unordered_set>
|
#include <unordered_set>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
|
#include "dup_metrics.h"
|
||||||
#include "markdups_arg.h"
|
#include "markdups_arg.h"
|
||||||
#include "md_funcs.h"
|
#include "md_funcs.h"
|
||||||
#include "parallel_md.h"
|
#include "parallel_md.h"
|
||||||
#include "serial_md.h"
|
#include "serial_md.h"
|
||||||
#include "shared_args.h"
|
#include "shared_args.h"
|
||||||
#include "dup_metrics.h"
|
|
||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
using std::cout;
|
using std::cout;
|
||||||
|
|
|
||||||
|
|
@ -102,8 +102,8 @@ static void markDupsForPairs(vector<const ReadEnds *> &vpRe, DPSet<DupInfo> *dup
|
||||||
cout << "mark pair end: " << endl;
|
cout << "mark pair end: " << endl;
|
||||||
}
|
}
|
||||||
*/
|
*/
|
||||||
// cerr << zzhtestnum << " best: " << vpRe.size() << " " << pBest->read1IndexInFile << "-" << pBest->read2IndexInFile << endl;
|
// cerr << zzhtestnum << " best: " << vpRe.size() << " " << pBest->read1IndexInFile << "-" <<
|
||||||
// if (maxOperateTime == 0) ++zzhtestnum;
|
// pBest->read2IndexInFile << endl; if (maxOperateTime == 0) ++zzhtestnum;
|
||||||
if (notDupIdx != nullptr) {
|
if (notDupIdx != nullptr) {
|
||||||
notDupIdx->insert(pBest->read1IndexInFile);
|
notDupIdx->insert(pBest->read1IndexInFile);
|
||||||
notDupIdx->insert(pBest->read2IndexInFile);
|
notDupIdx->insert(pBest->read2IndexInFile);
|
||||||
|
|
@ -370,13 +370,22 @@ static inline void refreshFragDupIdx(DPSet<DupInfo> &dupIdx, MDSet<int64_t> ¬
|
||||||
|
|
||||||
/* 将pairs重叠部分的dup idx放进数据中 */
|
/* 将pairs重叠部分的dup idx放进数据中 */
|
||||||
static inline void refreshPairDupIdx(DPSet<DupInfo> &dupIdx, MDSet<int64_t> &opticalDupIdx, DPSet<DupInfo> &repIdx,
|
static inline void refreshPairDupIdx(DPSet<DupInfo> &dupIdx, MDSet<int64_t> &opticalDupIdx, DPSet<DupInfo> &repIdx,
|
||||||
MDSet<int64_t> ¬DupIdx, MDSet<int64_t> ¬OpticalDupIdx, MDSet<int64_t> ¬RepIdx,
|
MDSet<int64_t> ¬DupIdx, MDSet<int64_t> ¬OpticalDupIdx,
|
||||||
SerailMarkDupArg *lastArg, SerailMarkDupArg *curArg) {
|
MDSet<int64_t> ¬RepIdx, SerailMarkDupArg *lastArg, SerailMarkDupArg *curArg) {
|
||||||
auto &lp = *lastArg;
|
auto &lp = *lastArg;
|
||||||
auto &p = *curArg;
|
auto &p = *curArg;
|
||||||
for (auto idx : dupIdx) { lp.pairDupIdx.insert(idx); p.pairDupIdx.erase(idx); }
|
for (auto idx : dupIdx) {
|
||||||
for (auto idx : opticalDupIdx) { lp.pairOpticalDupIdx.insert(idx); p.pairOpticalDupIdx.erase(idx); }
|
lp.pairDupIdx.insert(idx);
|
||||||
for (auto idx : repIdx) { lp.pairRepIdx.insert(idx); p.pairRepIdx.erase(idx); }
|
p.pairDupIdx.erase(idx);
|
||||||
|
}
|
||||||
|
for (auto idx : opticalDupIdx) {
|
||||||
|
lp.pairOpticalDupIdx.insert(idx);
|
||||||
|
p.pairOpticalDupIdx.erase(idx);
|
||||||
|
}
|
||||||
|
for (auto idx : repIdx) {
|
||||||
|
lp.pairRepIdx.insert(idx);
|
||||||
|
p.pairRepIdx.erase(idx);
|
||||||
|
}
|
||||||
// for (auto idx : notDupIdx) {
|
// for (auto idx : notDupIdx) {
|
||||||
// if (lp.pairDupIdx.find(idx) != lp.pairDupIdx.end()) cout << "find-1: " << idx << endl;
|
// if (lp.pairDupIdx.find(idx) != lp.pairDupIdx.end()) cout << "find-1: " << idx << endl;
|
||||||
// if (lp.pairDupIdx.find({idx}) != lp.pairDupIdx.end()) cout << "find-2: " << idx << endl;
|
// if (lp.pairDupIdx.find({idx}) != lp.pairDupIdx.end()) cout << "find-2: " << idx << endl;
|
||||||
|
|
@ -384,15 +393,22 @@ static inline void refreshPairDupIdx(DPSet<DupInfo> &dupIdx, MDSet<int64_t> &opt
|
||||||
// if (p.pairDupIdx.find({idx}) != p.pairDupIdx.end()) cout << "find-4: " << idx << endl;
|
// if (p.pairDupIdx.find({idx}) != p.pairDupIdx.end()) cout << "find-4: " << idx << endl;
|
||||||
// lp.pairDupIdx.erase(idx); p.pairDupIdx.erase(idx);
|
// lp.pairDupIdx.erase(idx); p.pairDupIdx.erase(idx);
|
||||||
// }
|
// }
|
||||||
for (auto idx : notOpticalDupIdx) { lp.pairOpticalDupIdx.erase(idx); p.pairOpticalDupIdx.erase(idx); }
|
for (auto idx : notOpticalDupIdx) {
|
||||||
for (auto idx : notRepIdx) { lp.pairRepIdx.erase(idx); p.pairRepIdx.erase(idx); }
|
lp.pairOpticalDupIdx.erase(idx);
|
||||||
|
p.pairOpticalDupIdx.erase(idx);
|
||||||
|
}
|
||||||
|
for (auto idx : notRepIdx) {
|
||||||
|
lp.pairRepIdx.erase(idx);
|
||||||
|
p.pairRepIdx.erase(idx);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// 用来分别处理dup和optical dup
|
// 用来分别处理dup和optical dup
|
||||||
static void refeshTaskDupInfo(DPSet<DupInfo> &dupIdx, MDSet<int64_t> &opticalDupIdx, DPSet<DupInfo> &repIdx,
|
static void refeshTaskDupInfo(DPSet<DupInfo> &dupIdx, MDSet<int64_t> &opticalDupIdx, DPSet<DupInfo> &repIdx,
|
||||||
MDSet<int64_t> ¬DupIdx, MDSet<int64_t> ¬OpticalDupIdx, MDSet<int64_t> ¬RepIdx,
|
MDSet<int64_t> ¬DupIdx, MDSet<int64_t> ¬OpticalDupIdx, MDSet<int64_t> ¬RepIdx,
|
||||||
DPSet<DupInfo> &latterDupIdx, MDSet<int64_t> &latterOpticalDupIdx, DPSet<DupInfo> &latterRepIdx,
|
DPSet<DupInfo> &latterDupIdx, MDSet<int64_t> &latterOpticalDupIdx,
|
||||||
MDSet<int64_t> &latterNotDupIdx, MDSet<int64_t> &latterNotOpticalDupIdx, MDSet<int64_t> &latterNotRepIdx) {
|
DPSet<DupInfo> &latterRepIdx, MDSet<int64_t> &latterNotDupIdx,
|
||||||
|
MDSet<int64_t> &latterNotOpticalDupIdx, MDSet<int64_t> &latterNotRepIdx) {
|
||||||
for (auto idx : dupIdx) {
|
for (auto idx : dupIdx) {
|
||||||
latterDupIdx.insert(idx);
|
latterDupIdx.insert(idx);
|
||||||
// latterNotDupIdx.erase(idx); // 后来的更新为准
|
// latterNotDupIdx.erase(idx); // 后来的更新为准
|
||||||
|
|
@ -516,8 +532,8 @@ static void handleIntersectData(SerailMarkDupArg *lastArg, SerailMarkDupArg *cur
|
||||||
|
|
||||||
// if (prevFragEnd.read1IndexInFile == 255830545 || prevFragEnd.read1IndexInFile == 255830546 ||
|
// if (prevFragEnd.read1IndexInFile == 255830545 || prevFragEnd.read1IndexInFile == 255830546 ||
|
||||||
// prevFragEnd.read1IndexInFile == 255832599 || prevFragEnd.read1IndexInFile == 255832601) {
|
// prevFragEnd.read1IndexInFile == 255832599 || prevFragEnd.read1IndexInFile == 255832601) {
|
||||||
// cout << "find in p: " << lp.taskSeq << "\t" << prevFragEnd.read1IndexInFile << "\t" << readName << endl;
|
// cout << "find in p: " << lp.taskSeq << "\t" << prevFragEnd.read1IndexInFile << "\t" << readName <<
|
||||||
// if (nextUnpairInfoP != nullptr)
|
// endl; if (nextUnpairInfoP != nullptr)
|
||||||
// cout << "next p: " << nextUnpairInfoP->unpairedNum << endl;
|
// cout << "next p: " << nextUnpairInfoP->unpairedNum << endl;
|
||||||
// if (prevUnpairInfoP != nullptr)
|
// if (prevUnpairInfoP != nullptr)
|
||||||
// cout << "prev p: " << prevUnpairInfoP->unpairedNum << endl;
|
// cout << "prev p: " << prevUnpairInfoP->unpairedNum << endl;
|
||||||
|
|
@ -657,7 +673,8 @@ static void handleIntersectData(SerailMarkDupArg *lastArg, SerailMarkDupArg *cur
|
||||||
// if (p.taskSeq == 163) {
|
// if (p.taskSeq == 163) {
|
||||||
// cout << "final" << endl;
|
// cout << "final" << endl;
|
||||||
// }
|
// }
|
||||||
processPairs(*pairArrP, &t.dupIdx, &t.opticalDupIdx, &t.repIdx, &t.notDupIdx, &t.notOpticalDupIdx, &t.notRepIdx);
|
processPairs(*pairArrP, &t.dupIdx, &t.opticalDupIdx, &t.repIdx, &t.notDupIdx, &t.notOpticalDupIdx,
|
||||||
|
&t.notRepIdx);
|
||||||
if (taskSeq < lp.taskSeq)
|
if (taskSeq < lp.taskSeq)
|
||||||
g.unpairedPosArr.erase(posKey);
|
g.unpairedPosArr.erase(posKey);
|
||||||
}
|
}
|
||||||
|
|
@ -673,7 +690,8 @@ static void handleIntersectData(SerailMarkDupArg *lastArg, SerailMarkDupArg *cur
|
||||||
// for (auto &re: lp.unpairedPosArr[posKey].pairArr) {
|
// for (auto &re: lp.unpairedPosArr[posKey].pairArr) {
|
||||||
// cout << "lp reads: " << re.read1IndexInFile << "\t" << re.read2IndexInFile << endl;
|
// cout << "lp reads: " << re.read1IndexInFile << "\t" << re.read2IndexInFile << endl;
|
||||||
// }
|
// }
|
||||||
// cout << "found in g: " << lp.taskSeq << "\t" << lp.unpairedPosArr[posKey].unpairedNum << "\t" << lp.unpairedPosArr[posKey].pairArr.size() << endl;
|
// cout << "found in g: " << lp.taskSeq << "\t" << lp.unpairedPosArr[posKey].unpairedNum << "\t" <<
|
||||||
|
// lp.unpairedPosArr[posKey].pairArr.size() << endl;
|
||||||
// }
|
// }
|
||||||
g.unpairedPosArr[posKey] = lp.unpairedPosArr[posKey];
|
g.unpairedPosArr[posKey] = lp.unpairedPosArr[posKey];
|
||||||
}
|
}
|
||||||
|
|
@ -686,11 +704,14 @@ static void handleIntersectData(SerailMarkDupArg *lastArg, SerailMarkDupArg *cur
|
||||||
if (taskSeq < lp.taskSeq) {
|
if (taskSeq < lp.taskSeq) {
|
||||||
refeshTaskDupInfo(t.dupIdx, t.opticalDupIdx, t.repIdx, t.notDupIdx, t.notOpticalDupIdx, t.notRepIdx,
|
refeshTaskDupInfo(t.dupIdx, t.opticalDupIdx, t.repIdx, t.notDupIdx, t.notOpticalDupIdx, t.notRepIdx,
|
||||||
g.latterDupIdxArr[taskSeq], g.latterOpticalDupIdxArr[taskSeq], g.latterRepIdxArr[taskSeq],
|
g.latterDupIdxArr[taskSeq], g.latterOpticalDupIdxArr[taskSeq], g.latterRepIdxArr[taskSeq],
|
||||||
g.latterNotDupIdxArr[taskSeq], g.latterNotOpticalDupIdxArr[taskSeq], g.latterNotRepIdxArr[taskSeq]);
|
g.latterNotDupIdxArr[taskSeq], g.latterNotOpticalDupIdxArr[taskSeq],
|
||||||
|
g.latterNotRepIdxArr[taskSeq]);
|
||||||
} else if (taskSeq == lp.taskSeq) {
|
} else if (taskSeq == lp.taskSeq) {
|
||||||
refreshPairDupIdx(t.dupIdx, t.opticalDupIdx, t.repIdx, t.notDupIdx, t.notOpticalDupIdx, t.notRepIdx, &lp, &p);
|
refreshPairDupIdx(t.dupIdx, t.opticalDupIdx, t.repIdx, t.notDupIdx, t.notOpticalDupIdx, t.notRepIdx, &lp,
|
||||||
|
&p);
|
||||||
} else {
|
} else {
|
||||||
refreshPairDupIdx(t.dupIdx, t.opticalDupIdx, t.repIdx, t.notDupIdx, t.notOpticalDupIdx, t.notRepIdx, &p, &lp); // 把结果放到p中
|
refreshPairDupIdx(t.dupIdx, t.opticalDupIdx, t.repIdx, t.notDupIdx, t.notOpticalDupIdx, t.notRepIdx, &p,
|
||||||
|
&lp); // 把结果放到p中
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -769,7 +790,8 @@ static void handleLastTask(SerailMarkDupArg *task, GlobalDataArg *gDataArg) {
|
||||||
// cout << t.dupIdx.size() << "\t" << t.notDupIdx.size() << endl;
|
// cout << t.dupIdx.size() << "\t" << t.notDupIdx.size() << endl;
|
||||||
refeshTaskDupInfo(t.dupIdx, t.opticalDupIdx, t.repIdx, t.notDupIdx, t.notOpticalDupIdx, t.notRepIdx,
|
refeshTaskDupInfo(t.dupIdx, t.opticalDupIdx, t.repIdx, t.notDupIdx, t.notOpticalDupIdx, t.notRepIdx,
|
||||||
g.latterDupIdxArr[taskSeq], g.latterOpticalDupIdxArr[taskSeq], g.latterRepIdxArr[taskSeq],
|
g.latterDupIdxArr[taskSeq], g.latterOpticalDupIdxArr[taskSeq], g.latterRepIdxArr[taskSeq],
|
||||||
g.latterNotDupIdxArr[taskSeq], g.latterNotOpticalDupIdxArr[taskSeq], g.latterNotRepIdxArr[taskSeq]);
|
g.latterNotDupIdxArr[taskSeq], g.latterNotOpticalDupIdxArr[taskSeq],
|
||||||
|
g.latterNotRepIdxArr[taskSeq]);
|
||||||
}
|
}
|
||||||
|
|
||||||
// cout << "last unpair info: " << g.unpairedDic.size() << '\t' << g.unpairedPosArr.size() << endl;
|
// cout << "last unpair info: " << g.unpairedDic.size() << '\t' << g.unpairedPosArr.size() << endl;
|
||||||
|
|
@ -781,9 +803,8 @@ static void handleLastTask(SerailMarkDupArg *task, GlobalDataArg *gDataArg) {
|
||||||
for (auto &arr : g.dupIdxArr) {
|
for (auto &arr : g.dupIdxArr) {
|
||||||
cout << taskSeq << "\t" << arr.size();
|
cout << taskSeq << "\t" << arr.size();
|
||||||
if (taskSeq < (int)g.dupIdxArr.size() - 1)
|
if (taskSeq < (int)g.dupIdxArr.size() - 1)
|
||||||
cout << "\t" << g.latterDupIdxArr[taskSeq].size() << "\t" << g.latterNotDupIdxArr[taskSeq].size() << endl;
|
cout << "\t" << g.latterDupIdxArr[taskSeq].size() << "\t" << g.latterNotDupIdxArr[taskSeq].size() <<
|
||||||
else
|
endl; else cout << endl;
|
||||||
cout << endl;
|
|
||||||
// if (taskSeq == 98) {
|
// if (taskSeq == 98) {
|
||||||
// vector<DupInfo> v;
|
// vector<DupInfo> v;
|
||||||
// v.insert(v.end(), g.latterDupIdxArr[taskSeq].begin(), g.latterDupIdxArr[taskSeq].end());
|
// v.insert(v.end(), g.latterDupIdxArr[taskSeq].begin(), g.latterDupIdxArr[taskSeq].end());
|
||||||
|
|
@ -837,7 +858,8 @@ static void handleLastTask(SerailMarkDupArg *task, GlobalDataArg *gDataArg) {
|
||||||
*/
|
*/
|
||||||
}
|
}
|
||||||
for (int i = 0; i < (int)g.opticalDupIdxArr.size() - 1; ++i)
|
for (int i = 0; i < (int)g.opticalDupIdxArr.size() - 1; ++i)
|
||||||
refeshFinalTaskDupInfo(g.latterOpticalDupIdxArr[i], g.latterNotOpticalDupIdxArr[i], g.opticalDupIdxArr[i], intCacheDupIdx, intMidArr);
|
refeshFinalTaskDupInfo(g.latterOpticalDupIdxArr[i], g.latterNotOpticalDupIdxArr[i], g.opticalDupIdxArr[i],
|
||||||
|
intCacheDupIdx, intMidArr);
|
||||||
for (int i = 0; i < (int)g.repIdxArr.size() - 1; ++i)
|
for (int i = 0; i < (int)g.repIdxArr.size() - 1; ++i)
|
||||||
refeshFinalTaskDupInfo(g.latterRepIdxArr[i], g.latterNotRepIdxArr[i], g.repIdxArr[i], cacheDupIdx, midArr);
|
refeshFinalTaskDupInfo(g.latterRepIdxArr[i], g.latterNotRepIdxArr[i], g.repIdxArr[i], cacheDupIdx, midArr);
|
||||||
|
|
||||||
|
|
@ -866,6 +888,15 @@ static void handleLastTask(SerailMarkDupArg *task, GlobalDataArg *gDataArg) {
|
||||||
std::sort(vRepIdx.begin(), vRepIdx.end());
|
std::sort(vRepIdx.begin(), vRepIdx.end());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void calculateMetrics(SerailMarkDupArg &lp, SerailMarkDupArg &p, GlobalDataArg &g, DuplicationMetrics *pgMetrics) {
|
||||||
|
DuplicationMetrics &gMetrics = *pgMetrics;
|
||||||
|
cout << "calculateMetrics start: " << endl;
|
||||||
|
|
||||||
|
cout << lp.unpairedDic.size() << "\t" << p.unpairedDic.size() << "\t" << g.unpairedDic.size() << endl;
|
||||||
|
|
||||||
|
cout << "calculateMetrics end" << endl;
|
||||||
|
}
|
||||||
|
|
||||||
/* 串行处理数据,标记冗余 */
|
/* 串行处理数据,标记冗余 */
|
||||||
void serialMarkDups() {
|
void serialMarkDups() {
|
||||||
tm_arr[5].acc_start();
|
tm_arr[5].acc_start();
|
||||||
|
|
@ -941,6 +972,10 @@ void serialMarkDups() {
|
||||||
tm_arr[3].acc_start();
|
tm_arr[3].acc_start();
|
||||||
// 处理剩下的全局数据
|
// 处理剩下的全局数据
|
||||||
handleLastTask(lastArgP, &gData);
|
handleLastTask(lastArgP, &gData);
|
||||||
|
|
||||||
|
// 计算各种统计指标
|
||||||
|
calculateMetrics(*lastArgP, *curArgP, gData, &gMetrics);
|
||||||
|
|
||||||
// cout << "here 2" << endl;
|
// cout << "here 2" << endl;
|
||||||
tm_arr[3].acc_end();
|
tm_arr[3].acc_end();
|
||||||
|
|
||||||
|
|
@ -964,7 +999,6 @@ void serialMarkDups() {
|
||||||
taskSeq++;
|
taskSeq++;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// #include <fstream>
|
// #include <fstream>
|
||||||
// ofstream out("tumor_dup.txt");
|
// ofstream out("tumor_dup.txt");
|
||||||
// for (auto idx : dup)
|
// for (auto idx : dup)
|
||||||
|
|
@ -989,12 +1023,22 @@ void serialMarkDups() {
|
||||||
cout << "sort frags : " << tm_arr[9].acc_seconds_elapsed() << endl;
|
cout << "sort frags : " << tm_arr[9].acc_seconds_elapsed() << endl;
|
||||||
cout << "sort pairs : " << tm_arr[10].acc_seconds_elapsed() << endl;
|
cout << "sort pairs : " << tm_arr[10].acc_seconds_elapsed() << endl;
|
||||||
cout << "all : " << tm_arr[5].acc_seconds_elapsed() << endl;
|
cout << "all : " << tm_arr[5].acc_seconds_elapsed() << endl;
|
||||||
cout << "metrics: " << gMetrics.DuplicateCountHist << "\t" << gMetrics.NonOpticalDuplicateCountHist << "\t"
|
|
||||||
<< gMetrics.OpticalDuplicatesCountHist << "\t" << gMetrics.OpticalDuplicatesByLibraryId << endl;
|
|
||||||
cout << "optical dup: " << zzhopticalSet.size() << endl;
|
cout << "optical dup: " << zzhopticalSet.size() << endl;
|
||||||
cout << "optical arr dup: " << zzhopticalArr.size() << endl;
|
cout << "optical arr dup: " << zzhopticalArr.size() << endl;
|
||||||
cout << "optical size: " << opticalDupNum << endl;
|
cout << "optical size: " << opticalDupNum << endl;
|
||||||
|
|
||||||
|
cout << "metrics: \n"
|
||||||
|
<< "LIBRARY: " << gMetrics.LIBRARY << "\n"
|
||||||
|
<< "UNPAIRED_READS_EXAMINED: " << gMetrics.UNPAIRED_READS_EXAMINED << "\n"
|
||||||
|
<< "READ_PAIRS_EXAMINED: " << gMetrics.READ_PAIRS_EXAMINED << "\n"
|
||||||
|
<< "SECONDARY_OR_SUPPLEMENTARY_RDS: " << gMetrics.SECONDARY_OR_SUPPLEMENTARY_RDS << "\n"
|
||||||
|
<< "UNMAPPED_READS: " << gMetrics.UNMAPPED_READS << "\n"
|
||||||
|
<< "UNPAIRED_READ_DUPLICATES: " << gMetrics.UNPAIRED_READ_DUPLICATES << "\n"
|
||||||
|
<< "READ_PAIR_DUPLICATES: " << gMetrics.READ_PAIR_DUPLICATES << "\n"
|
||||||
|
<< "READ_PAIR_OPTICAL_DUPLICATES: " << gMetrics.READ_PAIR_OPTICAL_DUPLICATES << "\n"
|
||||||
|
<< "PERCENT_DUPLICATION: " << gMetrics.PERCENT_DUPLICATION << "\n"
|
||||||
|
<< "ESTIMATED_LIBRARY_SIZE: " << gMetrics.ESTIMATED_LIBRARY_SIZE << endl;
|
||||||
|
|
||||||
Timer::log_time("serial end ");
|
Timer::log_time("serial end ");
|
||||||
|
|
||||||
// for (auto i : gData.dupArr)
|
// for (auto i : gData.dupArr)
|
||||||
|
|
|
||||||
|
|
@ -7,12 +7,12 @@
|
||||||
|
|
||||||
#include <set>
|
#include <set>
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <vector>
|
|
||||||
#include <unordered_set>
|
#include <unordered_set>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
using std::set;
|
using std::set;
|
||||||
using std::unordered_set;
|
|
||||||
using std::string;
|
using std::string;
|
||||||
|
using std::unordered_set;
|
||||||
using std::vector;
|
using std::vector;
|
||||||
|
|
||||||
/* 存放未匹配readend相同位点的所有readend */
|
/* 存放未匹配readend相同位点的所有readend */
|
||||||
|
|
@ -52,15 +52,9 @@ struct DupInfo {
|
||||||
DupInfo() : DupInfo(-1, 0, 0) {}
|
DupInfo() : DupInfo(-1, 0, 0) {}
|
||||||
DupInfo(int64_t idx_) : DupInfo(idx_, 0, 0) {}
|
DupInfo(int64_t idx_) : DupInfo(idx_, 0, 0) {}
|
||||||
DupInfo(int64_t idx_, int64_t repIdx_, int dupSet_) : idx(idx_), repIdx(repIdx_), dupSet(dupSet_) {}
|
DupInfo(int64_t idx_, int64_t repIdx_, int dupSet_) : idx(idx_), repIdx(repIdx_), dupSet(dupSet_) {}
|
||||||
bool operator<(const DupInfo &o) const {
|
bool operator<(const DupInfo &o) const { return idx < o.idx; }
|
||||||
return idx < o.idx;
|
bool operator>(const DupInfo &o) const { return idx > o.idx; }
|
||||||
}
|
operator int64_t() const { return idx; }
|
||||||
bool operator>(const DupInfo &o) const {
|
|
||||||
return idx > o.idx;
|
|
||||||
}
|
|
||||||
operator int64_t() const {
|
|
||||||
return idx;
|
|
||||||
}
|
|
||||||
};
|
};
|
||||||
|
|
||||||
struct DupInfoHash {
|
struct DupInfoHash {
|
||||||
|
|
@ -108,7 +102,8 @@ struct UnpairedPosInfo {
|
||||||
// typedef unordered_map<int64_t, UnpairedPosInfo> UnpairedPositionMap;
|
// typedef unordered_map<int64_t, UnpairedPosInfo> UnpairedPositionMap;
|
||||||
|
|
||||||
typedef tsl::robin_map<string, UnpairedREInfo> UnpairedNameMap; // 以read name为索引,保存未匹配的pair read
|
typedef tsl::robin_map<string, UnpairedREInfo> UnpairedNameMap; // 以read name为索引,保存未匹配的pair read
|
||||||
typedef tsl::robin_map<int64_t, UnpairedPosInfo> UnpairedPositionMap; // 以位点为索引,保存该位点包含的对应的所有read和该位点包含的剩余未匹配的read的数量
|
typedef tsl::robin_map<int64_t, UnpairedPosInfo>
|
||||||
|
UnpairedPositionMap; // 以位点为索引,保存该位点包含的对应的所有read和该位点包含的剩余未匹配的read的数量
|
||||||
|
|
||||||
/* 单线程处理冗余参数结构体 */
|
/* 单线程处理冗余参数结构体 */
|
||||||
struct SerailMarkDupArg {
|
struct SerailMarkDupArg {
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,48 @@
|
||||||
|
# 各种统计信息的计算,metrics文件中记录的信息
|
||||||
|
- htsjdk.samtools.metrics.StringHeader
|
||||||
|
- 命令行参数
|
||||||
|
- 执行命令的时间
|
||||||
|
- METRICS CLASS picard.sam.DuplicationMetrics
|
||||||
|
```
|
||||||
|
LIBRARY 样本ID(建库制备的样本id)
|
||||||
|
UNPAIRED_READS_EXAMINED 未匹配的reads数量
|
||||||
|
READ_PAIRS_EXAMINED 匹配的reads数量
|
||||||
|
SECONDARY_OR_SUPPLEMENTARY_RDS 非主要匹配的reads个数
|
||||||
|
UNMAPPED_READS 未匹配的reads个数
|
||||||
|
UNPAIRED_READ_DUPLICATES 未匹配的reads,冗余的个数
|
||||||
|
READ_PAIR_DUPLICATES 匹配的reads,冗余个数
|
||||||
|
READ_PAIR_OPTICAL_DUPLICATES 匹配的reads,光学原因造成的冗余个数
|
||||||
|
PERCENT_DUPLICATION 冗余reads占比
|
||||||
|
ESTIMATED_LIBRARY_SIZE 估计的样本reads总量
|
||||||
|
|
||||||
|
normal 1205 498430 729 1205 763 117212 6877 0.235643 924111
|
||||||
|
```
|
||||||
|
- HISTOGRAM java.lang.Double
|
||||||
|
```
|
||||||
|
BIN 坐标值
|
||||||
|
CoverageMult
|
||||||
|
all_sets
|
||||||
|
optical_sets
|
||||||
|
non_optical_sets
|
||||||
|
|
||||||
|
1.0 1.010558 287416 0 291706
|
||||||
|
2.0 1.599836 72561 6664 70093
|
||||||
|
3.0 1.943455 15542 105 14299
|
||||||
|
4.0 2.143827 3274 1 2831
|
||||||
|
5.0 2.260667 708 0 607
|
||||||
|
6.0 2.3288 136 0 114
|
||||||
|
7.0 2.368529 28 0 17
|
||||||
|
8.0 2.391696 7 0 5
|
||||||
|
9.0 2.405205 2 0 2
|
||||||
|
10.0 2.413082 0 0 0
|
||||||
|
11.0 2.417676 0 0 0
|
||||||
|
12.0 2.420354 0 0 0
|
||||||
|
13.0 2.421916 0 0 0
|
||||||
|
14.0 2.422827 0 0 0
|
||||||
|
15.0 2.423358 0 0 0
|
||||||
|
...
|
||||||
|
...
|
||||||
|
98.0 2.424101 0 0 0
|
||||||
|
99.0 2.424101 0 0 0
|
||||||
|
100.0 2.424101 0 0 0
|
||||||
|
```
|
||||||
Loading…
Reference in New Issue