修改了一些格式

This commit is contained in:
zzh 2024-11-05 15:53:04 +08:00
parent f84d7bb0dc
commit 899d40cbda
6 changed files with 197 additions and 118 deletions

4
run.sh
View File

@ -1,7 +1,7 @@
#input=~/data/bam/zy_normal.bam #input=~/data/bam/zy_normal.bam
input=~/data/bam/zy_tumor.bam #input=~/data/bam/zy_tumor.bam
#input=~/data/bam/100w.bam #input=~/data/bam/100w.bam
#input=~/data/bam/1kw.sam input=~/data/bam/1kw.sam
#input=~/data/bam/n1kw.sam #input=~/data/bam/n1kw.sam
time /home/zzh/work/ngs/picard_cpp/build/bin/picard_cpp \ time /home/zzh/work/ngs/picard_cpp/build/bin/picard_cpp \

View File

@ -1,17 +1,15 @@
#include <htslib/sam.h>
#include <cstdio> #include <cstdio>
#include <cstring> #include <cstring>
#include <htslib/sam.h>
#include "module.h" #include "module.h"
/* 版本信息 */ /* 版本信息 */
const char *version() const char *version() { return PICARD_CPP_VERSION; }
{
return PICARD_CPP_VERSION;
}
/* 使用说明 */ /* 使用说明 */
static void usage(FILE *fp) static void usage(FILE *fp) {
{
fprintf(fp, fprintf(fp,
"\n" "\n"
"Program: picard_cpp (A cpp implementation for picard.)\n" "Program: picard_cpp (A cpp implementation for picard.)\n"
@ -25,19 +23,14 @@ static void usage(FILE *fp)
"\n"); "\n");
} }
int main(int argc, char *argv[]) int main(int argc, char *argv[]) {
{ if (argc < 2) {
if (argc < 2)
{
usage(stderr); usage(stderr);
return 1; return 1;
} }
if (strcmp(argv[1], "help") == 0 || strcmp(argv[1], "--help") == 0) if (strcmp(argv[1], "help") == 0 || strcmp(argv[1], "--help") == 0) {
{ if (argc == 2) {
if (argc == 2)
{
usage(stdout); usage(stdout);
return 0; return 0;
} }
@ -49,8 +42,7 @@ int main(int argc, char *argv[])
if (strcmp(argv[1], "MarkDuplicates") == 0) if (strcmp(argv[1], "MarkDuplicates") == 0)
ret = MarkDuplicates(argc - 1, argv + 1); ret = MarkDuplicates(argc - 1, argv + 1);
else else {
{
fprintf(stderr, "\n[Error]: unrecognized command '%s'\n\n", argv[1]); fprintf(stderr, "\n[Error]: unrecognized command '%s'\n\n", argv[1]);
usage(stdout); usage(stdout);
return 1; return 1;

View File

@ -26,12 +26,12 @@ Date : 2023/10/23
#include <unordered_set> #include <unordered_set>
#include <vector> #include <vector>
#include "dup_metrics.h"
#include "markdups_arg.h" #include "markdups_arg.h"
#include "md_funcs.h" #include "md_funcs.h"
#include "parallel_md.h" #include "parallel_md.h"
#include "serial_md.h" #include "serial_md.h"
#include "shared_args.h" #include "shared_args.h"
#include "dup_metrics.h"
using namespace std; using namespace std;
using std::cout; using std::cout;

View File

@ -102,8 +102,8 @@ static void markDupsForPairs(vector<const ReadEnds *> &vpRe, DPSet<DupInfo> *dup
cout << "mark pair end: " << endl; cout << "mark pair end: " << endl;
} }
*/ */
// cerr << zzhtestnum << " best: " << vpRe.size() << " " << pBest->read1IndexInFile << "-" << pBest->read2IndexInFile << endl; // cerr << zzhtestnum << " best: " << vpRe.size() << " " << pBest->read1IndexInFile << "-" <<
// if (maxOperateTime == 0) ++zzhtestnum; // pBest->read2IndexInFile << endl; if (maxOperateTime == 0) ++zzhtestnum;
if (notDupIdx != nullptr) { if (notDupIdx != nullptr) {
notDupIdx->insert(pBest->read1IndexInFile); notDupIdx->insert(pBest->read1IndexInFile);
notDupIdx->insert(pBest->read2IndexInFile); notDupIdx->insert(pBest->read2IndexInFile);
@ -370,13 +370,22 @@ static inline void refreshFragDupIdx(DPSet<DupInfo> &dupIdx, MDSet<int64_t> &not
/* 将pairs重叠部分的dup idx放进数据中 */ /* 将pairs重叠部分的dup idx放进数据中 */
static inline void refreshPairDupIdx(DPSet<DupInfo> &dupIdx, MDSet<int64_t> &opticalDupIdx, DPSet<DupInfo> &repIdx, static inline void refreshPairDupIdx(DPSet<DupInfo> &dupIdx, MDSet<int64_t> &opticalDupIdx, DPSet<DupInfo> &repIdx,
MDSet<int64_t> &notDupIdx, MDSet<int64_t> &notOpticalDupIdx, MDSet<int64_t> &notRepIdx, MDSet<int64_t> &notDupIdx, MDSet<int64_t> &notOpticalDupIdx,
SerailMarkDupArg *lastArg, SerailMarkDupArg *curArg) { MDSet<int64_t> &notRepIdx, SerailMarkDupArg *lastArg, SerailMarkDupArg *curArg) {
auto &lp = *lastArg; auto &lp = *lastArg;
auto &p = *curArg; auto &p = *curArg;
for (auto idx : dupIdx) { lp.pairDupIdx.insert(idx); p.pairDupIdx.erase(idx); } for (auto idx : dupIdx) {
for (auto idx : opticalDupIdx) { lp.pairOpticalDupIdx.insert(idx); p.pairOpticalDupIdx.erase(idx); } lp.pairDupIdx.insert(idx);
for (auto idx : repIdx) { lp.pairRepIdx.insert(idx); p.pairRepIdx.erase(idx); } p.pairDupIdx.erase(idx);
}
for (auto idx : opticalDupIdx) {
lp.pairOpticalDupIdx.insert(idx);
p.pairOpticalDupIdx.erase(idx);
}
for (auto idx : repIdx) {
lp.pairRepIdx.insert(idx);
p.pairRepIdx.erase(idx);
}
// for (auto idx : notDupIdx) { // for (auto idx : notDupIdx) {
// if (lp.pairDupIdx.find(idx) != lp.pairDupIdx.end()) cout << "find-1: " << idx << endl; // if (lp.pairDupIdx.find(idx) != lp.pairDupIdx.end()) cout << "find-1: " << idx << endl;
// if (lp.pairDupIdx.find({idx}) != lp.pairDupIdx.end()) cout << "find-2: " << idx << endl; // if (lp.pairDupIdx.find({idx}) != lp.pairDupIdx.end()) cout << "find-2: " << idx << endl;
@ -384,15 +393,22 @@ static inline void refreshPairDupIdx(DPSet<DupInfo> &dupIdx, MDSet<int64_t> &opt
// if (p.pairDupIdx.find({idx}) != p.pairDupIdx.end()) cout << "find-4: " << idx << endl; // if (p.pairDupIdx.find({idx}) != p.pairDupIdx.end()) cout << "find-4: " << idx << endl;
// lp.pairDupIdx.erase(idx); p.pairDupIdx.erase(idx); // lp.pairDupIdx.erase(idx); p.pairDupIdx.erase(idx);
// } // }
for (auto idx : notOpticalDupIdx) { lp.pairOpticalDupIdx.erase(idx); p.pairOpticalDupIdx.erase(idx); } for (auto idx : notOpticalDupIdx) {
for (auto idx : notRepIdx) { lp.pairRepIdx.erase(idx); p.pairRepIdx.erase(idx); } lp.pairOpticalDupIdx.erase(idx);
p.pairOpticalDupIdx.erase(idx);
}
for (auto idx : notRepIdx) {
lp.pairRepIdx.erase(idx);
p.pairRepIdx.erase(idx);
}
} }
// 用来分别处理dup和optical dup // 用来分别处理dup和optical dup
static void refeshTaskDupInfo(DPSet<DupInfo> &dupIdx, MDSet<int64_t> &opticalDupIdx, DPSet<DupInfo> &repIdx, static void refeshTaskDupInfo(DPSet<DupInfo> &dupIdx, MDSet<int64_t> &opticalDupIdx, DPSet<DupInfo> &repIdx,
MDSet<int64_t> &notDupIdx, MDSet<int64_t> &notOpticalDupIdx, MDSet<int64_t> &notRepIdx, MDSet<int64_t> &notDupIdx, MDSet<int64_t> &notOpticalDupIdx, MDSet<int64_t> &notRepIdx,
DPSet<DupInfo> &latterDupIdx, MDSet<int64_t> &latterOpticalDupIdx, DPSet<DupInfo> &latterRepIdx, DPSet<DupInfo> &latterDupIdx, MDSet<int64_t> &latterOpticalDupIdx,
MDSet<int64_t> &latterNotDupIdx, MDSet<int64_t> &latterNotOpticalDupIdx, MDSet<int64_t> &latterNotRepIdx) { DPSet<DupInfo> &latterRepIdx, MDSet<int64_t> &latterNotDupIdx,
MDSet<int64_t> &latterNotOpticalDupIdx, MDSet<int64_t> &latterNotRepIdx) {
for (auto idx : dupIdx) { for (auto idx : dupIdx) {
latterDupIdx.insert(idx); latterDupIdx.insert(idx);
// latterNotDupIdx.erase(idx); // 后来的更新为准 // latterNotDupIdx.erase(idx); // 后来的更新为准
@ -516,8 +532,8 @@ static void handleIntersectData(SerailMarkDupArg *lastArg, SerailMarkDupArg *cur
// if (prevFragEnd.read1IndexInFile == 255830545 || prevFragEnd.read1IndexInFile == 255830546 || // if (prevFragEnd.read1IndexInFile == 255830545 || prevFragEnd.read1IndexInFile == 255830546 ||
// prevFragEnd.read1IndexInFile == 255832599 || prevFragEnd.read1IndexInFile == 255832601) { // prevFragEnd.read1IndexInFile == 255832599 || prevFragEnd.read1IndexInFile == 255832601) {
// cout << "find in p: " << lp.taskSeq << "\t" << prevFragEnd.read1IndexInFile << "\t" << readName << endl; // cout << "find in p: " << lp.taskSeq << "\t" << prevFragEnd.read1IndexInFile << "\t" << readName <<
// if (nextUnpairInfoP != nullptr) // endl; if (nextUnpairInfoP != nullptr)
// cout << "next p: " << nextUnpairInfoP->unpairedNum << endl; // cout << "next p: " << nextUnpairInfoP->unpairedNum << endl;
// if (prevUnpairInfoP != nullptr) // if (prevUnpairInfoP != nullptr)
// cout << "prev p: " << prevUnpairInfoP->unpairedNum << endl; // cout << "prev p: " << prevUnpairInfoP->unpairedNum << endl;
@ -657,7 +673,8 @@ static void handleIntersectData(SerailMarkDupArg *lastArg, SerailMarkDupArg *cur
// if (p.taskSeq == 163) { // if (p.taskSeq == 163) {
// cout << "final" << endl; // cout << "final" << endl;
// } // }
processPairs(*pairArrP, &t.dupIdx, &t.opticalDupIdx, &t.repIdx, &t.notDupIdx, &t.notOpticalDupIdx, &t.notRepIdx); processPairs(*pairArrP, &t.dupIdx, &t.opticalDupIdx, &t.repIdx, &t.notDupIdx, &t.notOpticalDupIdx,
&t.notRepIdx);
if (taskSeq < lp.taskSeq) if (taskSeq < lp.taskSeq)
g.unpairedPosArr.erase(posKey); g.unpairedPosArr.erase(posKey);
} }
@ -673,7 +690,8 @@ static void handleIntersectData(SerailMarkDupArg *lastArg, SerailMarkDupArg *cur
// for (auto &re: lp.unpairedPosArr[posKey].pairArr) { // for (auto &re: lp.unpairedPosArr[posKey].pairArr) {
// cout << "lp reads: " << re.read1IndexInFile << "\t" << re.read2IndexInFile << endl; // cout << "lp reads: " << re.read1IndexInFile << "\t" << re.read2IndexInFile << endl;
// } // }
// cout << "found in g: " << lp.taskSeq << "\t" << lp.unpairedPosArr[posKey].unpairedNum << "\t" << lp.unpairedPosArr[posKey].pairArr.size() << endl; // cout << "found in g: " << lp.taskSeq << "\t" << lp.unpairedPosArr[posKey].unpairedNum << "\t" <<
// lp.unpairedPosArr[posKey].pairArr.size() << endl;
// } // }
g.unpairedPosArr[posKey] = lp.unpairedPosArr[posKey]; g.unpairedPosArr[posKey] = lp.unpairedPosArr[posKey];
} }
@ -686,11 +704,14 @@ static void handleIntersectData(SerailMarkDupArg *lastArg, SerailMarkDupArg *cur
if (taskSeq < lp.taskSeq) { if (taskSeq < lp.taskSeq) {
refeshTaskDupInfo(t.dupIdx, t.opticalDupIdx, t.repIdx, t.notDupIdx, t.notOpticalDupIdx, t.notRepIdx, refeshTaskDupInfo(t.dupIdx, t.opticalDupIdx, t.repIdx, t.notDupIdx, t.notOpticalDupIdx, t.notRepIdx,
g.latterDupIdxArr[taskSeq], g.latterOpticalDupIdxArr[taskSeq], g.latterRepIdxArr[taskSeq], g.latterDupIdxArr[taskSeq], g.latterOpticalDupIdxArr[taskSeq], g.latterRepIdxArr[taskSeq],
g.latterNotDupIdxArr[taskSeq], g.latterNotOpticalDupIdxArr[taskSeq], g.latterNotRepIdxArr[taskSeq]); g.latterNotDupIdxArr[taskSeq], g.latterNotOpticalDupIdxArr[taskSeq],
g.latterNotRepIdxArr[taskSeq]);
} else if (taskSeq == lp.taskSeq) { } else if (taskSeq == lp.taskSeq) {
refreshPairDupIdx(t.dupIdx, t.opticalDupIdx, t.repIdx, t.notDupIdx, t.notOpticalDupIdx, t.notRepIdx, &lp, &p); refreshPairDupIdx(t.dupIdx, t.opticalDupIdx, t.repIdx, t.notDupIdx, t.notOpticalDupIdx, t.notRepIdx, &lp,
&p);
} else { } else {
refreshPairDupIdx(t.dupIdx, t.opticalDupIdx, t.repIdx, t.notDupIdx, t.notOpticalDupIdx, t.notRepIdx, &p, &lp); // 把结果放到p中 refreshPairDupIdx(t.dupIdx, t.opticalDupIdx, t.repIdx, t.notDupIdx, t.notOpticalDupIdx, t.notRepIdx, &p,
&lp); // 把结果放到p中
} }
} }
@ -769,7 +790,8 @@ static void handleLastTask(SerailMarkDupArg *task, GlobalDataArg *gDataArg) {
// cout << t.dupIdx.size() << "\t" << t.notDupIdx.size() << endl; // cout << t.dupIdx.size() << "\t" << t.notDupIdx.size() << endl;
refeshTaskDupInfo(t.dupIdx, t.opticalDupIdx, t.repIdx, t.notDupIdx, t.notOpticalDupIdx, t.notRepIdx, refeshTaskDupInfo(t.dupIdx, t.opticalDupIdx, t.repIdx, t.notDupIdx, t.notOpticalDupIdx, t.notRepIdx,
g.latterDupIdxArr[taskSeq], g.latterOpticalDupIdxArr[taskSeq], g.latterRepIdxArr[taskSeq], g.latterDupIdxArr[taskSeq], g.latterOpticalDupIdxArr[taskSeq], g.latterRepIdxArr[taskSeq],
g.latterNotDupIdxArr[taskSeq], g.latterNotOpticalDupIdxArr[taskSeq], g.latterNotRepIdxArr[taskSeq]); g.latterNotDupIdxArr[taskSeq], g.latterNotOpticalDupIdxArr[taskSeq],
g.latterNotRepIdxArr[taskSeq]);
} }
// cout << "last unpair info: " << g.unpairedDic.size() << '\t' << g.unpairedPosArr.size() << endl; // cout << "last unpair info: " << g.unpairedDic.size() << '\t' << g.unpairedPosArr.size() << endl;
@ -781,9 +803,8 @@ static void handleLastTask(SerailMarkDupArg *task, GlobalDataArg *gDataArg) {
for (auto &arr : g.dupIdxArr) { for (auto &arr : g.dupIdxArr) {
cout << taskSeq << "\t" << arr.size(); cout << taskSeq << "\t" << arr.size();
if (taskSeq < (int)g.dupIdxArr.size() - 1) if (taskSeq < (int)g.dupIdxArr.size() - 1)
cout << "\t" << g.latterDupIdxArr[taskSeq].size() << "\t" << g.latterNotDupIdxArr[taskSeq].size() << endl; cout << "\t" << g.latterDupIdxArr[taskSeq].size() << "\t" << g.latterNotDupIdxArr[taskSeq].size() <<
else endl; else cout << endl;
cout << endl;
// if (taskSeq == 98) { // if (taskSeq == 98) {
// vector<DupInfo> v; // vector<DupInfo> v;
// v.insert(v.end(), g.latterDupIdxArr[taskSeq].begin(), g.latterDupIdxArr[taskSeq].end()); // v.insert(v.end(), g.latterDupIdxArr[taskSeq].begin(), g.latterDupIdxArr[taskSeq].end());
@ -837,7 +858,8 @@ static void handleLastTask(SerailMarkDupArg *task, GlobalDataArg *gDataArg) {
*/ */
} }
for (int i = 0; i < (int)g.opticalDupIdxArr.size() - 1; ++i) for (int i = 0; i < (int)g.opticalDupIdxArr.size() - 1; ++i)
refeshFinalTaskDupInfo(g.latterOpticalDupIdxArr[i], g.latterNotOpticalDupIdxArr[i], g.opticalDupIdxArr[i], intCacheDupIdx, intMidArr); refeshFinalTaskDupInfo(g.latterOpticalDupIdxArr[i], g.latterNotOpticalDupIdxArr[i], g.opticalDupIdxArr[i],
intCacheDupIdx, intMidArr);
for (int i = 0; i < (int)g.repIdxArr.size() - 1; ++i) for (int i = 0; i < (int)g.repIdxArr.size() - 1; ++i)
refeshFinalTaskDupInfo(g.latterRepIdxArr[i], g.latterNotRepIdxArr[i], g.repIdxArr[i], cacheDupIdx, midArr); refeshFinalTaskDupInfo(g.latterRepIdxArr[i], g.latterNotRepIdxArr[i], g.repIdxArr[i], cacheDupIdx, midArr);
@ -866,6 +888,15 @@ static void handleLastTask(SerailMarkDupArg *task, GlobalDataArg *gDataArg) {
std::sort(vRepIdx.begin(), vRepIdx.end()); std::sort(vRepIdx.begin(), vRepIdx.end());
} }
/**
 * Hook for computing the duplication metrics; at present it only emits debug output.
 *
 * Writes a start marker, one tab-separated line holding the unpairedDic sizes of
 * lp, p and g (in that order), and an end marker to stdout.
 *
 * @param lp        Task argument passed first by the caller (the "last" task at the call site).
 * @param p         Task argument passed second by the caller (the "current" task at the call site).
 * @param g         Global data whose unpairedDic size is reported.
 * @param pgMetrics Destination for the metrics. It is neither read nor written yet,
 *                  so it is deliberately not dereferenced here.
 */
void calculateMetrics(SerailMarkDupArg &lp, SerailMarkDupArg &p, GlobalDataArg &g, DuplicationMetrics *pgMetrics) {
    // No metric is filled in yet: avoid binding an unused reference through the
    // pointer, which would dereference it without any null check.
    (void)pgMetrics;

    cout << "calculateMetrics start: " << endl;
    cout << lp.unpairedDic.size() << "\t" << p.unpairedDic.size() << "\t" << g.unpairedDic.size() << endl;
    cout << "calculateMetrics end" << endl;
}
/* 串行处理数据,标记冗余 */ /* 串行处理数据,标记冗余 */
void serialMarkDups() { void serialMarkDups() {
tm_arr[5].acc_start(); tm_arr[5].acc_start();
@ -941,6 +972,10 @@ void serialMarkDups() {
tm_arr[3].acc_start(); tm_arr[3].acc_start();
// 处理剩下的全局数据 // 处理剩下的全局数据
handleLastTask(lastArgP, &gData); handleLastTask(lastArgP, &gData);
// 计算各种统计指标
calculateMetrics(*lastArgP, *curArgP, gData, &gMetrics);
// cout << "here 2" << endl; // cout << "here 2" << endl;
tm_arr[3].acc_end(); tm_arr[3].acc_end();
@ -964,7 +999,6 @@ void serialMarkDups() {
taskSeq++; taskSeq++;
} }
// #include <fstream> // #include <fstream>
// ofstream out("tumor_dup.txt"); // ofstream out("tumor_dup.txt");
// for (auto idx : dup) // for (auto idx : dup)
@ -989,12 +1023,22 @@ void serialMarkDups() {
cout << "sort frags : " << tm_arr[9].acc_seconds_elapsed() << endl; cout << "sort frags : " << tm_arr[9].acc_seconds_elapsed() << endl;
cout << "sort pairs : " << tm_arr[10].acc_seconds_elapsed() << endl; cout << "sort pairs : " << tm_arr[10].acc_seconds_elapsed() << endl;
cout << "all : " << tm_arr[5].acc_seconds_elapsed() << endl; cout << "all : " << tm_arr[5].acc_seconds_elapsed() << endl;
cout << "metrics: " << gMetrics.DuplicateCountHist << "\t" << gMetrics.NonOpticalDuplicateCountHist << "\t"
<< gMetrics.OpticalDuplicatesCountHist << "\t" << gMetrics.OpticalDuplicatesByLibraryId << endl;
cout << "optical dup: " << zzhopticalSet.size() << endl; cout << "optical dup: " << zzhopticalSet.size() << endl;
cout << "optical arr dup: " << zzhopticalArr.size() << endl; cout << "optical arr dup: " << zzhopticalArr.size() << endl;
cout << "optical size: " << opticalDupNum << endl; cout << "optical size: " << opticalDupNum << endl;
cout << "metrics: \n"
<< "LIBRARY: " << gMetrics.LIBRARY << "\n"
<< "UNPAIRED_READS_EXAMINED: " << gMetrics.UNPAIRED_READS_EXAMINED << "\n"
<< "READ_PAIRS_EXAMINED: " << gMetrics.READ_PAIRS_EXAMINED << "\n"
<< "SECONDARY_OR_SUPPLEMENTARY_RDS: " << gMetrics.SECONDARY_OR_SUPPLEMENTARY_RDS << "\n"
<< "UNMAPPED_READS: " << gMetrics.UNMAPPED_READS << "\n"
<< "UNPAIRED_READ_DUPLICATES: " << gMetrics.UNPAIRED_READ_DUPLICATES << "\n"
<< "READ_PAIR_DUPLICATES: " << gMetrics.READ_PAIR_DUPLICATES << "\n"
<< "READ_PAIR_OPTICAL_DUPLICATES: " << gMetrics.READ_PAIR_OPTICAL_DUPLICATES << "\n"
<< "PERCENT_DUPLICATION: " << gMetrics.PERCENT_DUPLICATION << "\n"
<< "ESTIMATED_LIBRARY_SIZE: " << gMetrics.ESTIMATED_LIBRARY_SIZE << endl;
Timer::log_time("serial end "); Timer::log_time("serial end ");
// for (auto i : gData.dupArr) // for (auto i : gData.dupArr)

View File

@ -7,12 +7,12 @@
#include <set> #include <set>
#include <string> #include <string>
#include <vector>
#include <unordered_set> #include <unordered_set>
#include <vector>
using std::set; using std::set;
using std::unordered_set;
using std::string; using std::string;
using std::unordered_set;
using std::vector; using std::vector;
/* 存放未匹配readend相同位点的所有readend */ /* 存放未匹配readend相同位点的所有readend */
@ -52,15 +52,9 @@ struct DupInfo {
DupInfo() : DupInfo(-1, 0, 0) {} DupInfo() : DupInfo(-1, 0, 0) {}
DupInfo(int64_t idx_) : DupInfo(idx_, 0, 0) {} DupInfo(int64_t idx_) : DupInfo(idx_, 0, 0) {}
DupInfo(int64_t idx_, int64_t repIdx_, int dupSet_) : idx(idx_), repIdx(repIdx_), dupSet(dupSet_) {} DupInfo(int64_t idx_, int64_t repIdx_, int dupSet_) : idx(idx_), repIdx(repIdx_), dupSet(dupSet_) {}
bool operator<(const DupInfo &o) const { bool operator<(const DupInfo &o) const { return idx < o.idx; }
return idx < o.idx; bool operator>(const DupInfo &o) const { return idx > o.idx; }
} operator int64_t() const { return idx; }
bool operator>(const DupInfo &o) const {
return idx > o.idx;
}
operator int64_t() const {
return idx;
}
}; };
struct DupInfoHash { struct DupInfoHash {
@ -108,7 +102,8 @@ struct UnpairedPosInfo {
// typedef unordered_map<int64_t, UnpairedPosInfo> UnpairedPositionMap; // typedef unordered_map<int64_t, UnpairedPosInfo> UnpairedPositionMap;
typedef tsl::robin_map<string, UnpairedREInfo> UnpairedNameMap; // 以read name为索引保存未匹配的pair read typedef tsl::robin_map<string, UnpairedREInfo> UnpairedNameMap; // 以read name为索引保存未匹配的pair read
typedef tsl::robin_map<int64_t, UnpairedPosInfo> UnpairedPositionMap; // 以位点为索引保存该位点包含的对应的所有read和该位点包含的剩余未匹配的read的数量 typedef tsl::robin_map<int64_t, UnpairedPosInfo>
UnpairedPositionMap; // 以位点为索引保存该位点包含的对应的所有read和该位点包含的剩余未匹配的read的数量
/* 单线程处理冗余参数结构体 */ /* 单线程处理冗余参数结构体 */
struct SerailMarkDupArg { struct SerailMarkDupArg {

48
todo.md 100644
View File

@ -0,0 +1,48 @@
# 各种统计信息的计算:metrics文件中记录的信息
- htsjdk.samtools.metrics.StringHeader
- 命令行参数
- 执行命令的时间
- METRICS CLASS picard.sam.DuplicationMetrics
```
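LIBRARY 文库名称(建库制备的样本id,即进行冗余标记的文库)
UNPAIRED_READS_EXAMINED 已比对上、但没有已比对配对read的reads数量(单端read,或其配对read未比对上)
READ_PAIRS_EXAMINED 已比对上的read pair(成对reads)数量
SECONDARY_OR_SUPPLEMENTARY_RDS 非主要比对(secondary或supplementary)的reads个数
UNMAPPED_READS 未比对上(unmapped)的reads个数
UNPAIRED_READ_DUPLICATES 未成对reads(fragment)中被标记为冗余的个数
READ_PAIR_DUPLICATES 被标记为冗余的read pair个数
READ_PAIR_OPTICAL_DUPLICATES 由光学原因造成的冗余read pair个数(包含在READ_PAIR_DUPLICATES之内)
PERCENT_DUPLICATION 已比对序列中被标记为冗余的比例
ESTIMATED_LIBRARY_SIZE 根据双端reads的冗余情况估计的文库中唯一分子(unique molecules)数量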
normal 1205 498430 729 1205 763 117212 6877 0.235643 924111
```
- HISTOGRAM java.lang.Double
```
BIN 坐标值
CoverageMult
all_sets
optical_sets
non_optical_sets
1.0 1.010558 287416 0 291706
2.0 1.599836 72561 6664 70093
3.0 1.943455 15542 105 14299
4.0 2.143827 3274 1 2831
5.0 2.260667 708 0 607
6.0 2.3288 136 0 114
7.0 2.368529 28 0 17
8.0 2.391696 7 0 5
9.0 2.405205 2 0 2
10.0 2.413082 0 0 0
11.0 2.417676 0 0 0
12.0 2.420354 0 0 0
13.0 2.421916 0 0 0
14.0 2.422827 0 0 0
15.0 2.423358 0 0 0
...
...
98.0 2.424101 0 0 0
99.0 2.424101 0 0 0
100.0 2.424101 0 0 0
```