修复了一个bug,把结果代码移除round循环,对于大文件(5G的fastdup-mini)结果不对,差挺多

This commit is contained in:
zzh 2025-12-28 23:43:58 +08:00
parent 6662435948
commit 1cd1a72760
1 changed files with 10 additions and 56 deletions

View File

@ -879,68 +879,22 @@ int SerialBQSR() {
}
// exit(0);
// 12. 创建总结数据
collapseQualityScoreTableToReadGroupTable(nsgv::gRecalTables.readGroupTable, nsgv::gRecalTables.qualityScoreTable);
roundTableValues(nsgv::gRecalTables);
// 13. 量化质量分数
QuantizationInfo quantInfo(nsgv::gRecalTables, nsgv::gBqsrArg.QUANTIZING_LEVELS);
// 14. 输出结果
RecalUtils::outputRecalibrationReport(nsgv::gBqsrArg, quantInfo, nsgv::gRecalTables);
#if 0
// spdlog::info("region: {} - {}", bams[0]->global_softclip_start(), bams.back()->global_softclip_end());
// 1. 获取bams数组覆盖的region范围
// 如果读取的bam数组跨越了不同的染色体咋搞还是按照每个线程都有独立的vcf文件来做吧
int64_t region_start = bams[0]->global_softclip_start();
vector<Region> contig_bams;
int contig_id = bams[0]->contig_id();
int64_t start = 0, stop = 0;
while (true) {
stop = start;
while (stop < bams.size() && bams[stop]->contig_id() == contig_id) ++stop;
if (stop > start) contig_bams.push_back(Region{start, stop});
if (stop >= bams.size()) break;
contig_id = bams[stop]->contig_id();
start = stop;
}
spdlog::info("{}, {} contig regions", contig_id, contig_bams.size());
for (int i = 0; i < bams.size();) {
int64_t a1 = bams[i]->contig_pos();
int64_t b1 = bams[i]->contig_end_pos();
int64_t a = bams[i]->softclip_start();
int64_t b = bams[i]->softclip_end();
spdlog::info("{}: ({}, {}), ({}, {})", bams[i]->query_name(), a1, b1, a, b);
++i;
}
// 依次处理每个contig的bams
vector<uint32_t> bitmap(100, 0); // 用来表示known sites覆盖情况的bitmap
for (const auto& cr : contig_bams) {
spdlog::info(" contig id: {}, bam count: {}, bitmap size: {}", contig_id, cr.end - cr.start, bitmap.size());
// 当前处理的contig
int contig_id = bams[cr.start]->contig_id();
int64_t region_start = bams[cr.start]->softclip_start();
int64_t region_end = bams[cr.end - 1]->softclip_end();
if ((bitmap.size() << 5)) {
}
}
#endif
// 2. 开辟一个uint32_t的数组作为bitmap如果上一轮的不够就重开用来表示region的每个位点是否有known sites覆盖每轮使用前需清零
// 3. 读取在region范围内的所有known sites并为对应的bitmap设定0 or 1 (作为skip标识)
// 4. 遍历bams数组中的每一条记录并进行处理
readNumSum += readNum;
inBamBuf.ClearAll(); //
}
spdlog::info("read count: {}", readNumSum);
// 12. 创建总结数据
collapseQualityScoreTableToReadGroupTable(nsgv::gRecalTables.readGroupTable, nsgv::gRecalTables.qualityScoreTable);
roundTableValues(nsgv::gRecalTables);
// 13. 量化质量分数
QuantizationInfo quantInfo(nsgv::gRecalTables, nsgv::gBqsrArg.QUANTIZING_LEVELS);
// 14. 输出结果
RecalUtils::outputRecalibrationReport(nsgv::gBqsrArg, quantInfo, nsgv::gRecalTables);
close_debug_files();
return 0;