From 81cbd6831c5a147f9eef2b2c7f6df64273b38c43 Mon Sep 17 00:00:00 2001 From: zzh Date: Tue, 30 Dec 2025 19:27:28 +0800 Subject: [PATCH] =?UTF-8?q?=E5=8F=88=E8=A7=A3=E5=86=B3=E4=BA=86=E4=B8=80?= =?UTF-8?q?=E4=B8=AAbug=EF=BC=8C=E5=BF=BD=E7=95=A5=E4=BA=86=E6=9C=89?= =?UTF-8?q?=E4=BA=9Bread=E7=9A=84=E8=BF=87=E6=BB=A4=EF=BC=8C=E6=8A=8A?= =?UTF-8?q?=E8=BF=87=E6=BB=A4=E5=87=BD=E6=95=B0=E6=94=BE=E5=88=B0append=5F?= =?UTF-8?q?one=5Fbam=E9=87=8C=E5=B0=B1=E5=A5=BD=E4=BA=86=EF=BC=8C=E7=8E=B0?= =?UTF-8?q?=E5=9C=A8=E5=8F=91=E7=8E=B0=E4=B8=B2=E8=A1=8C=E5=92=8C=E5=B9=B6?= =?UTF-8?q?=E8=A1=8C=E7=BB=93=E6=9E=9C=E8=BF=98=E6=98=AF=E6=9C=89=E7=82=B9?= =?UTF-8?q?=E4=B8=8D=E4=B8=80=E8=87=B4=EF=BC=8C=E6=AD=A3=E5=9C=A8=E8=B0=83?= =?UTF-8?q?=E8=AF=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/bqsr/bqsr_entry.cpp | 26 ++++++++++++-------------- src/util/bam_buf.cpp | 14 +++++++------- src/util/bam_buf.h | 2 +- 3 files changed, 20 insertions(+), 22 deletions(-) diff --git a/src/bqsr/bqsr_entry.cpp b/src/bqsr/bqsr_entry.cpp index 5e627cf..e965ffc 100644 --- a/src/bqsr/bqsr_entry.cpp +++ b/src/bqsr/bqsr_entry.cpp @@ -252,17 +252,15 @@ int SerialBQSR(AuxVar &aux) { skips[ii] = skips[ii] || (ContextCovariate::baseIndexMap[sd.bases[ii]] == -1) || sd.base_quals[ii] < nsgv::gBqsrArg.PRESERVE_QSCORES_LESS_THAN; } + //stringstream ss; + //for (auto s : skips) ss << (int)s << ' '; + //spdlog::info("{}", ss.str()); PROF_GP_END(read_vcf); -#if 0 - int fidx = 0; - if (sd.rid % 2 == 0) - fidx = 0; - else - fidx = 1; - fprintf(gf[fidx], "%ld %d\t", sd.rid, sd.read_len); - for (int ii = 0; ii < sd.read_len; ++ii) fprintf(gf[fidx], "%d ", skips[ii] ? 1 : 0); - fprintf(gf[fidx], "\n"); -#endif + + // fprintf(gf[4], "%s %d %ld ", bam_get_qname(sd.bw->b), sd.bw->b->core.flag, sd.rid); + // for (int ii = 0; ii < sd.read_len; ++ii) fprintf(gf[4], "%d ", skips[ii] ? 1 : 0); + // fprintf(gf[4], "\n"); + // fprintf(gf[0], "%ld %d\t", sd.rid, sd.read_len); // for (int ii = 0; ii < sd.read_len; ++ii) fprintf(gf[0], "%d ", skips[ii] ? 1 : 0); // fprintf(gf[0], "\n"); @@ -306,7 +304,8 @@ int SerialBQSR(AuxVar &aux) { } // 多线程处理bam数据, tmd是乱序的? -static void thread_worker(void* data, long idx, int tid, int steal) { +// static void thread_worker(void* data, long idx, int tid, int steal) { +static void thread_worker(void* data, long idx, int tid) { AuxVar& aux = (*(vector*)data)[tid]; auto& readCovariates = aux.readCovariates; RecalTables& recalTables = aux.recalTables; @@ -316,8 +315,7 @@ static void thread_worker(void* data, long idx, int tid, int steal) { StableArray&snpErrors = aux.snpErrors, &insErrors = aux.insErrors, &delErrors = aux.delErrors; StableArray& skips = aux.skips; // 该位置是否是已知位点 auto &bams = *aux.bamArr; - if (steal) - for (auto& vcf : aux.vcfArr) vcf.knownSites.clear(); + // if (steal) for (auto& vcf : aux.vcfArr) vcf.knownSites.clear(); #if 1 int startIdx = idx * aux.BAM_BLOCK_NUM; int stopIdx = std::min((size_t)(idx + 1) * aux.BAM_BLOCK_NUM, bams.size()); @@ -407,7 +405,7 @@ int ParallelBQSR(vector& auxArr) { spdlog::info("{} reads processed in {} round", readNum, round); #if 1 - kt_for_steal(auxArr.size(), thread_worker, &auxArr, (readNum + AuxVar::BAM_BLOCK_NUM - 1) / AuxVar::BAM_BLOCK_NUM); + kt_for_no_steal(auxArr.size(), thread_worker, &auxArr, (readNum + AuxVar::BAM_BLOCK_NUM - 1) / AuxVar::BAM_BLOCK_NUM); #else kt_for_steal(auxArr.size(), thread_worker, &auxArr, auxArr.size()); #endif diff --git a/src/util/bam_buf.cpp b/src/util/bam_buf.cpp index 54a9149..07c5877 100644 --- a/src/util/bam_buf.cpp +++ b/src/util/bam_buf.cpp @@ -18,8 +18,7 @@ int BamBuf::ReadBam() { if (handle_last) { // 处理上次读入的最后一个bam if (has_enough_space()) { // 必须调用,在边界处调整memffset if (filter_out == nullptr || !filter_out(bw->b)) { // 这里也要加过滤器 - ++read_num; - append_one_bam(); + read_num += append_one_bam(); } } else { return read_num; // 还是没空间 @@ -28,11 +27,9 @@ int BamBuf::ReadBam() { while (read_stat_ >= 0 && (read_stat_ = sam_read1(fp, hdr, bw->b)) >= 0) { bw->end_pos_ = BamWrap::BamEndPos(bw->b); if (has_enough_space()) { // 还有空间 - // if (true) { // 还有空间 // 加过滤器 if (filter_out == nullptr || !filter_out(bw->b)) { - append_one_bam(); - ++read_num; // 放进缓存才算读取到 + read_num += append_one_bam(); // 放进缓存才算读取到 } } else { break; @@ -110,8 +107,10 @@ inline bool BamBuf::has_enough_space() { } // 处理一个读取后的bam -inline void BamBuf::append_one_bam() { - BamWrap *bwp = (BamWrap *)(mem + mem_offset); +inline int BamBuf::append_one_bam() { + if (filter_out != nullptr && filter_out(bw->b)) + return 0; + BamWrap* bwp = (BamWrap*)(mem + mem_offset); *bwp = *bw; bwp->b = (bam1_t *)((char *)bwp + sizeof(*bwp)); bam1_t *bp = bwp->b; @@ -121,6 +120,7 @@ inline void BamBuf::append_one_bam() { // 更新下次存储的位置 mem_offset = (mem_offset + bw->length() + 8 - 1) & ~((size_t)(8 - 1)); bv.push_back(bwp); + return 1; } // 处理上次读入的最后一个read diff --git a/src/util/bam_buf.h b/src/util/bam_buf.h index dfb3405..750a2f3 100644 --- a/src/util/bam_buf.h +++ b/src/util/bam_buf.h @@ -85,7 +85,7 @@ struct BamBuf { // 检查缓存是否还有空间 bool has_enough_space(); // 处理一个读取后的bam - void append_one_bam(); + int append_one_bam(); // 处理上次读入的最后一个read bool handle_last_read();