From 022e70e1f395e4ede6a9dff307a7bd5013eadf0d Mon Sep 17 00:00:00 2001 From: zzh Date: Sun, 4 Jan 2026 16:27:50 +0800 Subject: [PATCH] =?UTF-8?q?=E8=A7=A3=E5=86=B3=E4=BA=86bug=EF=BC=8C?= =?UTF-8?q?=E7=8E=B0=E5=9C=A8apply=20bqsr=E7=BB=93=E6=9E=9C=E4=B8=80?= =?UTF-8?q?=E8=87=B4=E4=BA=86=EF=BC=8C=E5=8E=9F=E5=9B=A0=E6=98=AF=E8=AF=BB?= =?UTF-8?q?=E5=8F=96bqsr=20table=E7=9A=84=E6=97=B6=E5=80=99=EF=BC=8C?= =?UTF-8?q?=E5=B9=B6=E6=B2=A1=E6=9C=89=E5=B0=86table=E4=B8=AD=E7=9A=84empi?= =?UTF-8?q?ricalQuality=E8=B5=8B=E5=80=BC=E7=BB=99Datum=EF=BC=8C=E6=98=AF?= =?UTF-8?q?=E5=90=8E=E7=BB=AD=E5=9C=A8=E8=AE=A1=E7=AE=97=E8=B4=A8=E9=87=8F?= =?UTF-8?q?=E5=88=86=E6=95=B0=E7=9A=84=E6=97=B6=E5=80=99=E9=87=8D=E6=96=B0?= =?UTF-8?q?=E8=AE=A1=E7=AE=97=E7=9A=84empiricalQuality?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/bqsr/apply_bqsr_entry.cpp | 10 +++++++++- src/bqsr/recal_datum.h | 1 + src/bqsr/recal_utils.h | 9 ++++++--- 3 files changed, 16 insertions(+), 4 deletions(-) diff --git a/src/bqsr/apply_bqsr_entry.cpp b/src/bqsr/apply_bqsr_entry.cpp index fa9dec0..806f9f4 100644 --- a/src/bqsr/apply_bqsr_entry.cpp +++ b/src/bqsr/apply_bqsr_entry.cpp @@ -73,11 +73,13 @@ int SerialApplyBQSR(AuxVar &aux) { // 二. 遍历每个bam(read)记录,进行处理 for (int i = 0; i < bams.size(); ++i) { + BamWrap *bw = bams[i]; + if (bam_copy1(recalBams[i], bams[i]->b) == NULL) { spdlog::error("Copy bam error"); exit(1); } - BamWrap* bw = bams[i]; + // BamWrap* bw = bams[i]; bw->b = recalBams[i]; // 注意这里的赋值,然后就可以对b进行更改了 sd.init(); sd.parseForApplyBQSR(bw); @@ -151,6 +153,12 @@ int SerialApplyBQSR(AuxVar &aux) { recalibratedQuals[offset] = quantizedQualityScore; } + // fprintf(gf[4], "%s %d %ld ", bam_get_qname(sd.bw->b), sd.bw->b->core.flag, sd.rid); + // for (size_t si = 0; si < sd.read_len; ++si) { + // fprintf(gf[4], "%d ", (int)recalibratedQuals[si]); + // } + // fprintf(gf[4], "\n"); + if (sam_write1(nsgv::gOutBamFp, nsgv::gOutBamHeader, bw->b) < 0) { spdlog::error("failed writing sam record to \"{}\"", nsgv::gBqsrArg.OUTPUT_FILE.c_str()); sam_close(nsgv::gInBamFp); diff --git a/src/bqsr/recal_datum.h b/src/bqsr/recal_datum.h index 2339c25..04c06e6 100644 --- a/src/bqsr/recal_datum.h +++ b/src/bqsr/recal_datum.h @@ -159,6 +159,7 @@ struct RecalDatum { */ double getEmpiricalQuality() { return getEmpiricalQuality(getReportedQuality()); } inline void setEmpiricalQuality(double val) { empiricalQuality = val; } + inline void setEmpiricalQuality(int val) { empiricalQuality = val; } /** * Computes the empirical base quality (roughly (num errors)/(num observations)) from the counts stored in this datum. diff --git a/src/bqsr/recal_utils.h b/src/bqsr/recal_utils.h index 15ad673..febde7a 100644 --- a/src/bqsr/recal_utils.h +++ b/src/bqsr/recal_utils.h @@ -399,7 +399,8 @@ struct RecalUtils { int k2 = ReadGroupCovariate::RgToId[dats[i][0]]; int k1 = EventType::GetIndexForEventRep(dats[i][1][0]); auto& datum = recalTables.readGroupTable(k1, k2); - datum.setEmpiricalQuality(ReportUtil::ToDouble(dats[i][2])); + // datum.setEmpiricalQuality(ReportUtil::ToDouble(dats[i][2])); // 这个数值在GATK4里是需要重新计算的 + datum.setEmpiricalQuality(RecalDatum::UNINITIALIZED_EMPIRICAL_QUALITY); datum.setReportedQuality(ReportUtil::ToDouble(dats[i][3])); datum.setNumObservations(ReportUtil::ToUint64(dats[i][4])); datum.setNumMismatches(ReportUtil::ToDouble(dats[i][5])); @@ -425,7 +426,8 @@ struct RecalUtils { int k3 = ReportUtil::ToInt(dat[1]); int k1 = EventType::GetIndexForEventRep(dat[2][0]); auto& datum = recalTables.qualityScoreTable(k1, k2, k3); - datum.setEmpiricalQuality(ReportUtil::ToDouble(dat[3])); + // datum.setEmpiricalQuality(ReportUtil::ToDouble(dat[3])); // 这个数值在GATK4里是需要重新计算的 + datum.setEmpiricalQuality(RecalDatum::UNINITIALIZED_EMPIRICAL_QUALITY); datum.setReportedQuality((double)k3); datum.setNumObservations(ReportUtil::ToUint64(dat[4])); datum.setNumMismatches(ReportUtil::ToDouble(dat[5])); @@ -461,7 +463,8 @@ struct RecalUtils { } else { spdlog::error("Unknown CovariateName {}", dat[3]); } - dp->setEmpiricalQuality(ReportUtil::ToDouble(dat[5])); + // dp->setEmpiricalQuality(ReportUtil::ToDouble(dat[5])); // 这个数值在GATK4里是需要重新计算的 + dp->setEmpiricalQuality(RecalDatum::UNINITIALIZED_EMPIRICAL_QUALITY); // 这个数值在GATK4里是需要重新计算的 dp->setReportedQuality((double)k3); dp->setNumObservations(ReportUtil::ToUint64(dat[6])); dp->setNumMismatches(ReportUtil::ToDouble(dat[7]));