解决了bug,现在apply bqsr结果一致了,原因是读取bqsr table的时候,并没有将table中的empiricalQuality赋值给Datum,是后续在计算质量分数的时候重新计算的empiricalQuality

This commit is contained in:
zzh 2026-01-04 16:27:50 +08:00
parent 94e06338cd
commit 022e70e1f3
3 changed files with 16 additions and 4 deletions

View File

@ -73,11 +73,13 @@ int SerialApplyBQSR(AuxVar &aux) {
// 二. 遍历每个bamread记录进行处理 // 二. 遍历每个bamread记录进行处理
for (int i = 0; i < bams.size(); ++i) { for (int i = 0; i < bams.size(); ++i) {
BamWrap *bw = bams[i];
if (bam_copy1(recalBams[i], bams[i]->b) == NULL) { if (bam_copy1(recalBams[i], bams[i]->b) == NULL) {
spdlog::error("Copy bam error"); spdlog::error("Copy bam error");
exit(1); exit(1);
} }
BamWrap* bw = bams[i]; // BamWrap* bw = bams[i];
bw->b = recalBams[i]; // 注意这里的赋值然后就可以对b进行更改了 bw->b = recalBams[i]; // 注意这里的赋值然后就可以对b进行更改了
sd.init(); sd.init();
sd.parseForApplyBQSR(bw); sd.parseForApplyBQSR(bw);
@ -151,6 +153,12 @@ int SerialApplyBQSR(AuxVar &aux) {
recalibratedQuals[offset] = quantizedQualityScore; recalibratedQuals[offset] = quantizedQualityScore;
} }
// fprintf(gf[4], "%s %d %ld ", bam_get_qname(sd.bw->b), sd.bw->b->core.flag, sd.rid);
// for (size_t si = 0; si < sd.read_len; ++si) {
// fprintf(gf[4], "%d ", (int)recalibratedQuals[si]);
// }
// fprintf(gf[4], "\n");
if (sam_write1(nsgv::gOutBamFp, nsgv::gOutBamHeader, bw->b) < 0) { if (sam_write1(nsgv::gOutBamFp, nsgv::gOutBamHeader, bw->b) < 0) {
spdlog::error("failed writing sam record to \"{}\"", nsgv::gBqsrArg.OUTPUT_FILE.c_str()); spdlog::error("failed writing sam record to \"{}\"", nsgv::gBqsrArg.OUTPUT_FILE.c_str());
sam_close(nsgv::gInBamFp); sam_close(nsgv::gInBamFp);

View File

@ -159,6 +159,7 @@ struct RecalDatum {
*/ */
double getEmpiricalQuality() { return getEmpiricalQuality(getReportedQuality()); } double getEmpiricalQuality() { return getEmpiricalQuality(getReportedQuality()); }
inline void setEmpiricalQuality(double val) { empiricalQuality = val; } inline void setEmpiricalQuality(double val) { empiricalQuality = val; }
inline void setEmpiricalQuality(int val) { empiricalQuality = val; }
/** /**
* Computes the empirical base quality (roughly (num errors)/(num observations)) from the counts stored in this datum. * Computes the empirical base quality (roughly (num errors)/(num observations)) from the counts stored in this datum.

View File

@ -399,7 +399,8 @@ struct RecalUtils {
int k2 = ReadGroupCovariate::RgToId[dats[i][0]]; int k2 = ReadGroupCovariate::RgToId[dats[i][0]];
int k1 = EventType::GetIndexForEventRep(dats[i][1][0]); int k1 = EventType::GetIndexForEventRep(dats[i][1][0]);
auto& datum = recalTables.readGroupTable(k1, k2); auto& datum = recalTables.readGroupTable(k1, k2);
datum.setEmpiricalQuality(ReportUtil::ToDouble(dats[i][2])); // datum.setEmpiricalQuality(ReportUtil::ToDouble(dats[i][2])); // 这个数值在GATK4里是需要重新计算的
datum.setEmpiricalQuality(RecalDatum::UNINITIALIZED_EMPIRICAL_QUALITY);
datum.setReportedQuality(ReportUtil::ToDouble(dats[i][3])); datum.setReportedQuality(ReportUtil::ToDouble(dats[i][3]));
datum.setNumObservations(ReportUtil::ToUint64(dats[i][4])); datum.setNumObservations(ReportUtil::ToUint64(dats[i][4]));
datum.setNumMismatches(ReportUtil::ToDouble(dats[i][5])); datum.setNumMismatches(ReportUtil::ToDouble(dats[i][5]));
@ -425,7 +426,8 @@ struct RecalUtils {
int k3 = ReportUtil::ToInt(dat[1]); int k3 = ReportUtil::ToInt(dat[1]);
int k1 = EventType::GetIndexForEventRep(dat[2][0]); int k1 = EventType::GetIndexForEventRep(dat[2][0]);
auto& datum = recalTables.qualityScoreTable(k1, k2, k3); auto& datum = recalTables.qualityScoreTable(k1, k2, k3);
datum.setEmpiricalQuality(ReportUtil::ToDouble(dat[3])); // datum.setEmpiricalQuality(ReportUtil::ToDouble(dat[3])); // 这个数值在GATK4里是需要重新计算的
datum.setEmpiricalQuality(RecalDatum::UNINITIALIZED_EMPIRICAL_QUALITY);
datum.setReportedQuality((double)k3); datum.setReportedQuality((double)k3);
datum.setNumObservations(ReportUtil::ToUint64(dat[4])); datum.setNumObservations(ReportUtil::ToUint64(dat[4]));
datum.setNumMismatches(ReportUtil::ToDouble(dat[5])); datum.setNumMismatches(ReportUtil::ToDouble(dat[5]));
@ -461,7 +463,8 @@ struct RecalUtils {
} else { } else {
spdlog::error("Unknown CovariateName {}", dat[3]); spdlog::error("Unknown CovariateName {}", dat[3]);
} }
dp->setEmpiricalQuality(ReportUtil::ToDouble(dat[5])); // dp->setEmpiricalQuality(ReportUtil::ToDouble(dat[5])); // 这个数值在GATK4里是需要重新计算的
dp->setEmpiricalQuality(RecalDatum::UNINITIALIZED_EMPIRICAL_QUALITY); // 这个数值在GATK4里是需要重新计算的
dp->setReportedQuality((double)k3); dp->setReportedQuality((double)k3);
dp->setNumObservations(ReportUtil::ToUint64(dat[6])); dp->setNumObservations(ReportUtil::ToUint64(dat[6]));
dp->setNumMismatches(ReportUtil::ToDouble(dat[7])); dp->setNumMismatches(ReportUtil::ToDouble(dat[7]));