解决了bug,现在apply bqsr结果一致了,原因是读取bqsr table的时候,并没有将table中的empiricalQuality赋值给Datum,是后续在计算质量分数的时候重新计算的empiricalQuality

This commit is contained in:
zzh 2026-01-04 16:27:50 +08:00
parent 94e06338cd
commit 022e70e1f3
3 changed files with 16 additions and 4 deletions

View File

@ -73,11 +73,13 @@ int SerialApplyBQSR(AuxVar &aux) {
// 二. 遍历每个bamread记录进行处理
for (int i = 0; i < bams.size(); ++i) {
BamWrap *bw = bams[i];
if (bam_copy1(recalBams[i], bams[i]->b) == NULL) {
spdlog::error("Copy bam error");
exit(1);
}
BamWrap* bw = bams[i];
// BamWrap* bw = bams[i];
bw->b = recalBams[i]; // 注意这里的赋值然后就可以对b进行更改了
sd.init();
sd.parseForApplyBQSR(bw);
@ -151,6 +153,12 @@ int SerialApplyBQSR(AuxVar &aux) {
recalibratedQuals[offset] = quantizedQualityScore;
}
// fprintf(gf[4], "%s %d %ld ", bam_get_qname(sd.bw->b), sd.bw->b->core.flag, sd.rid);
// for (size_t si = 0; si < sd.read_len; ++si) {
// fprintf(gf[4], "%d ", (int)recalibratedQuals[si]);
// }
// fprintf(gf[4], "\n");
if (sam_write1(nsgv::gOutBamFp, nsgv::gOutBamHeader, bw->b) < 0) {
spdlog::error("failed writing sam record to \"{}\"", nsgv::gBqsrArg.OUTPUT_FILE.c_str());
sam_close(nsgv::gInBamFp);

View File

@ -159,6 +159,7 @@ struct RecalDatum {
*/
double getEmpiricalQuality() { return getEmpiricalQuality(getReportedQuality()); }
inline void setEmpiricalQuality(double val) { empiricalQuality = val; }
inline void setEmpiricalQuality(int val) { empiricalQuality = val; }
/**
* Computes the empirical base quality (roughly (num errors)/(num observations)) from the counts stored in this datum.

View File

@ -399,7 +399,8 @@ struct RecalUtils {
int k2 = ReadGroupCovariate::RgToId[dats[i][0]];
int k1 = EventType::GetIndexForEventRep(dats[i][1][0]);
auto& datum = recalTables.readGroupTable(k1, k2);
datum.setEmpiricalQuality(ReportUtil::ToDouble(dats[i][2]));
// datum.setEmpiricalQuality(ReportUtil::ToDouble(dats[i][2])); // 这个数值在GATK4里是需要重新计算的
datum.setEmpiricalQuality(RecalDatum::UNINITIALIZED_EMPIRICAL_QUALITY);
datum.setReportedQuality(ReportUtil::ToDouble(dats[i][3]));
datum.setNumObservations(ReportUtil::ToUint64(dats[i][4]));
datum.setNumMismatches(ReportUtil::ToDouble(dats[i][5]));
@ -425,7 +426,8 @@ struct RecalUtils {
int k3 = ReportUtil::ToInt(dat[1]);
int k1 = EventType::GetIndexForEventRep(dat[2][0]);
auto& datum = recalTables.qualityScoreTable(k1, k2, k3);
datum.setEmpiricalQuality(ReportUtil::ToDouble(dat[3]));
// datum.setEmpiricalQuality(ReportUtil::ToDouble(dat[3])); // 这个数值在GATK4里是需要重新计算的
datum.setEmpiricalQuality(RecalDatum::UNINITIALIZED_EMPIRICAL_QUALITY);
datum.setReportedQuality((double)k3);
datum.setNumObservations(ReportUtil::ToUint64(dat[4]));
datum.setNumMismatches(ReportUtil::ToDouble(dat[5]));
@ -461,7 +463,8 @@ struct RecalUtils {
} else {
spdlog::error("Unknown CovariateName {}", dat[3]);
}
dp->setEmpiricalQuality(ReportUtil::ToDouble(dat[5]));
// dp->setEmpiricalQuality(ReportUtil::ToDouble(dat[5])); // 这个数值在GATK4里是需要重新计算的
dp->setEmpiricalQuality(RecalDatum::UNINITIALIZED_EMPIRICAL_QUALITY); // 这个数值在GATK4里是需要重新计算的
dp->setReportedQuality((double)k3);
dp->setNumObservations(ReportUtil::ToUint64(dat[6]));
dp->setNumMismatches(ReportUtil::ToDouble(dat[7]));