解决了bug,现在apply bqsr结果一致了,原因是读取bqsr table的时候,并没有将table中的empiricalQuality赋值给Datum,是后续在计算质量分数的时候重新计算的empiricalQuality
This commit is contained in:
parent
94e06338cd
commit
022e70e1f3
|
|
@ -73,11 +73,13 @@ int SerialApplyBQSR(AuxVar &aux) {
|
|||
|
||||
// 二. 遍历每个bam(read)记录,进行处理
|
||||
for (int i = 0; i < bams.size(); ++i) {
|
||||
BamWrap *bw = bams[i];
|
||||
|
||||
if (bam_copy1(recalBams[i], bams[i]->b) == NULL) {
|
||||
spdlog::error("Copy bam error");
|
||||
exit(1);
|
||||
}
|
||||
BamWrap* bw = bams[i];
|
||||
// BamWrap* bw = bams[i];
|
||||
bw->b = recalBams[i]; // 注意这里的赋值,然后就可以对b进行更改了
|
||||
sd.init();
|
||||
sd.parseForApplyBQSR(bw);
|
||||
|
|
@ -151,6 +153,12 @@ int SerialApplyBQSR(AuxVar &aux) {
|
|||
recalibratedQuals[offset] = quantizedQualityScore;
|
||||
}
|
||||
|
||||
// fprintf(gf[4], "%s %d %ld ", bam_get_qname(sd.bw->b), sd.bw->b->core.flag, sd.rid);
|
||||
// for (size_t si = 0; si < sd.read_len; ++si) {
|
||||
// fprintf(gf[4], "%d ", (int)recalibratedQuals[si]);
|
||||
// }
|
||||
// fprintf(gf[4], "\n");
|
||||
|
||||
if (sam_write1(nsgv::gOutBamFp, nsgv::gOutBamHeader, bw->b) < 0) {
|
||||
spdlog::error("failed writing sam record to \"{}\"", nsgv::gBqsrArg.OUTPUT_FILE.c_str());
|
||||
sam_close(nsgv::gInBamFp);
|
||||
|
|
|
|||
|
|
@ -159,6 +159,7 @@ struct RecalDatum {
|
|||
*/
|
||||
double getEmpiricalQuality() { return getEmpiricalQuality(getReportedQuality()); }
|
||||
inline void setEmpiricalQuality(double val) { empiricalQuality = val; }
|
||||
inline void setEmpiricalQuality(int val) { empiricalQuality = val; }
|
||||
|
||||
/**
|
||||
* Computes the empirical base quality (roughly (num errors)/(num observations)) from the counts stored in this datum.
|
||||
|
|
|
|||
|
|
@ -399,7 +399,8 @@ struct RecalUtils {
|
|||
int k2 = ReadGroupCovariate::RgToId[dats[i][0]];
|
||||
int k1 = EventType::GetIndexForEventRep(dats[i][1][0]);
|
||||
auto& datum = recalTables.readGroupTable(k1, k2);
|
||||
datum.setEmpiricalQuality(ReportUtil::ToDouble(dats[i][2]));
|
||||
// datum.setEmpiricalQuality(ReportUtil::ToDouble(dats[i][2])); // 这个数值在GATK4里是需要重新计算的
|
||||
datum.setEmpiricalQuality(RecalDatum::UNINITIALIZED_EMPIRICAL_QUALITY);
|
||||
datum.setReportedQuality(ReportUtil::ToDouble(dats[i][3]));
|
||||
datum.setNumObservations(ReportUtil::ToUint64(dats[i][4]));
|
||||
datum.setNumMismatches(ReportUtil::ToDouble(dats[i][5]));
|
||||
|
|
@ -425,7 +426,8 @@ struct RecalUtils {
|
|||
int k3 = ReportUtil::ToInt(dat[1]);
|
||||
int k1 = EventType::GetIndexForEventRep(dat[2][0]);
|
||||
auto& datum = recalTables.qualityScoreTable(k1, k2, k3);
|
||||
datum.setEmpiricalQuality(ReportUtil::ToDouble(dat[3]));
|
||||
// datum.setEmpiricalQuality(ReportUtil::ToDouble(dat[3])); // 这个数值在GATK4里是需要重新计算的
|
||||
datum.setEmpiricalQuality(RecalDatum::UNINITIALIZED_EMPIRICAL_QUALITY);
|
||||
datum.setReportedQuality((double)k3);
|
||||
datum.setNumObservations(ReportUtil::ToUint64(dat[4]));
|
||||
datum.setNumMismatches(ReportUtil::ToDouble(dat[5]));
|
||||
|
|
@ -461,7 +463,8 @@ struct RecalUtils {
|
|||
} else {
|
||||
spdlog::error("Unknown CovariateName {}", dat[3]);
|
||||
}
|
||||
dp->setEmpiricalQuality(ReportUtil::ToDouble(dat[5]));
|
||||
// dp->setEmpiricalQuality(ReportUtil::ToDouble(dat[5])); // 这个数值在GATK4里是需要重新计算的
|
||||
dp->setEmpiricalQuality(RecalDatum::UNINITIALIZED_EMPIRICAL_QUALITY); // 这个数值在GATK4里是需要重新计算的
|
||||
dp->setReportedQuality((double)k3);
|
||||
dp->setNumObservations(ReportUtil::ToUint64(dat[6]));
|
||||
dp->setNumMismatches(ReportUtil::ToDouble(dat[7]));
|
||||
|
|
|
|||
Loading…
Reference in New Issue