From a01d758af61ce225f69fad684ba50de2c8ca2203 Mon Sep 17 00:00:00 2001 From: Heng Li Date: Wed, 26 Jul 2017 11:50:04 -0400 Subject: [PATCH] r206: mapq penalize short chains further The old code penalized at the log() scale. Now added a linear-scaled factor. If the chain consists of few minimizers, its quality is really not good. --- hit.c | 6 ++++-- main.c | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/hit.c b/hit.c index 477dedb..ba6bb76 100644 --- a/hit.c +++ b/hit.c @@ -290,16 +290,18 @@ void mm_join_long(void *km, const mm_mapopt_t *opt, int qlen, int *n_regs_, mm_r void mm_set_mapq(int n_regs, mm_reg1_t *regs, int min_chain_sc) { + static const float q_coef = 30.0f; int i; for (i = 0; i < n_regs; ++i) { mm_reg1_t *r = &regs[i]; if (r->parent == r->id) { int mapq, subsc; + float pen_cm = r->cnt >= 10? 1.0f : 0.1f * r->cnt; subsc = r->subsc > min_chain_sc? r->subsc : min_chain_sc; if (r->p && r->p->dp_max2 > 0 && r->p->dp_max > 0) { float identity = (float)(r->p->blen - r->p->n_diff - r->p->n_ambi) / (r->p->blen - r->p->n_ambi); - mapq = (int)(identity * 30.0 * (1. - (float)r->p->dp_max2 * subsc / r->p->dp_max / r->score) * logf(r->score)); - } else mapq = (int)(30.0 * (1. - (float)subsc / r->score) * logf(r->score)); + mapq = (int)(identity * pen_cm * q_coef * (1. - (float)r->p->dp_max2 * subsc / r->p->dp_max / r->score) * logf(r->score)); + } else mapq = (int)(pen_cm * q_coef * (1. - (float)subsc / r->score) * logf(r->score)); mapq = mapq > 0? mapq : 0; r->mapq = mapq < 60? mapq : 60; } else r->mapq = 0; diff --git a/main.c b/main.c index 175d085..00cd5d5 100644 --- a/main.c +++ b/main.c @@ -8,7 +8,7 @@ #include "minimap.h" #include "mmpriv.h" -#define MM_VERSION "2.0-r205-dirty" +#define MM_VERSION "2.0-r206-dirty" void liftrlimit() {