r206: mapq: penalize short chains further

The old code penalized short chains only at the log() scale. This commit adds a
linear-scale factor: if a chain consists of few minimizers, its mapping quality is not reliable.
This commit is contained in:
Heng Li 2017-07-26 11:50:04 -04:00
parent e9dc1ce2b6
commit a01d758af6
2 changed files with 5 additions and 3 deletions

6
hit.c
View File

@ -290,16 +290,18 @@ void mm_join_long(void *km, const mm_mapopt_t *opt, int qlen, int *n_regs_, mm_r
void mm_set_mapq(int n_regs, mm_reg1_t *regs, int min_chain_sc)
{
static const float q_coef = 30.0f;
int i;
for (i = 0; i < n_regs; ++i) {
mm_reg1_t *r = &regs[i];
if (r->parent == r->id) {
int mapq, subsc;
float pen_cm = r->cnt >= 10? 1.0f : 0.1f * r->cnt;
subsc = r->subsc > min_chain_sc? r->subsc : min_chain_sc;
if (r->p && r->p->dp_max2 > 0 && r->p->dp_max > 0) {
float identity = (float)(r->p->blen - r->p->n_diff - r->p->n_ambi) / (r->p->blen - r->p->n_ambi);
mapq = (int)(identity * 30.0 * (1. - (float)r->p->dp_max2 * subsc / r->p->dp_max / r->score) * logf(r->score));
} else mapq = (int)(30.0 * (1. - (float)subsc / r->score) * logf(r->score));
mapq = (int)(identity * pen_cm * q_coef * (1. - (float)r->p->dp_max2 * subsc / r->p->dp_max / r->score) * logf(r->score));
} else mapq = (int)(pen_cm * q_coef * (1. - (float)subsc / r->score) * logf(r->score));
mapq = mapq > 0? mapq : 0;
r->mapq = mapq < 60? mapq : 60;
} else r->mapq = 0;

2
main.c
View File

@ -8,7 +8,7 @@
#include "minimap.h"
#include "mmpriv.h"
#define MM_VERSION "2.0-r205-dirty"
#define MM_VERSION "2.0-r206-dirty"
void liftrlimit()
{