r542: tuning mapQ calculation

This commit is contained in:
Heng Li 2017-10-31 14:25:09 -04:00
parent 7f11f4c4d4
commit fb8a1b5536
4 changed files with 24 additions and 11 deletions

23
hit.c
View File

@ -442,7 +442,7 @@ void mm_seg_free(void *km, int n_segs, mm_seg_t *segs)
kfree(km, segs);
}
void mm_set_mapq(int n_regs, mm_reg1_t *regs, int min_chain_sc, int match_sc, int rep_len)
void mm_set_mapq(int n_regs, mm_reg1_t *regs, int min_chain_sc, int match_sc, int rep_len, int is_sr)
{
static const float q_coef = 40.0f;
int i;
@ -458,10 +458,23 @@ void mm_set_mapq(int n_regs, mm_reg1_t *regs, int min_chain_sc, int match_sc, in
subsc = r->subsc > min_chain_sc? r->subsc : min_chain_sc;
if (r->p && r->p->dp_max2 > 0 && r->p->dp_max > 0) {
float identity = (float)r->mlen / r->blen;
int mapq_alt = (int)(6.02f * identity * identity * (r->p->dp_max - r->p->dp_max2) / match_sc + .499f); // BWA-MEM like mapQ, mostly for short reads
mapq = (int)(identity * pen_cm * q_coef * (1. - (float)r->p->dp_max2 * subsc / r->p->dp_max / r->score) * logf(r->score)); // more for long reads
mapq = mapq < mapq_alt? mapq : mapq_alt; // in case the long-read heuristic fails
} else mapq = (int)(pen_cm * q_coef * (1. - (float)subsc / r->score) * logf(r->score));
float x = (float)r->p->dp_max2 * subsc / r->p->dp_max / r->score;
if (is_sr) {
mapq = (int)(identity * pen_cm * q_coef * (1.0f - x * x) * logf((float)r->p->dp_max / match_sc));
} else {
int mapq_alt = (int)(6.02f * identity * identity * (r->p->dp_max - r->p->dp_max2) / match_sc + .499f); // BWA-MEM like mapQ, mostly for short reads
mapq = (int)(identity * pen_cm * q_coef * (1.0f - x) * logf(r->score)); // more for long reads
mapq = mapq < mapq_alt? mapq : mapq_alt; // in case the long-read heuristic fails
}
} else {
float x = (float)subsc / r->score;
if (is_sr && r->p) {
float identity = (float)r->mlen / r->blen;
mapq = (int)(identity * pen_cm * q_coef * (1.0f - x) * logf((float)r->p->dp_max / match_sc));
} else {
mapq = (int)(pen_cm * q_coef * (1.0f - x) * logf(r->score));
}
}
mapq -= (int)(4.343f * logf(r->n_sub + 1) + .499f);
mapq = mapq > 0? mapq : 0;
r->mapq = mapq < 60? mapq : 60;

2
main.c
View File

@ -6,7 +6,7 @@
#include "mmpriv.h"
#include "getopt.h"
#define MM_VERSION "2.3-r540-dirty"
#define MM_VERSION "2.3-r542-dirty"
#ifdef __linux__
#include <sys/resource.h>

8
map.c
View File

@ -270,7 +270,7 @@ static mm_reg1_t *align_regs(const mm_mapopt_t *opt, const mm_idx_t *mi, void *k
void mm_map_frag(const mm_idx_t *mi, int n_segs, const int *qlens, const char **seqs, const char **quals, int *n_regs, mm_reg1_t **regs, mm_tbuf_t *b, const mm_mapopt_t *opt, const char *qname)
{
int i, j, rep_len, qlen_sum, n_regs0;
int max_chain_gap_qry, max_chain_gap_ref, is_splice = !!(opt->flag & MM_F_SPLICE);
int max_chain_gap_qry, max_chain_gap_ref, is_splice = !!(opt->flag & MM_F_SPLICE), is_sr = !!(opt->flag & MM_F_SR);
uint32_t hash;
int64_t n_a;
uint64_t *u;
@ -298,7 +298,7 @@ void mm_map_frag(const mm_idx_t *mi, int n_segs, const int *qlens, const char **
}
// set max chaining gap on the query and the reference sequence
if (opt->flag & MM_F_SR)
if (is_sr)
max_chain_gap_qry = qlen_sum > opt->max_gap? qlen_sum : opt->max_gap;
else max_chain_gap_qry = opt->max_gap;
if (opt->max_gap_ref > 0) {
@ -345,7 +345,7 @@ void mm_map_frag(const mm_idx_t *mi, int n_segs, const int *qlens, const char **
if (n_segs == 1) { // uni-segment
regs0 = align_regs(opt, mi, b->km, qlens[0], seqs[0], quals? quals[0] : 0, &n_regs0, regs0, a);
mm_set_mapq(n_regs0, regs0, opt->min_chain_score, opt->a, rep_len);
mm_set_mapq(n_regs0, regs0, opt->min_chain_score, opt->a, rep_len, is_sr);
n_regs[0] = n_regs0, regs[0] = regs0;
} else { // multi-segment
mm_seg_t *seg;
@ -354,7 +354,7 @@ void mm_map_frag(const mm_idx_t *mi, int n_segs, const int *qlens, const char **
for (i = 0; i < n_segs; ++i) {
mm_set_parent(b->km, opt->mask_level, n_regs[i], regs[i], opt->a * 2 + opt->b); // update mm_reg1_t::parent
regs[i] = align_regs(opt, mi, b->km, qlens[i], seqs[i], quals? quals[i] : 0, &n_regs[i], regs[i], seg[i].a);
mm_set_mapq(n_regs[i], regs[i], opt->min_chain_score, opt->a, rep_len);
mm_set_mapq(n_regs[i], regs[i], opt->min_chain_score, opt->a, rep_len, is_sr);
}
mm_seg_free(b->km, n_segs, seg);
if (n_segs == 2 && opt->pe_ori >= 0 && (opt->flag&MM_F_CIGAR))

View File

@ -78,7 +78,7 @@ void mm_filter_regs(void *km, const mm_mapopt_t *opt, int *n_regs, mm_reg1_t *re
void mm_filter_by_identity(void *km, int n_regs, mm_reg1_t *regs, float min_iden, int qlen, const char *qual);
void mm_join_long(void *km, const mm_mapopt_t *opt, int qlen, int *n_regs, mm_reg1_t *regs, mm128_t *a);
void mm_hit_sort_by_dp(void *km, int *n_regs, mm_reg1_t *r);
void mm_set_mapq(int n_regs, mm_reg1_t *regs, int min_chain_sc, int match_sc, int rep_len);
void mm_set_mapq(int n_regs, mm_reg1_t *regs, int min_chain_sc, int match_sc, int rep_len, int is_sr);
mm_seg_t *mm_seg_gen(void *km, uint32_t hash, int n_segs, const int *qlens, int n_regs0, const mm_reg1_t *regs0, int *n_regs, mm_reg1_t **regs, const mm128_t *a);
void mm_seg_free(void *km, int n_segs, mm_seg_t *segs);