r542: tuning mapQ calculation
This commit is contained in:
parent
7f11f4c4d4
commit
fb8a1b5536
23
hit.c
23
hit.c
|
|
@ -442,7 +442,7 @@ void mm_seg_free(void *km, int n_segs, mm_seg_t *segs)
|
|||
kfree(km, segs);
|
||||
}
|
||||
|
||||
void mm_set_mapq(int n_regs, mm_reg1_t *regs, int min_chain_sc, int match_sc, int rep_len)
|
||||
void mm_set_mapq(int n_regs, mm_reg1_t *regs, int min_chain_sc, int match_sc, int rep_len, int is_sr)
|
||||
{
|
||||
static const float q_coef = 40.0f;
|
||||
int i;
|
||||
|
|
@ -458,10 +458,23 @@ void mm_set_mapq(int n_regs, mm_reg1_t *regs, int min_chain_sc, int match_sc, in
|
|||
subsc = r->subsc > min_chain_sc? r->subsc : min_chain_sc;
|
||||
if (r->p && r->p->dp_max2 > 0 && r->p->dp_max > 0) {
|
||||
float identity = (float)r->mlen / r->blen;
|
||||
int mapq_alt = (int)(6.02f * identity * identity * (r->p->dp_max - r->p->dp_max2) / match_sc + .499f); // BWA-MEM like mapQ, mostly for short reads
|
||||
mapq = (int)(identity * pen_cm * q_coef * (1. - (float)r->p->dp_max2 * subsc / r->p->dp_max / r->score) * logf(r->score)); // more for long reads
|
||||
mapq = mapq < mapq_alt? mapq : mapq_alt; // in case the long-read heuristic fails
|
||||
} else mapq = (int)(pen_cm * q_coef * (1. - (float)subsc / r->score) * logf(r->score));
|
||||
float x = (float)r->p->dp_max2 * subsc / r->p->dp_max / r->score;
|
||||
if (is_sr) {
|
||||
mapq = (int)(identity * pen_cm * q_coef * (1.0f - x * x) * logf((float)r->p->dp_max / match_sc));
|
||||
} else {
|
||||
int mapq_alt = (int)(6.02f * identity * identity * (r->p->dp_max - r->p->dp_max2) / match_sc + .499f); // BWA-MEM like mapQ, mostly for short reads
|
||||
mapq = (int)(identity * pen_cm * q_coef * (1.0f - x) * logf(r->score)); // more for long reads
|
||||
mapq = mapq < mapq_alt? mapq : mapq_alt; // in case the long-read heuristic fails
|
||||
}
|
||||
} else {
|
||||
float x = (float)subsc / r->score;
|
||||
if (is_sr && r->p) {
|
||||
float identity = (float)r->mlen / r->blen;
|
||||
mapq = (int)(identity * pen_cm * q_coef * (1.0f - x) * logf((float)r->p->dp_max / match_sc));
|
||||
} else {
|
||||
mapq = (int)(pen_cm * q_coef * (1.0f - x) * logf(r->score));
|
||||
}
|
||||
}
|
||||
mapq -= (int)(4.343f * logf(r->n_sub + 1) + .499f);
|
||||
mapq = mapq > 0? mapq : 0;
|
||||
r->mapq = mapq < 60? mapq : 60;
|
||||
|
|
|
|||
2
main.c
2
main.c
|
|
@ -6,7 +6,7 @@
|
|||
#include "mmpriv.h"
|
||||
#include "getopt.h"
|
||||
|
||||
#define MM_VERSION "2.3-r540-dirty"
|
||||
#define MM_VERSION "2.3-r542-dirty"
|
||||
|
||||
#ifdef __linux__
|
||||
#include <sys/resource.h>
|
||||
|
|
|
|||
8
map.c
8
map.c
|
|
@ -270,7 +270,7 @@ static mm_reg1_t *align_regs(const mm_mapopt_t *opt, const mm_idx_t *mi, void *k
|
|||
void mm_map_frag(const mm_idx_t *mi, int n_segs, const int *qlens, const char **seqs, const char **quals, int *n_regs, mm_reg1_t **regs, mm_tbuf_t *b, const mm_mapopt_t *opt, const char *qname)
|
||||
{
|
||||
int i, j, rep_len, qlen_sum, n_regs0;
|
||||
int max_chain_gap_qry, max_chain_gap_ref, is_splice = !!(opt->flag & MM_F_SPLICE);
|
||||
int max_chain_gap_qry, max_chain_gap_ref, is_splice = !!(opt->flag & MM_F_SPLICE), is_sr = !!(opt->flag & MM_F_SR);
|
||||
uint32_t hash;
|
||||
int64_t n_a;
|
||||
uint64_t *u;
|
||||
|
|
@ -298,7 +298,7 @@ void mm_map_frag(const mm_idx_t *mi, int n_segs, const int *qlens, const char **
|
|||
}
|
||||
|
||||
// set max chaining gap on the query and the reference sequence
|
||||
if (opt->flag & MM_F_SR)
|
||||
if (is_sr)
|
||||
max_chain_gap_qry = qlen_sum > opt->max_gap? qlen_sum : opt->max_gap;
|
||||
else max_chain_gap_qry = opt->max_gap;
|
||||
if (opt->max_gap_ref > 0) {
|
||||
|
|
@ -345,7 +345,7 @@ void mm_map_frag(const mm_idx_t *mi, int n_segs, const int *qlens, const char **
|
|||
|
||||
if (n_segs == 1) { // uni-segment
|
||||
regs0 = align_regs(opt, mi, b->km, qlens[0], seqs[0], quals? quals[0] : 0, &n_regs0, regs0, a);
|
||||
mm_set_mapq(n_regs0, regs0, opt->min_chain_score, opt->a, rep_len);
|
||||
mm_set_mapq(n_regs0, regs0, opt->min_chain_score, opt->a, rep_len, is_sr);
|
||||
n_regs[0] = n_regs0, regs[0] = regs0;
|
||||
} else { // multi-segment
|
||||
mm_seg_t *seg;
|
||||
|
|
@ -354,7 +354,7 @@ void mm_map_frag(const mm_idx_t *mi, int n_segs, const int *qlens, const char **
|
|||
for (i = 0; i < n_segs; ++i) {
|
||||
mm_set_parent(b->km, opt->mask_level, n_regs[i], regs[i], opt->a * 2 + opt->b); // update mm_reg1_t::parent
|
||||
regs[i] = align_regs(opt, mi, b->km, qlens[i], seqs[i], quals? quals[i] : 0, &n_regs[i], regs[i], seg[i].a);
|
||||
mm_set_mapq(n_regs[i], regs[i], opt->min_chain_score, opt->a, rep_len);
|
||||
mm_set_mapq(n_regs[i], regs[i], opt->min_chain_score, opt->a, rep_len, is_sr);
|
||||
}
|
||||
mm_seg_free(b->km, n_segs, seg);
|
||||
if (n_segs == 2 && opt->pe_ori >= 0 && (opt->flag&MM_F_CIGAR))
|
||||
|
|
|
|||
2
mmpriv.h
2
mmpriv.h
|
|
@ -78,7 +78,7 @@ void mm_filter_regs(void *km, const mm_mapopt_t *opt, int *n_regs, mm_reg1_t *re
|
|||
void mm_filter_by_identity(void *km, int n_regs, mm_reg1_t *regs, float min_iden, int qlen, const char *qual);
|
||||
void mm_join_long(void *km, const mm_mapopt_t *opt, int qlen, int *n_regs, mm_reg1_t *regs, mm128_t *a);
|
||||
void mm_hit_sort_by_dp(void *km, int *n_regs, mm_reg1_t *r);
|
||||
void mm_set_mapq(int n_regs, mm_reg1_t *regs, int min_chain_sc, int match_sc, int rep_len);
|
||||
void mm_set_mapq(int n_regs, mm_reg1_t *regs, int min_chain_sc, int match_sc, int rep_len, int is_sr);
|
||||
|
||||
mm_seg_t *mm_seg_gen(void *km, uint32_t hash, int n_segs, const int *qlens, int n_regs0, const mm_reg1_t *regs0, int *n_regs, mm_reg1_t **regs, const mm128_t *a);
|
||||
void mm_seg_free(void *km, int n_segs, mm_seg_t *segs);
|
||||
|
|
|
|||
Loading…
Reference in New Issue