r361: improved mapq for short reads
This commit is contained in:
parent
3c91d652dd
commit
6a82a21dee
13
hit.c
13
hit.c
|
|
@ -105,12 +105,13 @@ void mm_set_parent(void *km, float mask_level, int n, mm_reg1_t *r) // and compu
|
|||
if (ol > mask_level * min) {
|
||||
ri->parent = rp->parent;
|
||||
rp->subsc = rp->subsc > ri->score? rp->subsc : ri->score;
|
||||
if (ri->cnt >= rp->cnt) ++ri->n_sub;
|
||||
if (rp->p && ri->p)
|
||||
rp->p->dp_max2 = rp->p->dp_max2 > ri->p->dp_max? rp->p->dp_max2 : ri->p->dp_max;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (j == k) w[k++] = i, ri->parent = i;
|
||||
if (j == k) w[k++] = i, ri->parent = i, ri->n_sub = 0;
|
||||
}
|
||||
kfree(km, w);
|
||||
}
|
||||
|
|
@ -288,7 +289,7 @@ void mm_join_long(void *km, const mm_mapopt_t *opt, int qlen, int *n_regs_, mm_r
|
|||
}
|
||||
}
|
||||
|
||||
void mm_set_mapq(int n_regs, mm_reg1_t *regs, int min_chain_sc)
|
||||
void mm_set_mapq(int n_regs, mm_reg1_t *regs, int min_chain_sc, int n_rep_mini)
|
||||
{
|
||||
static const float q_coef = 30.0f;
|
||||
int i;
|
||||
|
|
@ -298,12 +299,16 @@ void mm_set_mapq(int n_regs, mm_reg1_t *regs, int min_chain_sc)
|
|||
r->mapq = 0;
|
||||
} else if (r->parent == r->id) {
|
||||
int mapq, subsc;
|
||||
float pen_cm = r->cnt >= 10? 1.0f : 0.1f * r->cnt;
|
||||
float pen_cm = 1.0f;
|
||||
if (r->cnt < 10)
|
||||
pen_cm = 0.1f * r->cnt * (r->cnt > n_rep_mini? 1.0f : (1.0f + r->cnt) / (1.0f + n_rep_mini));
|
||||
subsc = r->subsc > min_chain_sc? r->subsc : min_chain_sc;
|
||||
if (r->p && r->p->dp_max2 > 0 && r->p->dp_max > 0) {
|
||||
float identity = (float)(r->p->blen - r->p->n_diff - r->p->n_ambi) / (r->p->blen - r->p->n_ambi);
|
||||
mapq = (int)(identity * pen_cm * q_coef * (1. - (float)r->p->dp_max2 * subsc / r->p->dp_max / r->score) * logf(r->score));
|
||||
float chain_ratio = subsc > r->score? (float)subsc / r->score : 1.0f;
|
||||
mapq = (int)(identity * pen_cm * q_coef * (1. - (float)chain_ratio * r->p->dp_max2 / r->p->dp_max) * logf(r->score));
|
||||
} else mapq = (int)(pen_cm * q_coef * (1. - (float)subsc / r->score) * logf(r->score));
|
||||
mapq -= (int)(4.343 * log(r->n_sub + 1) + .499);
|
||||
mapq = mapq > 0? mapq : 0;
|
||||
r->mapq = mapq < 60? mapq : 60;
|
||||
} else r->mapq = 0;
|
||||
|
|
|
|||
4
main.c
4
main.c
|
|
@ -6,7 +6,7 @@
|
|||
#include "mmpriv.h"
|
||||
#include "getopt.h"
|
||||
|
||||
#define MM_VERSION "2.1.1-r360-dirty"
|
||||
#define MM_VERSION "2.1.1-r361-dirty"
|
||||
|
||||
#ifdef __linux__
|
||||
#include <sys/resource.h>
|
||||
|
|
@ -29,7 +29,7 @@ static struct option long_options[] = {
|
|||
{ "no-kalloc", no_argument, 0, 0 },
|
||||
{ "print-qname", no_argument, 0, 0 },
|
||||
{ "no-self", no_argument, 0, 0 },
|
||||
{ "print-seed", no_argument, 0, 0 },
|
||||
{ "print-seeds", no_argument, 0, 0 },
|
||||
{ "max-chain-skip", required_argument, 0, 0 },
|
||||
{ "min-dp-len", required_argument, 0, 0 },
|
||||
{ "print-aln-seq", no_argument, 0, 0 },
|
||||
|
|
|
|||
17
map.c
17
map.c
|
|
@ -107,7 +107,7 @@ static void mm_dust_minier(mm128_v *mini, int l_seq, const char *seq, int sdust_
|
|||
|
||||
mm_reg1_t *mm_map(const mm_idx_t *mi, int qlen, const char *seq, int *n_regs, mm_tbuf_t *b, const mm_mapopt_t *opt, const char *qname)
|
||||
{
|
||||
int i, n, j, n_u, max_gap_ref;
|
||||
int i, n, j, n_u, max_gap_ref, n_rep_mini = 0;
|
||||
int64_t n_a;
|
||||
uint64_t *u;
|
||||
mm_match_t *m;
|
||||
|
|
@ -141,7 +141,10 @@ mm_reg1_t *mm_map(const mm_idx_t *mi, int qlen, const char *seq, int *n_regs, mm
|
|||
mm_match_t *q = &m[i];
|
||||
const uint64_t *r = q->x.cr;
|
||||
int k, q_span = p->x & 0xff, is_tandem = 0;
|
||||
if (q->n >= opt->mid_occ) continue;
|
||||
if (q->n >= opt->mid_occ) {
|
||||
++n_rep_mini;
|
||||
continue;
|
||||
}
|
||||
if (i > 0 && p->x>>8 == b->mini.a[i - 1].x>>8) is_tandem = 1;
|
||||
if (i < n - 1 && p->x>>8 == b->mini.a[i + 1].x>>8) is_tandem = 1;
|
||||
for (k = 0; k < q->n; ++k) {
|
||||
|
|
@ -156,10 +159,10 @@ mm_reg1_t *mm_map(const mm_idx_t *mi, int qlen, const char *seq, int *n_regs, mm
|
|||
}
|
||||
p = &a[j++];
|
||||
if ((r[k]&1) == (q->qpos&1)) { // forward strand
|
||||
p->x = (r[k]&0xffffffff00000000ULL) | (uint32_t)r[k]>>1;
|
||||
p->x = (r[k]&0xffffffff00000000ULL) | rpos;
|
||||
p->y = (uint64_t)q_span << 32 | q->qpos >> 1;
|
||||
} else { // reverse strand
|
||||
p->x = 1ULL<<63 | (r[k]&0xffffffff00000000ULL) | (uint32_t)r[k]>>1;
|
||||
p->x = 1ULL<<63 | (r[k]&0xffffffff00000000ULL) | rpos;
|
||||
p->y = (uint64_t)q_span << 32 | (qlen - ((q->qpos>>1) + 1 - q_span) - 1);
|
||||
}
|
||||
if (is_tandem) p->y |= MM_SEED_TANDEM;
|
||||
|
|
@ -169,10 +172,12 @@ mm_reg1_t *mm_map(const mm_idx_t *mi, int qlen, const char *seq, int *n_regs, mm
|
|||
radix_sort_128x(a, a + n_a);
|
||||
kfree(b->km, m);
|
||||
|
||||
if (mm_dbg_flag & MM_DBG_PRINT_SEED)
|
||||
if (mm_dbg_flag & MM_DBG_PRINT_SEED) {
|
||||
fprintf(stderr, "RS\t%d\n", n_rep_mini);
|
||||
for (i = 0; i < n_a; ++i)
|
||||
fprintf(stderr, "SD\t%s\t%d\t%c\t%d\t%d\t%d\n", mi->seq[a[i].x<<1>>33].name, (int32_t)a[i].x, "+-"[a[i].x>>63], (int32_t)a[i].y, (int32_t)(a[i].y>>32&0xff),
|
||||
i == 0? 0 : ((int32_t)a[i].y - (int32_t)a[i-1].y) - ((int32_t)a[i].x - (int32_t)a[i-1].x));
|
||||
}
|
||||
|
||||
max_gap_ref = opt->max_gap_ref >= 0? opt->max_gap_ref : opt->max_gap;
|
||||
n_u = mm_chain_dp(max_gap_ref, opt->max_gap, opt->bw, opt->max_chain_skip, opt->min_cnt, opt->min_chain_score, !!(opt->flag&MM_F_SPLICE), n_a, a, &u, b->km);
|
||||
|
|
@ -199,7 +204,7 @@ mm_reg1_t *mm_map(const mm_idx_t *mi, int qlen, const char *seq, int *n_regs, mm
|
|||
mm_set_sam_pri(*n_regs, regs);
|
||||
}
|
||||
}
|
||||
mm_set_mapq(*n_regs, regs, opt->min_chain_score);
|
||||
mm_set_mapq(*n_regs, regs, opt->min_chain_score, n_rep_mini);
|
||||
|
||||
// free
|
||||
kfree(b->km, a);
|
||||
|
|
|
|||
|
|
@ -181,11 +181,11 @@ var sum_tot = 0, sum_err = 0, q_out = -1, sum_tot2 = 0, sum_err2 = 0;
|
|||
for (var q = max_mapq; q >= 0; --q) {
|
||||
if (tot[q] == 0) continue;
|
||||
if (q_out < 0 || err[q] > 0) {
|
||||
if (q_out >= 0) print('Q', q_out, sum_tot, sum_err, (sum_err2/sum_tot2).toFixed(9));
|
||||
if (q_out >= 0) print('Q', q_out, sum_tot, sum_err, (sum_err2/sum_tot2).toFixed(9), sum_tot2);
|
||||
sum_tot = sum_err = 0, q_out = q;
|
||||
}
|
||||
sum_tot += tot[q], sum_err += err[q];
|
||||
sum_tot2 += tot[q], sum_err2 += err[q];
|
||||
}
|
||||
print('Q', q_out, sum_tot, sum_err, (sum_err2/sum_tot2).toFixed(9));
|
||||
print('Q', q_out, sum_tot, sum_err, (sum_err2/sum_tot2).toFixed(9), sum_tot2);
|
||||
if (n_unmapped != null) print('U', n_unmapped);
|
||||
|
|
|
|||
2
mmpriv.h
2
mmpriv.h
|
|
@ -58,7 +58,7 @@ void mm_select_sub(void *km, float mask_level, float pri_ratio, int min_diff, in
|
|||
void mm_filter_regs(void *km, const mm_mapopt_t *opt, int *n_regs, mm_reg1_t *regs);
|
||||
void mm_join_long(void *km, const mm_mapopt_t *opt, int qlen, int *n_regs, mm_reg1_t *regs, mm128_t *a);
|
||||
void mm_hit_sort_by_dp(void *km, int *n_regs, mm_reg1_t *r);
|
||||
void mm_set_mapq(int n_regs, mm_reg1_t *regs, int min_chain_sc);
|
||||
void mm_set_mapq(int n_regs, mm_reg1_t *regs, int min_chain_sc, int n_rep_mini);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue