r1049: removed the long-join heuristics
This commit is contained in:
parent
4f91558160
commit
379728726a
58
hit.c
58
hit.c
|
|
@ -312,64 +312,6 @@ int mm_squeeze_a(void *km, int n_regs, mm_reg1_t *regs, mm128_t *a)
|
|||
return as;
|
||||
}
|
||||
|
||||
void mm_join_long(void *km, const mm_mapopt_t *opt, int qlen, int *n_regs_, mm_reg1_t *regs, mm128_t *a)
|
||||
{
|
||||
int i, n_aux, n_regs = *n_regs_, n_drop = 0;
|
||||
uint64_t *aux;
|
||||
|
||||
if (n_regs < 2) return; // nothing to join
|
||||
mm_squeeze_a(km, n_regs, regs, a);
|
||||
|
||||
aux = (uint64_t*)kmalloc(km, n_regs * 8);
|
||||
for (i = n_aux = 0; i < n_regs; ++i)
|
||||
if (regs[i].parent == i || regs[i].parent < 0)
|
||||
aux[n_aux++] = (uint64_t)regs[i].as << 32 | i;
|
||||
radix_sort_64(aux, aux + n_aux);
|
||||
|
||||
for (i = n_aux - 1; i >= 1; --i) {
|
||||
mm_reg1_t *r0 = ®s[(int32_t)aux[i-1]], *r1 = ®s[(int32_t)aux[i]];
|
||||
mm128_t *a0e, *a1s;
|
||||
int max_gap, min_gap, sc_thres, min_flank_len;
|
||||
|
||||
// test
|
||||
if (r0->as + r0->cnt != r1->as) continue; // not adjacent in a[]
|
||||
if (r0->rid != r1->rid || r0->rev != r1->rev) continue; // make sure on the same target and strand
|
||||
a0e = &a[r0->as + r0->cnt - 1];
|
||||
a1s = &a[r1->as];
|
||||
if (a1s->x <= a0e->x || (int32_t)a1s->y <= (int32_t)a0e->y) continue; // keep colinearity
|
||||
max_gap = min_gap = (int32_t)a1s->y - (int32_t)a0e->y;
|
||||
max_gap = a0e->x + max_gap > a1s->x? max_gap : a1s->x - a0e->x;
|
||||
min_gap = a0e->x + min_gap < a1s->x? min_gap : a1s->x - a0e->x;
|
||||
if (max_gap > opt->max_join_long || min_gap > opt->max_join_short) continue;
|
||||
sc_thres = (int)((float)opt->min_join_flank_sc / opt->max_join_long * max_gap + .499);
|
||||
if (r0->score < sc_thres || r1->score < sc_thres) continue; // require good flanking chains
|
||||
min_flank_len = (int)(max_gap * opt->min_join_flank_ratio);
|
||||
if (r0->re - r0->rs < min_flank_len || r0->qe - r0->qs < min_flank_len) continue; // require enough flanking length
|
||||
if (r1->re - r1->rs < min_flank_len || r1->qe - r1->qs < min_flank_len) continue;
|
||||
|
||||
// all conditions satisfied; join
|
||||
a[r1->as].y |= MM_SEED_LONG_JOIN;
|
||||
r0->cnt += r1->cnt, r0->score += r1->score;
|
||||
mm_reg_set_coor(r0, qlen, a);
|
||||
r1->cnt = 0;
|
||||
r1->parent = r0->id;
|
||||
++n_drop;
|
||||
}
|
||||
kfree(km, aux);
|
||||
|
||||
if (n_drop > 0) { // then fix the hits hierarchy
|
||||
for (i = 0; i < n_regs; ++i) { // adjust the mm_reg1_t::parent
|
||||
mm_reg1_t *r = ®s[i];
|
||||
if (r->parent >= 0 && r->id != r->parent) { // fix for secondary hits only
|
||||
if (regs[r->parent].parent >= 0 && regs[r->parent].parent != r->parent)
|
||||
r->parent = regs[r->parent].parent;
|
||||
}
|
||||
}
|
||||
mm_filter_regs(opt, qlen, n_regs_, regs);
|
||||
mm_sync_regs(km, *n_regs_, regs);
|
||||
}
|
||||
}
|
||||
|
||||
mm_seg_t *mm_seg_gen(void *km, uint32_t hash, int n_segs, const int *qlens, int n_regs0, const mm_reg1_t *regs0, int *n_regs, mm_reg1_t **regs, const mm128_t *a)
|
||||
{
|
||||
int s, i, j, acc_qlen[MM_MAX_SEG+1], qlen_sum = 0;
|
||||
|
|
|
|||
7
main.c
7
main.c
|
|
@ -7,7 +7,7 @@
|
|||
#include "mmpriv.h"
|
||||
#include "ketopt.h"
|
||||
|
||||
#define MM_VERSION "2.18-r1048-dirty"
|
||||
#define MM_VERSION "2.18-r1049-dirty"
|
||||
|
||||
#ifdef __linux__
|
||||
#include <sys/resource.h>
|
||||
|
|
@ -205,7 +205,6 @@ int main(int argc, char *argv[])
|
|||
else if (c == 327) opt.max_clip_ratio = atof(o.arg); // --max-clip-ratio
|
||||
else if (c == 328) opt.min_mid_occ = atoi(o.arg); // --min-occ-floor
|
||||
else if (c == 329) opt.flag |= MM_F_OUT_MD; // --MD
|
||||
else if (c == 330) opt.min_join_flank_ratio = atof(o.arg); // --lj-min-ratio
|
||||
else if (c == 331) opt.sc_ambi = atoi(o.arg); // --score-N
|
||||
else if (c == 332) opt.flag |= MM_F_EQX; // --eqx
|
||||
else if (c == 333) opt.flag |= MM_F_PAF_NO_HIT; // --paf-no-hit
|
||||
|
|
@ -221,7 +220,9 @@ int main(int argc, char *argv[])
|
|||
else if (c == 344) alt_list = o.arg; // --alt
|
||||
else if (c == 345) opt.alt_drop = atof(o.arg); // --alt-drop
|
||||
else if (c == 346) opt.mask_len = mm_parse_num(o.arg); // --mask-len
|
||||
else if (c == 314) { // --frag
|
||||
else if (c == 330) {
|
||||
fprintf(stderr, "[WARNING] \033[1;31m --lj-min-ratio has been deprecated.\033[0m\n");
|
||||
} else if (c == 314) { // --frag
|
||||
yes_or_no(&opt, MM_F_FRAG_MODE, o.longidx, o.arg, 1);
|
||||
} else if (c == 315) { // --secondary
|
||||
yes_or_no(&opt, MM_F_NO_PRINT_2ND, o.longidx, o.arg, 0);
|
||||
|
|
|
|||
4
map.c
4
map.c
|
|
@ -210,8 +210,6 @@ static void chain_post(const mm_mapopt_t *opt, int max_chain_gap_ref, const mm_i
|
|||
mm_set_parent(km, opt->mask_level, opt->mask_len, *n_regs, regs, opt->a * 2 + opt->b, opt->flag&MM_F_HARD_MLEVEL, opt->alt_drop);
|
||||
if (n_segs <= 1) mm_select_sub(km, opt->pri_ratio, mi->k*2, opt->best_n, n_regs, regs);
|
||||
else mm_select_sub_multi(km, opt->pri_ratio, 0.2f, 0.7f, max_chain_gap_ref, mi->k*2, opt->best_n, n_segs, qlens, n_regs, regs);
|
||||
if (!(opt->flag & (MM_F_SPLICE|MM_F_SR|MM_F_NO_LJOIN))) // long join not working well without primary chains
|
||||
mm_join_long(km, opt, qlen, n_regs, regs, a);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -302,7 +300,7 @@ void mm_map_frag(const mm_idx_t *mi, int n_segs, const int *qlens, const char **
|
|||
a = mg_lchain_dp(max_chain_gap_ref, max_chain_gap_qry, opt->bw, opt->max_chain_skip, opt->max_chain_iter, opt->min_cnt, opt->min_chain_score,
|
||||
opt->chain_gap_scale * 0.01 * mi->k, 0.0f, is_splice, n_segs, n_a, a, &n_regs0, &u, b->km);
|
||||
}
|
||||
} else if (opt->bw_long > opt->bw && !(opt->flag & MM_F_RMQ) && n_segs == 1 && n_regs0 > 1) {
|
||||
} else if (opt->bw_long > opt->bw && !(opt->flag & (MM_F_RMQ|MM_F_NO_LJOIN)) && n_segs == 1 && n_regs0 > 1) {
|
||||
int32_t st = (int32_t)a[0].y, en = (int32_t)a[(int32_t)u[0] - 1].y;
|
||||
if (qlen_sum - (en - st) > 1000 || en - st > qlen_sum * .1) {
|
||||
int32_t i;
|
||||
|
|
|
|||
|
|
@ -128,10 +128,6 @@ typedef struct {
|
|||
float pri_ratio;
|
||||
int best_n; // top best_n chains are subjected to DP alignment
|
||||
|
||||
int max_join_long, max_join_short;
|
||||
int min_join_flank_sc;
|
||||
float min_join_flank_ratio;
|
||||
|
||||
float alt_drop;
|
||||
|
||||
int a, b, q, e, q2, e2; // matching score, mismatch, gap-open and gap-ext penalties
|
||||
|
|
|
|||
1
mmpriv.h
1
mmpriv.h
|
|
@ -91,7 +91,6 @@ void mm_set_parent(void *km, float mask_level, int mask_len, int n, mm_reg1_t *r
|
|||
void mm_select_sub(void *km, float pri_ratio, int min_diff, int best_n, int *n_, mm_reg1_t *r);
|
||||
void mm_select_sub_multi(void *km, float pri_ratio, float pri1, float pri2, int max_gap_ref, int min_diff, int best_n, int n_segs, const int *qlens, int *n_, mm_reg1_t *r);
|
||||
void mm_filter_regs(const mm_mapopt_t *opt, int qlen, int *n_regs, mm_reg1_t *regs);
|
||||
void mm_join_long(void *km, const mm_mapopt_t *opt, int qlen, int *n_regs, mm_reg1_t *regs, mm128_t *a);
|
||||
void mm_hit_sort(void *km, int *n_regs, mm_reg1_t *r, float alt_diff_frac);
|
||||
void mm_set_mapq(void *km, int n_regs, mm_reg1_t *regs, int min_chain_sc, int match_sc, int rep_len, int is_sr);
|
||||
|
||||
|
|
|
|||
|
|
@ -38,11 +38,6 @@ void mm_mapopt_init(mm_mapopt_t *opt)
|
|||
opt->pri_ratio = 0.8f;
|
||||
opt->best_n = 5;
|
||||
|
||||
opt->max_join_long = 20000;
|
||||
opt->max_join_short = 2000;
|
||||
opt->min_join_flank_sc = 1000;
|
||||
opt->min_join_flank_ratio = 0.5f;
|
||||
|
||||
opt->alt_drop = 0.15f;
|
||||
|
||||
opt->a = 2, opt->b = 4, opt->q = 4, opt->e = 2, opt->q2 = 24, opt->e2 = 1;
|
||||
|
|
|
|||
|
|
@ -30,10 +30,6 @@ cdef extern from "minimap.h":
|
|||
float pri_ratio
|
||||
int best_n
|
||||
|
||||
int max_join_long, max_join_short
|
||||
int min_join_flank_sc
|
||||
float min_join_flank_ratio
|
||||
|
||||
float alt_drop
|
||||
|
||||
int a, b, q, e, q2, e2
|
||||
|
|
|
|||
Loading…
Reference in New Issue