diff --git a/hit.c b/hit.c index 5a3b3c7..69261a9 100644 --- a/hit.c +++ b/hit.c @@ -134,13 +134,13 @@ void mm_sync_regs(void *km, int n_regs, mm_reg1_t *regs) // keep mm_reg1_t::{id, kfree(km, tmp); } -void mm_select_sub(void *km, float mask_level, float pri_ratio, int *n_, mm_reg1_t *r) +void mm_select_sub(void *km, float mask_level, float pri_ratio, int best_n, int *n_, mm_reg1_t *r) { if (pri_ratio > 0.0f && *n_ > 0) { - int i, k, n = *n_; + int i, k, n = *n_, n_2nd = 0; for (i = k = 0; i < n; ++i) - if (r[i].parent == i || r[i].score >= r[r[i].parent].score * pri_ratio) - r[k++] = r[i]; + if (r[i].parent == i) r[k++] = r[i]; + else if (r[i].score >= r[r[i].parent].score * pri_ratio && n_2nd++ < best_n) r[k++] = r[i]; else if (r[i].p) free(r[i].p); if (k != n) mm_sync_regs(km, k, r); // removing hits requires sync() *n_ = k; @@ -192,9 +192,9 @@ int mm_squeeze_a(void *km, int n_regs, mm_reg1_t *regs, mm128_t *a) return as; } -void mm_join_long(void *km, const mm_mapopt_t *opt, int qlen, int n_regs, mm_reg1_t *regs, mm128_t *a) +void mm_join_long(void *km, const mm_mapopt_t *opt, int qlen, int *n_regs_, mm_reg1_t *regs, mm128_t *a) { - int i, n_aux; + int i, n_aux, n_regs = *n_regs_, n_drop = 0; uint64_t *aux; if (n_regs < 2) return; // nothing to join @@ -228,8 +228,21 @@ void mm_join_long(void *km, const mm_mapopt_t *opt, int qlen, int n_regs, mm_reg r0->cnt += r1->cnt, r0->score += r1->score; mm_reg_set_coor(r0, qlen, a); r1->cnt = 0; + r1->parent = r0->id; + ++n_drop; } kfree(km, aux); + + if (n_drop > 0) { // then fix the hits hierarchy + for (i = 0; i < n_regs; ++i) { // adjust the mm_reg1_t::parent + mm_reg1_t *r = ®s[i]; + if (r->parent >= 0 && r->id != r->parent) { // fix for secondary hits only + if (regs[r->parent].parent >= 0 && regs[r->parent].parent != r->parent) + r->parent = regs[r->parent].parent; + } + } + mm_filter_regs(km, opt, n_regs_, regs); + } } void mm_set_mapq(int n_regs, mm_reg1_t *regs) diff --git a/main.c b/main.c index 6804548..95fcaa8 100644 --- a/main.c +++ b/main.c @@ -10,7 +10,7 @@ #include "minimap.h" #include "mmpriv.h" -#define MM_VERSION "2.0-r141-pre" +#define MM_VERSION "2.0-r142-pre" void liftrlimit() { @@ -68,7 +68,7 @@ int main(int argc, char *argv[]) mm_realtime0 = realtime(); mm_mapopt_init(&opt); - while ((c = getopt_long(argc, argv, "aw:k:t:r:f:Vv:g:I:d:ST:s:x:Hcp:M:n:z:A:B:O:E:m:D:", long_options, &long_idx)) >= 0) { + while ((c = getopt_long(argc, argv, "aw:k:t:r:f:Vv:g:I:d:ST:s:x:Hcp:M:n:z:A:B:O:E:m:D:N:", long_options, &long_idx)) >= 0) { if (c == 'w') w = atoi(optarg); else if (c == 'k') k = atoi(optarg); else if (c == 'H') is_hpc = 1; @@ -78,6 +78,7 @@ int main(int argc, char *argv[]) else if (c == 't') n_threads = atoi(optarg); else if (c == 'v') mm_verbose = atoi(optarg); else if (c == 'g') opt.max_gap = atoi(optarg); + else if (c == 'N') opt.best_n = atoi(optarg); else if (c == 'p') opt.pri_ratio = atof(optarg); else if (c == 'D') opt.min_seedcov_ratio = atof(optarg); else if (c == 'M') opt.mask_level = atof(optarg); diff --git a/map.c b/map.c index 53c85db..bb6a2c9 100644 --- a/map.c +++ b/map.c @@ -22,8 +22,9 @@ void mm_mapopt_init(mm_mapopt_t *opt) opt->max_chain_skip = 15; opt->min_seedcov_ratio = 0.0f; - opt->pri_ratio = 2.0f; opt->mask_level = 0.5f; + opt->pri_ratio = 2.0f; + opt->best_n = 5; opt->max_join_long = 20000; opt->max_join_short = 2000; @@ -240,14 +241,14 @@ mm_reg1_t *mm_map_frag(const mm_mapopt_t *opt, const mm_idx_t *mi, mm_tbuf_t *b, *n_regs = n_u; if (!(opt->flag & MM_F_AVA)) { // don't choose primary mapping(s) for read overlap mm_set_parent(b->km, opt->mask_level, *n_regs, regs); - mm_select_sub(b->km, opt->mask_level, opt->pri_ratio, n_regs, regs); - mm_join_long(b->km, opt, qlen, *n_regs, regs, a); // TODO: this can be applied to all-vs-all in principle + mm_select_sub(b->km, opt->mask_level, opt->pri_ratio, opt->best_n, n_regs, regs); + mm_join_long(b->km, opt, qlen, n_regs, regs, a); // TODO: this can be applied to all-vs-all in principle } if (opt->flag & MM_F_CIGAR) { regs = mm_align_skeleton(b->km, opt, mi, qlen, seq, n_regs, regs, a); // this calls mm_filter_regs() if (!(opt->flag & MM_F_AVA)) { mm_update_parent(b->km, opt->mask_level, *n_regs, regs); - mm_select_sub(b->km, opt->mask_level, opt->pri_ratio, n_regs, regs); + mm_select_sub(b->km, opt->mask_level, opt->pri_ratio, opt->best_n, n_regs, regs); } } else mm_filter_regs(b->km, opt, n_regs, regs); mm_set_mapq(*n_regs, regs); diff --git a/minimap.h b/minimap.h index 51e99e7..af0cd00 100644 --- a/minimap.h +++ b/minimap.h @@ -77,8 +77,9 @@ typedef struct { int min_chain_score; float min_seedcov_ratio; - float pri_ratio; float mask_level; + float pri_ratio; + int best_n; int max_join_long, max_join_short; int min_join_flank_sc; diff --git a/mmpriv.h b/mmpriv.h index 62d088a..e706ece 100644 --- a/mmpriv.h +++ b/mmpriv.h @@ -41,9 +41,9 @@ void mm_split_reg(mm_reg1_t *r, mm_reg1_t *r2, int n, int qlen, mm128_t *a); void mm_sync_regs(void *km, int n_regs, mm_reg1_t *regs); void mm_set_parent(void *km, float mask_level, int n, mm_reg1_t *r); void mm_update_parent(void *km, float mask_level, int n, mm_reg1_t *r); -void mm_select_sub(void *km, float mask_level, float pri_ratio, int *n_, mm_reg1_t *r); +void mm_select_sub(void *km, float mask_level, float pri_ratio, int best_n, int *n_, mm_reg1_t *r); void mm_filter_regs(void *km, const mm_mapopt_t *opt, int *n_regs, mm_reg1_t *regs); -void mm_join_long(void *km, const mm_mapopt_t *opt, int qlen, int n_regs, mm_reg1_t *regs, mm128_t *a); +void mm_join_long(void *km, const mm_mapopt_t *opt, int qlen, int *n_regs, mm_reg1_t *regs, mm128_t *a); void mm_set_mapq(int n_regs, mm_reg1_t *regs); #ifdef __cplusplus