support RMQ

This commit is contained in:
Heng Li 2021-05-03 09:27:04 -04:00
parent b7f4d8a0f4
commit bbb4f97e52
4 changed files with 39 additions and 24 deletions

3
main.c
View File

@ -71,6 +71,7 @@ static ko_longopt_t long_options[] = {
{ "alt", ko_required_argument, 344 },
{ "alt-drop", ko_required_argument, 345 },
{ "mask-len", ko_required_argument, 346 },
{ "rmq", ko_optional_argument, 347 },
{ "help", ko_no_argument, 'h' },
{ "max-intron-len", ko_required_argument, 'G' },
{ "version", ko_no_argument, 'V' },
@ -241,6 +242,8 @@ int main(int argc, char *argv[])
yes_or_no(&opt, MM_F_HEAP_SORT, o.longidx, o.arg, 1);
} else if (c == 326) { // --dual
yes_or_no(&opt, MM_F_NO_DUAL, o.longidx, o.arg, 0);
} else if (c == 347) { // --rmq
yes_or_no(&opt, MM_F_RMQ, o.longidx, o.arg, 1);
} else if (c == 'S') {
opt.flag |= MM_F_OUT_CS | MM_F_CIGAR | MM_F_OUT_CS_LONG;
if (mm_verbose >= 2)

11
map.c
View File

@ -271,10 +271,15 @@ void mm_map_frag(const mm_idx_t *mi, int n_segs, const int *qlens, const char **
if (max_chain_gap_ref < opt->max_gap) max_chain_gap_ref = opt->max_gap;
} else max_chain_gap_ref = opt->max_gap;
a = mg_lchain_dp(max_chain_gap_ref, max_chain_gap_qry, opt->bw, opt->max_chain_skip, opt->max_chain_iter, opt->min_cnt, opt->min_chain_score,
opt->chain_gap_scale * 0.2f, 0.0f, is_splice, n_segs, n_a, a, &n_regs0, &u, b->km);
if (opt->flag & MM_F_RMQ) {
a = mg_lchain_rmq(opt->max_gap, opt->rmq_inner_dist, opt->bw, opt->max_chain_skip, opt->rmq_size_cap, opt->min_cnt, opt->min_chain_score,
opt->chain_gap_scale * 0.2f, 0.0f, n_a, a, &n_regs0, &u, b->km);
} else {
a = mg_lchain_dp(max_chain_gap_ref, max_chain_gap_qry, opt->bw, opt->max_chain_skip, opt->max_chain_iter, opt->min_cnt, opt->min_chain_score,
opt->chain_gap_scale * 0.2f, 0.0f, is_splice, n_segs, n_a, a, &n_regs0, &u, b->km);
}
if (opt->max_occ > opt->mid_occ && rep_len > 0) {
if (opt->max_occ > opt->mid_occ && rep_len > 0 && !(opt->flag & MM_F_RMQ)) {
int rechain = 0;
if (n_regs0 > 0) { // test if the best chain has all the segments
int n_chained_segs = 1, max = 0, max_i = -1, max_off = -1, off = 0;

View File

@ -36,6 +36,7 @@
#define MM_F_NO_END_FLT 0x10000000
#define MM_F_HARD_MLEVEL 0x20000000
#define MM_F_SAM_HIT_ONLY 0x40000000
#define MM_F_RMQ 0x80000000LL
#define MM_I_HPC 0x1
#define MM_I_NO_SEQ 0x2
@ -120,6 +121,7 @@ typedef struct {
int min_cnt; // min number of minimizers on each chain
int min_chain_score; // min chaining score
float chain_gap_scale;
int rmq_size_cap, rmq_inner_dist;
float mask_level;
int mask_len;

View File

@ -22,13 +22,15 @@ void mm_mapopt_init(mm_mapopt_t *opt)
opt->min_cnt = 3;
opt->min_chain_score = 40;
opt->bw = 500;
opt->max_gap = 5000;
opt->max_gap = 10000;
opt->max_gap_ref = -1;
opt->max_chain_skip = 25;
opt->max_chain_iter = 5000;
opt->rmq_inner_dist = 1000;
opt->rmq_size_cap = 100000;
opt->chain_gap_scale = 1.0f;
opt->max_max_occ = 4095;
opt->occ_dist = 0;
opt->occ_dist = 500;
opt->mask_level = 0.5f;
opt->mask_len = INT_MAX;
@ -51,6 +53,7 @@ void mm_mapopt_init(mm_mapopt_t *opt)
opt->anchor_ext_len = 20, opt->anchor_ext_shift = 6;
opt->max_clip_ratio = 1.0f;
opt->mini_batch_size = 500000000;
opt->max_sw_mat = 100000000;
opt->pe_ori = 0; // FF
opt->pe_bonus = 33;
@ -81,19 +84,18 @@ int mm_set_opt(const char *preset, mm_idxopt_t *io, mm_mapopt_t *mo)
if (preset == 0) {
mm_idxopt_init(io);
mm_mapopt_init(mo);
} else if (strcmp(preset, "map-ont") == 0) { // this is the same as the default
} else if (strcmp(preset, "ava-ont") == 0) {
io->flag = 0, io->k = 15, io->w = 5;
mo->flag |= MM_F_ALL_CHAINS | MM_F_NO_DIAG | MM_F_NO_DUAL | MM_F_NO_LJOIN;
mo->min_chain_score = 100, mo->pri_ratio = 0.0f, mo->max_gap = 10000, mo->max_chain_skip = 25;
mo->bw = 2000;
} else if (strcmp(preset, "map10k") == 0 || strcmp(preset, "map-pb") == 0) {
io->flag |= MM_I_HPC, io->k = 19;
} else if (strcmp(preset, "ava-pb") == 0) {
io->flag |= MM_I_HPC, io->k = 19, io->w = 5;
mo->flag |= MM_F_ALL_CHAINS | MM_F_NO_DIAG | MM_F_NO_DUAL | MM_F_NO_LJOIN;
mo->min_chain_score = 100, mo->pri_ratio = 0.0f, mo->max_gap = 10000, mo->max_chain_skip = 25;
} else if (strcmp(preset, "map10k") == 0 || strcmp(preset, "map-pb") == 0) {
io->flag |= MM_I_HPC, io->k = 19;
} else if (strcmp(preset, "map-ont") == 0) {
io->flag = 0, io->k = 15;
} else if (strcmp(preset, "map-hifi") == 0 || strcmp(preset, "map-ccs") == 0) {
io->flag = 0, io->k = 19, io->w = 19;
mo->a = 1, mo->b = 4, mo->q = 6, mo->q2 = 26, mo->e = 2, mo->e2 = 1;
@ -101,24 +103,21 @@ int mm_set_opt(const char *preset, mm_idxopt_t *io, mm_mapopt_t *mo)
mo->occ_dist = 500;
mo->min_mid_occ = 100, mo->max_mid_occ = 500;
mo->min_dp_max = 200;
} else if (strcmp(preset, "asm5") == 0) {
} else if (strncmp(preset, "asm", 3) == 0) {
io->flag = 0, io->k = 19, io->w = 19;
mo->a = 1, mo->b = 19, mo->q = 39, mo->q2 = 81, mo->e = 3, mo->e2 = 1, mo->zdrop = mo->zdrop_inv = 200;
mo->min_mid_occ = 100;
mo->min_dp_max = 200;
mo->best_n = 50;
} else if (strcmp(preset, "asm10") == 0) {
io->flag = 0, io->k = 19, io->w = 19;
mo->a = 1, mo->b = 9, mo->q = 16, mo->q2 = 41, mo->e = 2, mo->e2 = 1, mo->zdrop = mo->zdrop_inv = 200;
mo->min_mid_occ = 100;
mo->min_dp_max = 200;
mo->best_n = 50;
} else if (strcmp(preset, "asm20") == 0) {
io->flag = 0, io->k = 19, io->w = 10;
mo->a = 1, mo->b = 4, mo->q = 6, mo->q2 = 26, mo->e = 2, mo->e2 = 1, mo->zdrop = mo->zdrop_inv = 200;
mo->min_mid_occ = 100;
mo->bw = 100000;
mo->flag |= MM_F_RMQ | MM_F_NO_LJOIN;
mo->min_mid_occ = 100, mo->max_mid_occ = 500;
mo->min_dp_max = 200;
mo->best_n = 50;
if (strcmp(preset, "asm5") == 0) {
mo->a = 1, mo->b = 19, mo->q = 39, mo->q2 = 81, mo->e = 3, mo->e2 = 1, mo->zdrop = mo->zdrop_inv = 200;
} else if (strcmp(preset, "asm10") == 0) {
mo->a = 1, mo->b = 9, mo->q = 16, mo->q2 = 41, mo->e = 2, mo->e2 = 1, mo->zdrop = mo->zdrop_inv = 200;
} else if (strcmp(preset, "asm20") == 0) {
mo->a = 1, mo->b = 4, mo->q = 6, mo->q2 = 26, mo->e = 2, mo->e2 = 1, mo->zdrop = mo->zdrop_inv = 200;
io->w = 10;
} else return -1;
} else if (strcmp(preset, "short") == 0 || strcmp(preset, "sr") == 0) {
io->flag = 0, io->k = 21, io->w = 11;
mo->flag |= MM_F_SR | MM_F_FRAG_MODE | MM_F_NO_PRINT_2ND | MM_F_2_IO_THREADS | MM_F_HEAP_SORT;
@ -140,6 +139,7 @@ int mm_set_opt(const char *preset, mm_idxopt_t *io, mm_mapopt_t *mo)
} else if (strncmp(preset, "splice", 6) == 0 || strcmp(preset, "cdna") == 0) {
io->flag = 0, io->k = 15, io->w = 5;
mo->flag |= MM_F_SPLICE | MM_F_SPLICE_FOR | MM_F_SPLICE_REV | MM_F_SPLICE_FLANK;
mo->max_sw_mat = 0;
mo->max_gap = 2000, mo->max_gap_ref = mo->bw = 200000;
mo->a = 1, mo->b = 2, mo->q = 2, mo->e = 1, mo->q2 = 32, mo->e2 = 0;
mo->noncan = 9;
@ -153,6 +153,11 @@ int mm_set_opt(const char *preset, mm_idxopt_t *io, mm_mapopt_t *mo)
int mm_check_opt(const mm_idxopt_t *io, const mm_mapopt_t *mo)
{
if ((mo->flag & MM_F_RMQ) && (mo->flag & (MM_F_SR|MM_F_SPLICE))) {
if (mm_verbose >= 1)
fprintf(stderr, "[ERROR]\033[1;31m --rmq doesn't work with --sr or --splice\033[0m\n");
return -7;
}
if (mo->split_prefix && (mo->flag & (MM_F_OUT_CS|MM_F_OUT_MD))) {
if (mm_verbose >= 1)
fprintf(stderr, "[ERROR]\033[1;31m --cs or --MD doesn't work with --split-prefix\033[0m\n");