diff --git a/Makefile b/Makefile index be7c931..67289d3 100644 --- a/Makefile +++ b/Makefile @@ -1,9 +1,9 @@ CC= gcc -CFLAGS= -g -Wall -O2 -Wc++-compat -Wno-unused-function +CFLAGS= -g -Wall -O2 -Wc++-compat CPPFLAGS= -DHAVE_KALLOC INCLUDES= -I. -OBJS= kalloc.o kthread.o misc.o bseq.o sketch.o chain.o align.o hit.o sdust.o \ - index.o format.o map.o ksw2_extz2_sse.o +OBJS= kthread.o kalloc.o ksw2_extz2_sse.o misc.o bseq.o sketch.o sdust.o \ + index.o chain.o align.o hit.o map.o format.o PROG= minimap2 PROG_EXTRA= sdust LIBS= -lm -lz -lpthread diff --git a/align.c b/align.c index d6b7dc5..674701b 100644 --- a/align.c +++ b/align.c @@ -239,11 +239,8 @@ static void mm_align1(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int r->p->dp_score += ez->max; re1 = rs + (ez->max_t + 1); qe1 = qs + (ez->max_q + 1); - if (r->cnt - (j + 1) >= opt->min_cnt) { + if (r->cnt - (j + 1) >= opt->min_cnt) mm_split_reg(r, r2, j + 1, qlen, a); - if (j + 1 < opt->min_cnt) - r2->id = r->id, r->id = -1; - } break; } else r->p->dp_score += ez->score; rs = re, qs = qe; @@ -308,5 +305,6 @@ mm_reg1_t *mm_align_skeleton(void *km, const mm_mapopt_t *opt, const mm_idx_t *m kfree(km, qseq0[0]); kfree(km, qseq0[1]); kfree(km, ez.cigar); mm_filter_regs(km, opt, n_regs_, regs); + mm_hit_sort_by_dp(km, n_regs_, regs); return regs; } diff --git a/hit.c b/hit.c index 5a3b3c7..ebc0f68 100644 --- a/hit.c +++ b/hit.c @@ -3,6 +3,22 @@ #include "mmpriv.h" #include "kalloc.h" +static inline void mm_reg_set_coor(mm_reg1_t *r, int32_t qlen, const mm128_t *a) +{ + int32_t k = r->as, q_span = (int32_t)(a[k].y>>32&0xff); + r->rev = a[k].x>>63; + r->rid = a[k].x<<1>>33; + r->rs = (int32_t)a[k].x + 1 > q_span? (int32_t)a[k].x + 1 - q_span : 0; // NB: target span may be shorter, so this test is necessary + r->re = (int32_t)a[k + r->cnt - 1].x + 1; + if (!r->rev) { + r->qs = (int32_t)a[k].y + 1 - q_span; + r->qe = (int32_t)a[k + r->cnt - 1].y + 1; + } else { + r->qs = qlen - ((int32_t)a[k + r->cnt - 1].y + 1); + r->qe = qlen - ((int32_t)a[k].y + 1 - q_span); + } +} + mm_reg1_t *mm_gen_regs(void *km, int qlen, int n_u, uint64_t *u, mm128_t *a) // convert chains to hits { mm128_t *z, tmp; @@ -59,60 +75,62 @@ void mm_set_parent(void *km, float mask_level, int n, mm_reg1_t *r) // and compu { int i, j, k, *w; if (n <= 0) return; + for (i = 0; i < n; ++i) r[i].id = i; w = (int*)kmalloc(km, n * sizeof(int)); w[0] = 0, r[0].parent = 0; for (i = 1, k = 1; i < n; ++i) { - int si = r[i].qs, ei = r[i].qe; + mm_reg1_t *ri = &r[i]; + int si = ri->qs, ei = ri->qe; for (j = 0; j < k; ++j) { - int sj = r[w[j]].qs, ej = r[w[j]].qe; + mm_reg1_t *rp = &r[w[j]]; + int sj = rp->qs, ej = rp->qe; int min = ej - sj < ei - si? ej - sj : ei - si; int ol = si < sj? (ei < sj? 0 : ei < ej? ei - sj : ej - sj) : (ej < si? 0 : ej < ei? ej - si : ei - si); if (ol > mask_level * min) { - r[i].parent = r[w[j]].parent; - if (r[w[j]].subsc < r[i].score) - r[w[j]].subsc = r[i].score; + ri->parent = rp->parent; + rp->subsc = rp->subsc > ri->score? rp->subsc : ri->score; + if (rp->p && ri->p) + rp->p->dp_max2 = rp->p->dp_max2 > ri->p->dp_max? rp->p->dp_max2 : ri->p->dp_max; break; } } - if (j == k) w[k++] = i, r[i].parent = i; + if (j == k) w[k++] = i, ri->parent = i; } kfree(km, w); } -void mm_update_parent(void *km, float mask_level, int n, mm_reg1_t *r) // due to changes to r.{qs,qe} after DP extension +void mm_hit_sort_by_dp(void *km, int *n_regs, mm_reg1_t *r) { - int i, j, k, *w, n_pri = 0; - if (n <= 0) return; - for (i = 0; i < n; ++i) - if (r[i].id == r[i].parent) ++n_pri; - if (n_pri <= 1) return; - w = (int*)kmalloc(km, n_pri * sizeof(int)); - for (i = j = 0; i < n; ++i) // find the first primary - if (r[i].id == r[i].parent) break; - for (w[0] = i, i = i + 1, k = 1; i < n; ++i) { - int si = r[i].qs, ei = r[i].qe; - if (r[i].id != r[i].parent) continue; // only check primary - for (j = 0; j < k; ++j) { - int sj = r[w[j]].qs, ej = r[w[j]].qe; - int min = ej - sj < ei - si? ej - sj : ei - si; - int ol = si < sj? (ei < sj? 0 : ei < ej? ei - sj : ej - sj) : (ej < si? 0 : ej < ei? ej - si : ei - si); - if (ol > mask_level * min) { - r[i].parent = r[w[j]].parent; - if (r[w[j]].subsc < r[i].score) - r[w[j]].subsc = r[i].score; - break; - } + int32_t i, n_aux, n = *n_regs; + uint64_t *aux; + mm_reg1_t *t; + + if (n <= 1) return; + aux = (uint64_t*)kmalloc(km, n * 8); + t = (mm_reg1_t*)kmalloc(km, n * sizeof(mm_reg1_t)); + for (i = n_aux = 0; i < n; ++i) { + if (r[i].cnt > 0) { // squeeze out elements with cnt==0 (soft deleted) + assert(r[i].p); + aux[n_aux++] = (uint64_t)r[i].p->dp_max << 32 | i; + } else if (r[i].p) { + kfree(km, r[i].p); + r[i].p = 0; } - if (j == k) w[k++] = i; } - kfree(km, w); + radix_sort_64(aux, aux + n_aux); + for (i = n_aux - 1; i >= 0; --i) + t[n_aux - 1 - i] = r[(int32_t)aux[i]]; + memcpy(r, t, sizeof(mm_reg1_t) * n_aux); + *n_regs = n_aux; + kfree(km, aux); + kfree(km, t); } void mm_sync_regs(void *km, int n_regs, mm_reg1_t *regs) // keep mm_reg1_t::{id,parent} in sync; also reset id { int *tmp, i, max_id = -1, n_tmp, n_pri; if (n_regs <= 0) return; - for (i = 0; i < n_regs; ++i) + for (i = 0; i < n_regs; ++i) // NB: doesn't work if mm_reg1_t::id is negative max_id = max_id > regs[i].id? max_id : regs[i].id; n_tmp = max_id + 1; tmp = (int*)kmalloc(km, n_tmp * sizeof(int)); @@ -134,13 +152,13 @@ void mm_sync_regs(void *km, int n_regs, mm_reg1_t *regs) // keep mm_reg1_t::{id, kfree(km, tmp); } -void mm_select_sub(void *km, float mask_level, float pri_ratio, int *n_, mm_reg1_t *r) +void mm_select_sub(void *km, float mask_level, float pri_ratio, int best_n, int *n_, mm_reg1_t *r) { if (pri_ratio > 0.0f && *n_ > 0) { - int i, k, n = *n_; + int i, k, n = *n_, n_2nd = 0; for (i = k = 0; i < n; ++i) - if (r[i].parent == i || r[i].score >= r[r[i].parent].score * pri_ratio) - r[k++] = r[i]; + if (r[i].parent == i) r[k++] = r[i]; + else if (r[i].score >= r[r[i].parent].score * pri_ratio && n_2nd++ < best_n) r[k++] = r[i]; else if (r[i].p) free(r[i].p); if (k != n) mm_sync_regs(km, k, r); // removing hits requires sync() *n_ = k; @@ -148,7 +166,7 @@ void mm_select_sub(void *km, float mask_level, float pri_ratio, int *n_, mm_reg1 } void mm_filter_regs(void *km, const mm_mapopt_t *opt, int *n_regs, mm_reg1_t *regs) -{ +{ // NB: after this call, mm_reg1_t::parent can be -1 if its parent filtered out int i, k; for (i = k = 0; i < *n_regs; ++i) { mm_reg1_t *r = ®s[i]; @@ -169,7 +187,6 @@ void mm_filter_regs(void *km, const mm_mapopt_t *opt, int *n_regs, mm_reg1_t *re } } *n_regs = k; - mm_sync_regs(km, k, regs); } int mm_squeeze_a(void *km, int n_regs, mm_reg1_t *regs, mm128_t *a) @@ -192,9 +209,9 @@ int mm_squeeze_a(void *km, int n_regs, mm_reg1_t *regs, mm128_t *a) return as; } -void mm_join_long(void *km, const mm_mapopt_t *opt, int qlen, int n_regs, mm_reg1_t *regs, mm128_t *a) +void mm_join_long(void *km, const mm_mapopt_t *opt, int qlen, int *n_regs_, mm_reg1_t *regs, mm128_t *a) { - int i, n_aux; + int i, n_aux, n_regs = *n_regs_, n_drop = 0; uint64_t *aux; if (n_regs < 2) return; // nothing to join @@ -228,8 +245,22 @@ void mm_join_long(void *km, const mm_mapopt_t *opt, int qlen, int n_regs, mm_reg r0->cnt += r1->cnt, r0->score += r1->score; mm_reg_set_coor(r0, qlen, a); r1->cnt = 0; + r1->parent = r0->id; + ++n_drop; } kfree(km, aux); + + if (n_drop > 0) { // then fix the hits hierarchy + for (i = 0; i < n_regs; ++i) { // adjust the mm_reg1_t::parent + mm_reg1_t *r = ®s[i]; + if (r->parent >= 0 && r->id != r->parent) { // fix for secondary hits only + if (regs[r->parent].parent >= 0 && regs[r->parent].parent != r->parent) + r->parent = regs[r->parent].parent; + } + } + mm_filter_regs(km, opt, n_regs_, regs); + mm_sync_regs(km, *n_regs_, regs); + } } void mm_set_mapq(int n_regs, mm_reg1_t *regs) @@ -239,7 +270,9 @@ void mm_set_mapq(int n_regs, mm_reg1_t *regs) mm_reg1_t *r = ®s[i]; if (r->parent == r->id) { int mapq; - mapq = (int)(30.0 * (1. - (float)r->subsc / r->score) * logf(r->score)); + if (r->p && r->p->dp_max2 > 0 && r->p->dp_max > 0) + mapq = (int)(30.0 * (1. - (float)(r->p->dp_max2 * r->subsc) / (r->p->dp_max * r->score)) * logf(r->score)); + else mapq = (int)(30.0 * (1. - (float)r->subsc / r->score) * logf(r->score)); mapq = mapq > 0? mapq : 0; r->mapq = mapq < 60? mapq : 60; } else r->mapq = 0; diff --git a/main.c b/main.c index 6804548..eda7d6f 100644 --- a/main.c +++ b/main.c @@ -10,7 +10,7 @@ #include "minimap.h" #include "mmpriv.h" -#define MM_VERSION "2.0-r141-pre" +#define MM_VERSION "2.0-r151-pre" void liftrlimit() { @@ -45,6 +45,8 @@ static struct option long_options[] = { { "bucket-bits", required_argument, 0, 0 }, { "mb-size", required_argument, 0, 0 }, { "int-rname", no_argument, 0, 0 }, + { "no-kalloc", no_argument, 0, 0 }, + { "print-qname", no_argument, 0, 0 }, { "version", no_argument, 0, 'V' }, { "min-count", required_argument, 0, 'n' }, { "min-chain-score",required_argument, 0, 'm' }, @@ -68,7 +70,7 @@ int main(int argc, char *argv[]) mm_realtime0 = realtime(); mm_mapopt_init(&opt); - while ((c = getopt_long(argc, argv, "aw:k:t:r:f:Vv:g:I:d:ST:s:x:Hcp:M:n:z:A:B:O:E:m:D:", long_options, &long_idx)) >= 0) { + while ((c = getopt_long(argc, argv, "aw:k:t:r:f:Vv:g:I:d:XT:s:x:Hcp:M:n:z:A:B:O:E:m:D:N:", long_options, &long_idx)) >= 0) { if (c == 'w') w = atoi(optarg); else if (c == 'k') k = atoi(optarg); else if (c == 'H') is_hpc = 1; @@ -78,11 +80,12 @@ int main(int argc, char *argv[]) else if (c == 't') n_threads = atoi(optarg); else if (c == 'v') mm_verbose = atoi(optarg); else if (c == 'g') opt.max_gap = atoi(optarg); + else if (c == 'N') opt.best_n = atoi(optarg); else if (c == 'p') opt.pri_ratio = atof(optarg); else if (c == 'D') opt.min_seedcov_ratio = atof(optarg); else if (c == 'M') opt.mask_level = atof(optarg); else if (c == 'c') opt.flag |= MM_F_CIGAR; - else if (c == 'S') opt.flag |= MM_F_AVA | MM_F_NO_SELF; + else if (c == 'X') opt.flag |= MM_F_AVA | MM_F_NO_SELF; else if (c == 'a') opt.flag |= MM_F_OUT_SAM | MM_F_CIGAR; else if (c == 'T') opt.sdust_thres = atoi(optarg); else if (c == 'n') opt.min_cnt = atoi(optarg); @@ -93,8 +96,10 @@ int main(int argc, char *argv[]) else if (c == 'E') opt.e = atoi(optarg); else if (c == 'z') opt.zdrop = atoi(optarg); else if (c == 's') opt.min_dp_max = atoi(optarg); - else if (c == 0 && long_idx == 0) bucket_bits = atoi(optarg); // bucket-bits - else if (c == 0 && long_idx == 2) keep_name = 0; // int-rname + else if (c == 0 && long_idx == 0) bucket_bits = atoi(optarg); // --bucket-bits + else if (c == 0 && long_idx == 2) keep_name = 0; // --int-rname + else if (c == 0 && long_idx == 3) mm_dbg_flag |= MM_DBG_NO_KALLOC; // --no-kalloc + else if (c == 0 && long_idx == 4) mm_dbg_flag |= MM_DBG_PRINT_QNAME; // --print-qname else if (c == 'V') { puts(MM_VERSION); return 0; @@ -140,11 +145,12 @@ int main(int argc, char *argv[]) fprintf(stderr, " -n INT minimal number of minimizers on a chain [%d]\n", opt.min_cnt); fprintf(stderr, " -m INT minimal chaining score (matching bases minus log gap penalty) [%d]\n", opt.min_chain_score); // fprintf(stderr, " -T INT SDUST threshold; 0 to disable SDUST [%d]\n", opt.sdust_thres); // TODO: this option is never used; might be buggy - fprintf(stderr, " -S skip self and dual mappings (for the all-vs-all mode)\n"); + fprintf(stderr, " -X skip self and dual mappings (for the all-vs-all mode)\n"); fprintf(stderr, " -p FLOAT min secondary-to-primary score ratio [%g]\n", opt.pri_ratio); + fprintf(stderr, " -N INT retain at most INT secondary alignments [%d]\n", opt.best_n); fprintf(stderr, " -D FLOAT min fraction of minimizer matches [%g]\n", opt.min_seedcov_ratio); fprintf(stderr, " -x STR preset (recommended to be applied before other options) []\n"); - fprintf(stderr, " ava10k: -Hk19 -Sw5 -p0 -m100 -D.05 (PacBio/ONT all-vs-all read mapping)\n"); + fprintf(stderr, " ava10k: -Hk19 -w5 -Xp0 -m100 -D.05 (PacBio/ONT all-vs-all read mapping)\n"); fprintf(stderr, " map10k: -Hk19 (PacBio/ONT vs reference mapping)\n"); fprintf(stderr, " asm1m: -k19 -w19 (intra-species assembly to ref mapping)\n"); fprintf(stderr, " Alignment:\n"); @@ -160,7 +166,7 @@ int main(int argc, char *argv[]) fprintf(stderr, " -t INT number of threads [%d]\n", n_threads); // fprintf(stderr, " -v INT verbose level [%d]\n", mm_verbose); fprintf(stderr, " -V show version number\n"); - fprintf(stderr, "\nSee minimap2.1 for detailed description of the command-line options.\n"); + fprintf(stderr, "\nSee `man ./minimap2.1' for detailed description of command-line options.\n"); return 1; } diff --git a/map.c b/map.c index 53c85db..bb84703 100644 --- a/map.c +++ b/map.c @@ -22,8 +22,9 @@ void mm_mapopt_init(mm_mapopt_t *opt) opt->max_chain_skip = 15; opt->min_seedcov_ratio = 0.0f; - opt->pri_ratio = 2.0f; opt->mask_level = 0.5f; + opt->pri_ratio = 0.8f; + opt->best_n = 5; opt->max_join_long = 20000; opt->max_join_short = 2000; @@ -63,7 +64,7 @@ mm_tbuf_t *mm_tbuf_init(void) { mm_tbuf_t *b; b = (mm_tbuf_t*)calloc(1, sizeof(mm_tbuf_t)); - if (mm_verbose < 10) b->km = km_init(); + if (!(mm_dbg_flag & 1)) b->km = km_init(); b->sdb = sdust_buf_init(b->km); return b; } @@ -240,16 +241,16 @@ mm_reg1_t *mm_map_frag(const mm_mapopt_t *opt, const mm_idx_t *mi, mm_tbuf_t *b, *n_regs = n_u; if (!(opt->flag & MM_F_AVA)) { // don't choose primary mapping(s) for read overlap mm_set_parent(b->km, opt->mask_level, *n_regs, regs); - mm_select_sub(b->km, opt->mask_level, opt->pri_ratio, n_regs, regs); - mm_join_long(b->km, opt, qlen, *n_regs, regs, a); // TODO: this can be applied to all-vs-all in principle + mm_select_sub(b->km, opt->mask_level, opt->pri_ratio, opt->best_n, n_regs, regs); + mm_join_long(b->km, opt, qlen, n_regs, regs, a); // TODO: this can be applied to all-vs-all in principle } if (opt->flag & MM_F_CIGAR) { regs = mm_align_skeleton(b->km, opt, mi, qlen, seq, n_regs, regs, a); // this calls mm_filter_regs() if (!(opt->flag & MM_F_AVA)) { - mm_update_parent(b->km, opt->mask_level, *n_regs, regs); - mm_select_sub(b->km, opt->mask_level, opt->pri_ratio, n_regs, regs); + mm_set_parent(b->km, opt->mask_level, *n_regs, regs); + mm_select_sub(b->km, opt->mask_level, opt->pri_ratio, opt->best_n, n_regs, regs); } - } else mm_filter_regs(b->km, opt, n_regs, regs); + } mm_set_mapq(*n_regs, regs); // free @@ -261,7 +262,6 @@ mm_reg1_t *mm_map_frag(const mm_mapopt_t *opt, const mm_idx_t *mi, mm_tbuf_t *b, mm_reg1_t *mm_map(const mm_idx_t *mi, int l_seq, const char *seq, int *n_regs, mm_tbuf_t *b, const mm_mapopt_t *opt, const char *qname) { mm_reg1_t *regs; - if (mm_verbose >= 11) fprintf(stderr, "===> %s <===\n", qname); b->mini.n = 0; mm_sketch(b->km, seq, l_seq, mi->w, mi->k, 0, mi->is_hpc, &b->mini); if (opt->sdust_thres > 0) @@ -294,6 +294,8 @@ typedef struct { static void worker_for(void *_data, long i, int tid) // kt_for() callback { step_t *step = (step_t*)_data; + if (mm_dbg_flag & MM_DBG_PRINT_QNAME) + fprintf(stderr, "Processing query %s on thread %d\n", step->seq[i].name, tid); step->reg[i] = mm_map(step->p->mi, step->seq[i].l_seq, step->seq[i].seq, &step->n_reg[i], step->buf[tid], step->p->opt, step->seq[i].name); } diff --git a/minimap.h b/minimap.h index 51e99e7..fc3f4fc 100644 --- a/minimap.h +++ b/minimap.h @@ -45,7 +45,7 @@ typedef struct { typedef struct { uint32_t capacity; - int32_t dp_score, dp_max; + int32_t dp_score, dp_max, dp_max2; uint32_t blen; uint32_t n_diff, n_ambi; uint32_t n_cigar; @@ -77,8 +77,9 @@ typedef struct { int min_chain_score; float min_seedcov_ratio; - float pri_ratio; float mask_level; + float pri_ratio; + int best_n; int max_join_long, max_join_short; int min_join_flank_sc; @@ -92,7 +93,7 @@ typedef struct { int mid_occ; } mm_mapopt_t; -extern int mm_verbose; +extern int mm_verbose, mm_dbg_flag; extern double mm_realtime0; struct mm_tbuf_s; diff --git a/minimap2.1 b/minimap2.1 index 0afa761..ee6c3e0 100644 --- a/minimap2.1 +++ b/minimap2.1 @@ -1,4 +1,4 @@ -.TH minimap2 1 "1 July 2017" "minimap2-2.0-r141-pre" "Bioinformatics tools" +.TH minimap2 1 "1 July 2017" "minimap2-2.0-r145-pre" "Bioinformatics tools" .SH NAME .PP minimap2 - mapping and alignment between collections of DNA sequences @@ -142,21 +142,27 @@ not using .BR -H ) minus base-2 logarithm gap penalty. It is computed with dynamic programming. .TP -.B -S +.B -X Perform all-vs-all mapping. In this mode, if the query sequence name is lexicographically larger than the target sequence name, the hits between them will be suppressed; if the query sequence name is the same as the target name, diagonal minimizer hits will also be suppressed. .TP .BI -p \ FLOAT -Minimal secondary-to-primary score ratio to output secondary mappings [2]. +Minimal secondary-to-primary score ratio to output secondary mappings [0.8]. Between two chains overlaping over half of the shorter chain (controled by .BR --mask-level ), the chain with a lower score is secondary to the chain with a higher score. If the ratio of the scores is below .IR FLOAT , the secondary chain will not be outputted or extended with DP alignment later. -The default value suppresses all secondary chains. +.TP +.BI -N \ INT +Output at most +.I INT +secondary alignments [5]. This option has no effect when +.B -X +is applied. .TP .BI -D \ FLOAT Discard a chain if the fraction of matching bases over the length of @@ -175,7 +181,7 @@ are: .RS .TP 8 .B ava10k -PacBio/Oxford Nanopore all-vs-all overlap mapping (-Hk19 -Sw5 -p0 -m100 -D.05) +PacBio/Oxford Nanopore all-vs-all overlap mapping (-Hk19 -w5 -Xp0 -m100 -D.05) .TP .B map10k PacBio/Oxford Nanopore read to reference mapping (-Hk19) @@ -220,14 +226,24 @@ by default. Generate CIGAR. In PAF, the CIGAR is written to the `cg' custom tag. .TP .BI -t \ INT -Number of threads [3]. Minimap2 uses at most three threads when collecting -minimizers on target sequences, and uses up to +Number of threads [3]. Minimap2 uses at most three threads when indexing target +sequences, and uses up to .IR INT +1 threads when mapping (the extra thread is for I/O, which is frequently idle and takes little CPU time). .TP .B -V Print version number to stdout +.SS Miscellaneous options +.TP 10 +.B --no-kalloc +Use the libc default allocator instead of the kalloc thread-local allocator. +This debugging option is mostly used with Valgrind to detect invalid memory +accesses. Minimap2 runs slower with this option, especially in the +multi-threading mode. +.TP +.B --print-qname +Print query names to stderr, mostly to see which query is crashing minimap2. .SH OUTPUT FORMAT .PP Minimap2 outputs mapping positions in the Pairwise mApping Format (PAF) by diff --git a/misc.c b/misc.c index 9a85fec..f6699ed 100644 --- a/misc.c +++ b/misc.c @@ -3,6 +3,7 @@ #include "minimap.h" int mm_verbose = 3; +int mm_dbg_flag = 0; double mm_realtime0; double cputime() diff --git a/mmpriv.h b/mmpriv.h index 62d088a..6f7c48d 100644 --- a/mmpriv.h +++ b/mmpriv.h @@ -8,6 +8,9 @@ #define MM_PARENT_UNSET (-1) #define MM_PARENT_TMP_PRI (-2) +#define MM_DBG_NO_KALLOC 0x1 +#define MM_DBG_PRINT_QNAME 0x2 + #ifndef kroundup32 #define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x)) #endif @@ -40,30 +43,14 @@ mm_reg1_t *mm_gen_regs(void *km, int qlen, int n_u, uint64_t *u, mm128_t *a); void mm_split_reg(mm_reg1_t *r, mm_reg1_t *r2, int n, int qlen, mm128_t *a); void mm_sync_regs(void *km, int n_regs, mm_reg1_t *regs); void mm_set_parent(void *km, float mask_level, int n, mm_reg1_t *r); -void mm_update_parent(void *km, float mask_level, int n, mm_reg1_t *r); -void mm_select_sub(void *km, float mask_level, float pri_ratio, int *n_, mm_reg1_t *r); +void mm_select_sub(void *km, float mask_level, float pri_ratio, int best_n, int *n_, mm_reg1_t *r); void mm_filter_regs(void *km, const mm_mapopt_t *opt, int *n_regs, mm_reg1_t *regs); -void mm_join_long(void *km, const mm_mapopt_t *opt, int qlen, int n_regs, mm_reg1_t *regs, mm128_t *a); +void mm_join_long(void *km, const mm_mapopt_t *opt, int qlen, int *n_regs, mm_reg1_t *regs, mm128_t *a); +void mm_hit_sort_by_dp(void *km, int *n_regs, mm_reg1_t *r); void mm_set_mapq(int n_regs, mm_reg1_t *regs); #ifdef __cplusplus } #endif -static inline void mm_reg_set_coor(mm_reg1_t *r, int32_t qlen, const mm128_t *a) -{ - int32_t k = r->as, q_span = (int32_t)(a[k].y>>32&0xff); - r->rev = a[k].x>>63; - r->rid = a[k].x<<1>>33; - r->rs = (int32_t)a[k].x + 1 > q_span? (int32_t)a[k].x + 1 - q_span : 0; // NB: target span may be shorter, so this test is necessary - r->re = (int32_t)a[k + r->cnt - 1].x + 1; - if (!r->rev) { - r->qs = (int32_t)a[k].y + 1 - q_span; - r->qe = (int32_t)a[k + r->cnt - 1].y + 1; - } else { - r->qs = qlen - ((int32_t)a[k + r->cnt - 1].y + 1); - r->qe = qlen - ((int32_t)a[k].y + 1 - q_span); - } -} - #endif