diff --git a/align.c b/align.c index 2c08558..dc571ac 100644 --- a/align.c +++ b/align.c @@ -167,6 +167,7 @@ static void mm_align1(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int int32_t rs1, qs1, re1, qe1; int8_t mat[25]; + if (r->cnt == 0) return; ksw_gen_simple_mat(5, mat, opt->a, opt->b); bw = (int)(opt->bw * 1.5 + 1.); @@ -221,7 +222,10 @@ static void mm_align1(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int for (i = 1; i < r->cnt; ++i) { // gap filling mm_adjust_minier(mi, qseq0, &a[r->as + i], &re, &qe); re1 = re, qe1 = qe; - if (i == r->cnt - 1 || qe - qs >= opt->min_ksw_len || re - rs >= opt->min_ksw_len) { + if (i == r->cnt - 1 || (a[r->as+i].y>>40&1) || qe - qs >= opt->min_ksw_len || re - rs >= opt->min_ksw_len) { + int bw1 = bw; + if (a[r->as+i].y>>40&1) + bw1 = qe - qs > re - rs? qe - qs : re - rs; qseq = &qseq0[rev][qs]; mm_idx_getseq(mi, rid, rs, re, tseq); #if 0 @@ -229,9 +233,9 @@ static void mm_align1(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int for (k = 0; k < tl; ++k) fputc("ACGTN"[tseq[k]], stderr); fputc('\n', stderr); for (k = 0; k < ql; ++k) fputc("ACGTN"[qseq[k]], stderr); fputc('\n', stderr); #endif - ksw_extz2_sse(km, qe - qs, qseq, re - rs, tseq, 5, mat, opt->q, opt->e, bw, opt->zdrop, KSW_EZ_APPROX_MAX, ez); + ksw_extz2_sse(km, qe - qs, qseq, re - rs, tseq, 5, mat, opt->q, opt->e, bw1, opt->zdrop, KSW_EZ_APPROX_MAX, ez); if (mm_check_zdrop(qseq, tseq, ez->n_cigar, ez->cigar, mat, opt->q, opt->e, opt->zdrop)) - ksw_extz2_sse(km, qe - qs, qseq, re - rs, tseq, 5, mat, opt->q, opt->e, bw, opt->zdrop, 0, ez); + ksw_extz2_sse(km, qe - qs, qseq, re - rs, tseq, 5, mat, opt->q, opt->e, bw1, opt->zdrop, 0, ez); if (ez->n_cigar > 0) { mm_append_cigar(r, ez->n_cigar, ez->cigar); mm_update_extra(r->p, qseq, tseq, ez->n_cigar, ez->cigar, 0); @@ -313,8 +317,8 @@ mm_reg1_t *mm_align_skeleton(void *km, const mm_mapopt_t *opt, const mm_idx_t *m for (r = i = 0; r < n_regs; ++r) { mm_reg1_t *reg = ®s[r]; int flt = 0; - if (reg->p->blen - reg->p->n_ambi - reg->p->n_diff < opt->min_chain_score) flt = 1; - else if (reg->cnt < opt->min_cnt) flt = 1; + if (reg->cnt < opt->min_cnt) flt = 1; + else if (reg->p->blen - reg->p->n_ambi - reg->p->n_diff < opt->min_chain_score) flt = 1; else if (reg->p->score < opt->min_dp_score && (reg->qe - reg->qs) * 2 < qlen) flt = 1; if (flt) free(reg->p); else if (i < r) regs[i++] = regs[r]; // NB: this also move the regs[r].p pointer diff --git a/main.c b/main.c index f621001..800db9f 100644 --- a/main.c +++ b/main.c @@ -10,7 +10,7 @@ #include "minimap.h" #include "mmpriv.h" -#define MM_VERSION "2.0-r99-pre" +#define MM_VERSION "2.0-r104-pre" void liftrlimit() { diff --git a/map.c b/map.c index b8816f7..35cde64 100644 --- a/map.c +++ b/map.c @@ -225,9 +225,13 @@ void mm_select_sub(float mask_level, float pri_ratio, int *n_, mm_reg1_t *r, voi } } -static int mm_squeeze_a_core(void *km, int n_regs, mm_reg1_t *regs, mm128_t *a, const uint64_t *aux) +static int mm_squeeze_a(void *km, int n_regs, mm_reg1_t *regs, mm128_t *a) { // squeeze out regions in a[] that are not referenced by regs[] - int i, as = 0; + int i, n_aux, as = 0; + uint64_t *aux; + aux = (uint64_t*)kmalloc(km, n_regs * 8); + for (i = n_aux = 0; i < n_regs; ++i) + aux[n_aux++] = (uint64_t)regs[i].as << 32 | i; for (i = 0; i < n_regs; ++i) { mm_reg1_t *r = ®s[i]; if (r->as != as) { @@ -236,21 +240,23 @@ static int mm_squeeze_a_core(void *km, int n_regs, mm_reg1_t *regs, mm128_t *a, } as += r->cnt; } + kfree(km, aux); return as; } -void mm_join_long(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int qlen, int n_regs, mm_reg1_t *regs, mm128_t *a) +void mm_join_long(void *km, const mm_mapopt_t *opt, int qlen, int n_regs, mm_reg1_t *regs, mm128_t *a) { int i, n_aux; uint64_t *aux; if (n_regs < 2) return; // nothing to join + mm_squeeze_a(km, n_regs, regs, a); + aux = (uint64_t*)kmalloc(km, n_regs * 8); for (i = n_aux = 0; i < n_regs; ++i) - aux[n_aux++] = (uint64_t)regs[i].as << 32 | i; - assert(n_aux == n_regs); // TODO: may be relaxed in future for other use cases + if (regs[i].parent == i || regs[i].parent < 0) + aux[n_aux++] = (uint64_t)regs[i].as << 32 | i; radix_sort_64(aux, aux + n_aux); - mm_squeeze_a_core(km, n_regs, regs, a, aux); for (i = n_aux - 1; i >= 1; --i) { mm_reg1_t *r0 = ®s[(int32_t)aux[i-1]], *r1 = ®s[(int32_t)aux[i]]; @@ -351,8 +357,10 @@ mm_reg1_t *mm_map_frag(const mm_mapopt_t *opt, const mm_idx_t *mi, mm_tbuf_t *b, n_u = mm_chain_dp(opt->max_gap, opt->bw, opt->max_chain_skip, opt->min_cnt, opt->min_chain_score, n_a, a, &u, b->km); regs = mm_gen_reg(qlen, n_u, u, a); *n_regs = n_u; - if (!(opt->flag & MM_F_AVA)) // don't choose primary mapping(s) for read overlap + if (!(opt->flag & MM_F_AVA)) { // don't choose primary mapping(s) for read overlap mm_select_sub(opt->mask_level, opt->pri_ratio, n_regs, regs, b->km); + mm_join_long(b->km, opt, qlen, *n_regs, regs, a); // TODO: this can be applied to all-vs-all in principle + } if (opt->flag & MM_F_CIGAR) regs = mm_align_skeleton(b->km, opt, mi, qlen, seq, n_regs, regs, a); @@ -432,6 +440,7 @@ static void *worker_pipeline(void *shared, int step, void *in) bseq1_t *t = &s->seq[i]; for (j = 0; j < s->n_reg[i]; ++j) { mm_reg1_t *r = &s->reg[i][j]; + if (r->cnt == 0) continue; if (p->opt->flag & MM_F_OUT_SAM) mm_write_sam(&p->str, mi, t, j, r); else mm_write_paf(&p->str, mi, t, j, r); puts(p->str.s);