diff --git a/align.c b/align.c index 70c69a3..8f062bb 100644 --- a/align.c +++ b/align.c @@ -2,10 +2,52 @@ #include "minimap.h" #include "ksw2.h" +static void mm_align1(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int qlen, uint8_t *qseq0[2], mm_reg1_t *r, mm_reg1_t *r_split, mm128_t *a) +{ + int32_t rid = a[r->as].x<<1>>33, rev = a[r->as].x>>63; + uint8_t *tseq0, *tseq, *qseq; + int32_t i, l, rs0, re0; + int32_t rs, re, qs, qe, ret; + + l = r->qs < opt->max_gap? r->qs : opt->max_gap; + l = (l * opt->a - opt->q) / opt->e; + l = l < opt->max_gap? l : opt->max_gap; + l = l < r->rs? l : r->rs; + rs0 = r->rs - l; + + l = qlen - r->re < opt->max_gap? qlen - r->re : opt->max_gap; + l = (l * opt->a - opt->q) / opt->e; + l = l < opt->max_gap? l : opt->max_gap; + l = l < mi->seq[rid].len - r->re? l : mi->seq[rid].len - r->re; + re0 = r->re + l; + + tseq0 = (uint8_t*)kmalloc(km, re0 - rs0); + ret = mm_idx_getseq(mi, rid, rs0, re0, tseq0); + assert(ret > 0); + + rs = (int32_t)a[r->as].x + 1; + qs = (int32_t)a[r->as].y + 1; + for (i = 1; i < r->cnt; ++i) { + re = (int32_t)a[r->as + i].x + 1; + qe = (int32_t)a[r->as + i].y + 1; + if (i == r->cnt - 1 || qe - qs >= opt->min_ksw_len || re - rs >= opt->min_ksw_len) { + qseq = &qseq0[rev][qs]; + tseq = &tseq0[rs - rs0]; + #if 0 + fprintf(stderr, "===> [%d] %d-%d %c (%s:%d-%d) <===\n", i, qs, qe, "+-"[rev], mi->seq[rid].name, rs, re); + for (k = 0; k < re - rs; ++k) fputc("ACGTN"[tseq[k]], stderr); fputc('\n', stderr); + for (k = 0; k < qe - qs; ++k) fputc("ACGTN"[qseq[k]], stderr); fputc('\n', stderr); + #endif + rs = re, qs = qe; + } + } + kfree(km, tseq0); +} + void mm_align_skeleton(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int qlen, const char *qstr, int n_regs, mm_reg1_t *regs, mm128_t *a) { extern unsigned char seq_nt4_table[256]; - int i, k, reg; + int i, reg; uint8_t *qseq0[2]; qseq0[0] = (uint8_t*)kmalloc(km, qlen); @@ -16,28 +58,8 @@ void mm_align_skeleton(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int } for (reg = 0; reg < n_regs; ++reg) { - mm_reg1_t *r = ®s[reg]; - int32_t rid = a[r->as].x<<1>>33, rev = a[r->as].x>>63; - uint8_t *tseq0, *tseq, *qseq; - int32_t rs, re, qs, qe, ret; - tseq0 = (uint8_t*)kmalloc(km, r->re - r->rs); - ret = mm_idx_getseq(mi, rid, r->rs, r->re, tseq0); - assert(ret > 0); - rs = (int32_t)a[r->as].x + 1; - qs = (int32_t)a[r->as].y + 1; - for (i = 1; i < r->cnt; ++i) { - re = (int32_t)a[r->as + i].x + 1; - qe = (int32_t)a[r->as + i].y + 1; - if (qe - qs >= opt->min_ksw_len || re - rs >= opt->min_ksw_len) { - qseq = &qseq0[rev][qs]; - tseq = &tseq0[rs - r->rs]; - fprintf(stderr, "===> [%d,%d] %d-%d %c (%s:%d-%d) <===\n", reg, i, qs, qe, "+-"[rev], mi->seq[rid].name, rs, re); - for (k = 0; k < re - rs; ++k) fputc("ACGTN"[tseq[k]], stderr); fputc('\n', stderr); - for (k = 0; k < qe - qs; ++k) fputc("ACGTN"[qseq[k]], stderr); fputc('\n', stderr); - rs = re, qs = qe; - } - } - kfree(km, tseq0); + mm_reg1_t r_split; + mm_align1(km, opt, mi, qlen, qseq0, ®s[reg], &r_split, a); } kfree(km, qseq0[0]); kfree(km, qseq0[1]); diff --git a/map.c b/map.c index 795e175..e86143c 100644 --- a/map.c +++ b/map.c @@ -24,6 +24,7 @@ void mm_mapopt_init(mm_mapopt_t *opt) opt->mask_level = 0.5f; opt->a = 1, opt->b = 1, opt->q = 1, opt->e = 1; + opt->zdrop = 100; opt->min_ksw_len = 100; } diff --git a/minimap.h b/minimap.h index 33c6128..79ef7c5 100644 --- a/minimap.h +++ b/minimap.h @@ -62,6 +62,7 @@ typedef struct { float pri_ratio; float mask_level; int a, b, q, e; // matching score, mismatch, gap-open and gap-ext penalties + int zdrop; int max_occ; int mid_occ;