diff --git a/align.c b/align.c index 8f062bb..496829a 100644 --- a/align.c +++ b/align.c @@ -2,31 +2,60 @@ #include "minimap.h" #include "ksw2.h" -static void mm_align1(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int qlen, uint8_t *qseq0[2], mm_reg1_t *r, mm_reg1_t *r_split, mm128_t *a) +static inline void mm_seq_rev(uint32_t len, uint8_t *seq) +{ + uint32_t i; + uint8_t t; + for (i = 0; i < len>>1; ++i) + t = seq[i], seq[i] = seq[len - 1 - i], seq[len - 1 - i] = t; +} + +static void mm_align1(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int qlen, uint8_t *qseq0[2], mm_reg1_t *r, mm_reg1_t *r2, mm128_t *a) { int32_t rid = a[r->as].x<<1>>33, rev = a[r->as].x>>63; - uint8_t *tseq0, *tseq, *qseq; - int32_t i, l, rs0, re0; + uint8_t *tseq, *qseq; + int32_t i, k, l, rs0, re0, qs0, qe0; int32_t rs, re, qs, qe, ret; + mm_reg1_t r1; - l = r->qs < opt->max_gap? r->qs : opt->max_gap; - l = (l * opt->a - opt->q) / opt->e; - l = l < opt->max_gap? l : opt->max_gap; - l = l < r->rs? l : r->rs; - rs0 = r->rs - l; + rs = (int32_t)a[r->as].x + 1; // NB: this is the same as r->{rs,re} + re = (int32_t)a[r->as + r->cnt - 1].x + 1; + qs = (int32_t)a[r->as].y + 1; // NB: this is the coordinate on the reverse strand; r->{qs,qe} are on the reverse strand + qe = (int32_t)a[r->as + r->cnt - 1].y + 1; - l = qlen - r->re < opt->max_gap? qlen - r->re : opt->max_gap; - l = (l * opt->a - opt->q) / opt->e; - l = l < opt->max_gap? l : opt->max_gap; - l = l < mi->seq[rid].len - r->re? l : mi->seq[rid].len - r->re; - re0 = r->re + l; + if (qs > 0 && rs > 0) { + l = qs < opt->max_gap? qs : opt->max_gap; + qs0 = qs - l; + l = (l * opt->a - opt->q) / opt->e; + l = l < opt->max_gap? l : opt->max_gap; + l = l < rs? l : rs; + rs0 = rs - l; + } else rs0 = rs, qs0 = qs; - tseq0 = (uint8_t*)kmalloc(km, re0 - rs0); - ret = mm_idx_getseq(mi, rid, rs0, re0, tseq0); - assert(ret > 0); + if (qe < qlen && re < mi->seq[rid].len) { + l = qlen - re < opt->max_gap? 
qlen - re : opt->max_gap; + qe0 = qe + l; + l = (l * opt->a - opt->q) / opt->e; + l = l < opt->max_gap? l : opt->max_gap; + l = l < mi->seq[rid].len - re? l : mi->seq[rid].len - re; + re0 = re + l; + } else re0 = re, qe0 = qe; - tseq0 = (uint8_t*)kmalloc(km, re0 - rs0); - ret = mm_idx_getseq(mi, rid, rs0, re0, tseq0); - assert(ret > 0); + tseq = (uint8_t*)kmalloc(km, re0 - rs0); + + if (qs > 0 && rs > 0) { // left extension + uint32_t ql = qs - qs0, tl = rs - rs0; + qseq = &qseq0[rev][qs0]; + ret = mm_idx_getseq(mi, rid, rs0, rs, tseq); + assert(ret > 0); + mm_seq_rev(ql, qseq); + mm_seq_rev(tl, tseq); + fprintf(stderr, "===> [-1] %d-%d %c (%s:%d-%d) <===\n", qs0, qs, "+-"[rev], mi->seq[rid].name, rs0, rs); + for (k = 0; k < tl; ++k) fputc("ACGTN"[tseq[k]], stderr); fputc('\n', stderr); + for (k = 0; k < ql; ++k) fputc("ACGTN"[qseq[k]], stderr); fputc('\n', stderr); + mm_seq_rev(ql, qseq); + } +/* for (i = 1; i < r->cnt; ++i) { re = (int32_t)a[r->as + i].x + 1; qe = (int32_t)a[r->as + i].y + 1; @@ -41,7 +70,8 @@ static void mm_align1(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int rs = re, qs = qe; } } - kfree(km, tseq0); +*/ + kfree(km, tseq); } void mm_align_skeleton(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int qlen, const char *qstr, int n_regs, mm_reg1_t *regs, mm128_t *a) @@ -58,8 +88,8 @@ void mm_align_skeleton(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int } for (reg = 0; reg < n_regs; ++reg) { - mm_reg1_t r_split; - mm_align1(km, opt, mi, qlen, qseq0, &regs[reg], &r_split, a); + mm_reg1_t r2; + mm_align1(km, opt, mi, qlen, qseq0, &regs[reg], &r2, a); } kfree(km, qseq0[0]); kfree(km, qseq0[1]); diff --git a/minimap.h b/minimap.h index 79ef7c5..807ce9e 100644 --- a/minimap.h +++ b/minimap.h @@ -41,6 +41,11 @@ typedef struct { mm_idx_bucket_t *B; // index } mm_idx_t; +typedef struct { + uint32_t n_cigar, m_cigar; + uint32_t cigar[]; +} mm_cigar_t; + typedef struct { uint32_t cnt:31, rev:1; uint32_t rid:31, rep:1; @@ -48,6 +53,7 @@ typedef struct { int32_t qs, qe, rs, re; int32_t
parent, subsc; int32_t as; + mm_cigar_t *cigar; } mm_reg1_t; typedef struct {