diff --git a/Makefile b/Makefile index a415f12..31eb857 100644 --- a/Makefile +++ b/Makefile @@ -51,6 +51,7 @@ index.o: kthread.h bseq.h minimap.h mmpriv.h kvec.h kalloc.h khash.h kalloc.o: kalloc.h ksw2_extd2_sse.o: ksw2.h kalloc.h ksw2_extz2_sse.o: ksw2.h kalloc.h +ksw2_ll_sse.o: ksw2.h kalloc.h main.o: bseq.h minimap.h mmpriv.h map.o: kthread.h kvec.h kalloc.h sdust.h mmpriv.h minimap.h bseq.h misc.o: minimap.h ksort.h diff --git a/align.c b/align.c index c1478e3..595f0b1 100644 --- a/align.c +++ b/align.c @@ -126,6 +126,12 @@ static void mm_append_cigar(mm_reg1_t *r, uint32_t n_cigar, uint32_t *cigar) // static void mm_align_pair(void *km, const mm_mapopt_t *opt, int qlen, const uint8_t *qseq, int tlen, const uint8_t *tseq, const int8_t *mat, int w, int flag, ksw_extz_t *ez) { + if (mm_dbg_flag & MM_DBG_PRINT_ALN_SEQ) { + int i; + fprintf(stderr, "===> q=(%d,%d), e=(%d,%d), bw=%d, flag=%d, zdrop=%d <===\n", opt->q, opt->q2, opt->e, opt->e2, w, flag, opt->zdrop); + for (i = 0; i < tlen; ++i) fputc("ACGTN"[tseq[i]], stderr); fputc('\n', stderr); + for (i = 0; i < qlen; ++i) fputc("ACGTN"[qseq[i]], stderr); fputc('\n', stderr); + } if (opt->q == opt->q2 && opt->e == opt->e2) ksw_extz2_sse(km, qlen, qseq, tlen, tseq, 5, mat, opt->q, opt->e, w, opt->zdrop, flag, ez); else diff --git a/ksw2.h b/ksw2.h index da64afe..b7774a8 100644 --- a/ksw2.h +++ b/ksw2.h @@ -39,8 +39,8 @@ typedef struct { * @param mat m*m scoring mattrix in one-dimension array * @param gapo gap open penalty; a gap of length l cost "-(gapo+l*gape)" * @param gape gap extension penalty - * @param w band width - * @param zdrop off-diagonal drop-off to stop extension (positive) + * @param w band width (<0 to disable) + * @param zdrop off-diagonal drop-off to stop extension (positive; <0 to disable) * @param flag flag (see KSW_EZ_* macros) * @param ez (out) scores and cigar */ @@ -53,6 +53,8 @@ void ksw_extd(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t void ksw_extd2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat, int8_t gapo, int8_t gape, int8_t gapo2, int8_t gape2, int w, int zdrop, int flag, ksw_extz_t *ez); +void ksw_extf2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t mch, int8_t mis, int8_t e, int w, int xdrop, ksw_extz_t *ez); + /** * Global alignment * @@ -104,16 +106,26 @@ static inline uint32_t *ksw_push_cigar(void *km, int *n_cigar, int *m_cigar, uin // bit 0-2: which type gets the max - 0 for H, 1 for E, 2 for F, 3 for \tilde{E} and 4 for \tilde{F} // bit 3/0x08: 1 if a continuation on the E state (bit 5/0x20 for a continuation on \tilde{E}) // bit 4/0x10: 1 if a continuation on the F state (bit 6/0x40 for a continuation on \tilde{F}) -static inline void ksw_backtrack(void *km, int is_rot, int is_rev, const uint8_t *p, const int *off, int n_col, int i0, int j0, int *m_cigar_, int *n_cigar_, uint32_t **cigar_) +static inline void ksw_backtrack(void *km, int is_rot, int is_rev, const uint8_t *p, const int *off, const int *off_end, int n_col, int i0, int j0, int *m_cigar_, int *n_cigar_, uint32_t **cigar_) { // p[] - lower 3 bits: which type gets the max; bit int n_cigar = 0, m_cigar = *m_cigar_, i = i0, j = j0, r, state = 0; uint32_t *cigar = *cigar_, tmp; while (i >= 0 && j >= 0) { // at the beginning of the loop, _state_ tells us which state to check - if (is_rot) r = i + j, tmp = p[r * n_col + i - off[r]]; - else tmp = p[i * n_col + j - off[i]]; + int force_state = -1; + if (is_rot) { + r = i + j; + if (i < off[r]) force_state = 2; + if (off_end && i > off_end[r]) force_state = 1; + tmp = force_state < 0? p[r * n_col + i - off[r]] : 0; + } else { + if (j < off[i]) force_state = 2; + if (off_end && j > off_end[i]) force_state = 1; + tmp = force_state < 0? p[i * n_col + j - off[i]] : 0; + } if (state == 0) state = tmp & 7; // if requesting the H state, find state one maximizes it. else if (!(tmp >> (state + 2) & 1)) state = 0; // if requesting other states, _state_ stays the same if it is a continuation; otherwise, set to H if (state == 0) state = tmp & 7; // TODO: probably this line can be merged into the "else if" line right above; not 100% sure + if (force_state >= 0) state = force_state; if (state == 0) cigar = ksw_push_cigar(km, &n_cigar, &m_cigar, cigar, 0, 1), --i, --j; // match else if (state == 1 || state == 3) cigar = ksw_push_cigar(km, &n_cigar, &m_cigar, cigar, 2, 1), --i; // deletion else cigar = ksw_push_cigar(km, &n_cigar, &m_cigar, cigar, 1, 1), --j; // insertion diff --git a/ksw2_extd2_sse.c b/ksw2_extd2_sse.c index 3130eec..e0f5337 100644 --- a/ksw2_extd2_sse.c +++ b/ksw2_extd2_sse.c @@ -42,7 +42,7 @@ void ksw_extd2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uin a2= _mm_sub_epi8(a2, tmp); \ b2= _mm_sub_epi8(b2, tmp); - int r, t, qe = q + e, n_col_, *off = 0, tlen_, qlen_, last_st, last_en, wl, wr, max_sc, min_sc, long_thres, long_diff; + int r, t, qe = q + e, n_col_, *off = 0, *off_end = 0, tlen_, qlen_, last_st, last_en, wl, wr, max_sc, min_sc, long_thres, long_diff; int with_cigar = !(flag&KSW_EZ_SCORE_ONLY), approx_max = !!(flag&KSW_EZ_APPROX_MAX); int32_t *H = 0, H0 = 0, last_H0_t = 0; uint8_t *qr, *sf, *mem, *mem2 = 0; @@ -96,7 +96,8 @@ void ksw_extd2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uin if (with_cigar) { mem2 = (uint8_t*)kmalloc(km, ((qlen + tlen - 1) * n_col_ + 1) * 16); p = (__m128i*)(((size_t)mem2 + 15) >> 4 << 4); - off = (int*)kmalloc(km, (qlen + tlen - 1) * sizeof(int)); + off = (int*)kmalloc(km, (qlen + tlen - 1) * sizeof(int) * 2); + off_end = off + qlen + tlen - 1; } for (t = 0; t < qlen; ++t) qr[t] = query[qlen - 1 - t]; @@ -105,7 +106,7 @@ void ksw_extd2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uin for (r = 0, last_st = last_en = -1; r < qlen + tlen - 1; ++r) { int st = 0, en = tlen - 1, st0, en0, st_, en_; int8_t x1, x21, v1; - uint8_t *qrr = qr + (qlen - 1 - r), p_en0 = 0; + uint8_t *qrr = qr + (qlen - 1 - r); int8_t *u8 = (int8_t*)u, *v8 = (int8_t*)v, *x8 = (int8_t*)x, *x28 = (int8_t*)x2; __m128i x1_, x21_, v1_; // find the boundaries @@ -199,18 +200,7 @@ void ksw_extd2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uin } } else if (!(flag&KSW_EZ_RIGHT)) { // gap left-alignment __m128i *pr = p + r * n_col_ - st_; - off[r] = st; - if (en0 < r && en0 < tlen - 1) { // to avoid backtracking out of the band; this assumes a fixed band - int8_t a, a2, z = ((uint8_t*)s)[en0]; - a = x8[en0-1] + v8[en0-1]; - p_en0 = a > z? 1 : 0; - z = a > z? a : z; - p_en0 |= a - (z - q) > 0? 1<<4 : 0; - a2 = x28[en0-1] + v8[en0-1]; - p_en0 = a2 > z? 3 : p_en0; - z = a2 > z? a2 : z; - p_en0 |= a2 - (z - q2) > 0? 1<<6 : 0; - } + off[r] = st, off_end[r] = en; for (t = st_; t <= en_; ++t) { __m128i d, z, a, b, a2, b2, xt1, x2t1, vt1, ut, tmp; __dp_code_block1; @@ -257,18 +247,7 @@ void ksw_extd2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uin } } else { // gap right-alignment __m128i *pr = p + r * n_col_ - st_; - off[r] = st; - if (en0 < r && en0 < tlen - 1) { // to avoid backtracking out of the band; this assumes a fixed band - int8_t a, a2, z = ((uint8_t*)s)[en0]; - a = x8[en0-1] + v8[en0-1]; - p_en0 = a >= z? 1 : 0; - z = a >= z? a : z; - p_en0 |= a - (z - q) >= 0? 1<<4 : 0; - a2 = x28[en0-1] + v8[en0-1]; - p_en0 = a2 >= z? 3 : p_en0; - z = a2 >= z? a2 : z; - p_en0 |= a2 - (z - q2) >= 0? 1<<6 : 0; - } + off[r] = st, off_end[r] = en; for (t = st_; t <= en_; ++t) { __m128i d, z, a, b, a2, b2, xt1, x2t1, vt1, ut, tmp; __dp_code_block1; @@ -314,7 +293,6 @@ void ksw_extd2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uin _mm_store_si128(&pr[t], d); } } - if (with_cigar && en0 < r && en0 < tlen - 1) ((uint8_t*)(p + r * n_col_))[en0 - st] = p_en0; if (!approx_max) { // find the exact max with a 32-bit score array int32_t max_H, max_t; // compute H[], max_H and max_t @@ -384,9 +362,9 @@ void ksw_extd2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uin if (with_cigar) { // backtrack int rev_cigar = !!(flag & KSW_EZ_REV_CIGAR); if (!ez->zdropped && !(flag&KSW_EZ_EXTZ_ONLY)) - ksw_backtrack(km, 1, rev_cigar, (uint8_t*)p, off, n_col_*16, tlen-1, qlen-1, &ez->m_cigar, &ez->n_cigar, &ez->cigar); + ksw_backtrack(km, 1, rev_cigar, (uint8_t*)p, off, off_end, n_col_*16, tlen-1, qlen-1, &ez->m_cigar, &ez->n_cigar, &ez->cigar); else if (ez->max_t >= 0 && ez->max_q >= 0) - ksw_backtrack(km, 1, rev_cigar, (uint8_t*)p, off, n_col_*16, ez->max_t, ez->max_q, &ez->m_cigar, &ez->n_cigar, &ez->cigar); + ksw_backtrack(km, 1, rev_cigar, (uint8_t*)p, off, off_end, n_col_*16, ez->max_t, ez->max_q, &ez->m_cigar, &ez->n_cigar, &ez->cigar); kfree(km, mem2); kfree(km, off); } } diff --git a/ksw2_extz2_sse.c b/ksw2_extz2_sse.c index f728619..d665c6f 100644 --- a/ksw2_extz2_sse.c +++ b/ksw2_extz2_sse.c @@ -33,7 +33,7 @@ void ksw_extz2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uin a = _mm_sub_epi8(a, z); \ b = _mm_sub_epi8(b, z); - int r, t, qe = q + e, n_col_, *off = 0, tlen_, qlen_, last_st, last_en, wl, wr, max_sc, min_sc; + int r, t, qe = q + e, n_col_, *off = 0, *off_end = 0, tlen_, qlen_, last_st, last_en, wl, wr, max_sc, min_sc; int with_cigar = !(flag&KSW_EZ_SCORE_ONLY), approx_max = !!(flag&KSW_EZ_APPROX_MAX); int32_t *H = 0, H0 = 0, last_H0_t = 0; uint8_t *qr, *sf, *mem, *mem2 = 0; @@ -76,7 +76,8 @@ void ksw_extz2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uin if (with_cigar) { mem2 = (uint8_t*)kmalloc(km, ((qlen + tlen - 1) * n_col_ + 1) * 16); p = (__m128i*)(((size_t)mem2 + 15) >> 4 << 4); - off = (int*)kmalloc(km, (qlen + tlen - 1) * sizeof(int)); + off = (int*)kmalloc(km, (qlen + tlen - 1) * sizeof(int) * 2); + off_end = off + qlen + tlen - 1; } for (t = 0; t < qlen; ++t) qr[t] = query[qlen - 1 - t]; @@ -85,7 +86,7 @@ void ksw_extz2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uin for (r = 0, last_st = last_en = -1; r < qlen + tlen - 1; ++r) { int st = 0, en = tlen - 1, st0, en0, st_, en_; int8_t x1, v1; - uint8_t *qrr = qr + (qlen - 1 - r), *u8 = (uint8_t*)u, *v8 = (uint8_t*)v, *x8 = (uint8_t*)x, p_en0 = 0; + uint8_t *qrr = qr + (qlen - 1 - r), *u8 = (uint8_t*)u, *v8 = (uint8_t*)v; __m128i x1_, v1_; // find the boundaries if (st < r - qlen + 1) st = r - qlen + 1; @@ -152,14 +153,7 @@ void ksw_extz2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uin } } else if (!(flag&KSW_EZ_RIGHT)) { // gap left-alignment __m128i *pr = p + r * n_col_ - st_; - off[r] = st; - if (en0 < r && en0 < tlen - 1) { // to avoid backtracking out of the band; this assumes a fixed band - int8_t a, z = ((uint8_t*)s)[en0] + 2 * qe; - a = x8[en0-1] + v8[en0-1]; - p_en0 = a > z? 1 : 0; - z = a > z? a : z; - p_en0 |= a - (z - q) > 0? 0x08 : 0; - } + off[r] = st, off_end[r] = en; for (t = st_; t <= en_; ++t) { __m128i d, z, a, b, xt1, vt1, ut, tmp; __dp_code_block1; @@ -185,14 +179,7 @@ void ksw_extz2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uin } } else { // gap right-alignment __m128i *pr = p + r * n_col_ - st_; - off[r] = st; - if (en0 < r && en0 < tlen - 1) { - int8_t a, z = ((uint8_t*)s)[en0] + 2 * qe; - a = x8[en0-1] + v8[en0-1]; - p_en0 = a >= z? 1 : 0; - z = a >= z? a : z; - p_en0 |= a - (z - q) >= 0? 0x08 : 0; - } + off[r] = st, off_end[r] = en; for (t = st_; t <= en_; ++t) { __m128i d, z, a, b, xt1, vt1, ut, tmp; __dp_code_block1; @@ -217,7 +204,6 @@ void ksw_extz2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uin _mm_store_si128(&pr[t], d); } } - if (with_cigar && en0 < r && en0 < tlen - 1) ((uint8_t*)(p + r * n_col_))[en0 - st] = p_en0; if (!approx_max) { // find the exact max with a 32-bit score array int32_t max_H, max_t; // compute H[], max_H and max_t @@ -289,9 +275,9 @@ void ksw_extz2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uin if (with_cigar) { // backtrack int rev_cigar = !!(flag & KSW_EZ_REV_CIGAR); if (!ez->zdropped && !(flag&KSW_EZ_EXTZ_ONLY)) - ksw_backtrack(km, 1, rev_cigar, (uint8_t*)p, off, n_col_*16, tlen-1, qlen-1, &ez->m_cigar, &ez->n_cigar, &ez->cigar); + ksw_backtrack(km, 1, rev_cigar, (uint8_t*)p, off, off_end, n_col_*16, tlen-1, qlen-1, &ez->m_cigar, &ez->n_cigar, &ez->cigar); else if (ez->max_t >= 0 && ez->max_q >= 0) - ksw_backtrack(km, 1, rev_cigar, (uint8_t*)p, off, n_col_*16, ez->max_t, ez->max_q, &ez->m_cigar, &ez->n_cigar, &ez->cigar); + ksw_backtrack(km, 1, rev_cigar, (uint8_t*)p, off, off_end, n_col_*16, ez->max_t, ez->max_q, &ez->m_cigar, &ez->n_cigar, &ez->cigar); kfree(km, mem2); kfree(km, off); } } diff --git a/main.c b/main.c index a6ae930..18510f8 100644 --- a/main.c +++ b/main.c @@ -8,7 +8,7 @@ #include "minimap.h" #include "mmpriv.h" -#define MM_VERSION "2.0-r228-dirty" +#define MM_VERSION "2.0-r229-dirty" void liftrlimit() { @@ -30,6 +30,7 @@ static struct option long_options[] = { { "print-seed", no_argument, 0, 0 }, { "max-chain-skip", required_argument, 0, 0 }, { "min-dp-len", required_argument, 0, 0 }, + { "print-aln-seq", no_argument, 0, 0 }, { "version", no_argument, 0, 'V' }, { "min-count", required_argument, 0, 'n' }, { "min-chain-score",required_argument, 0, 'm' }, @@ -85,6 +86,7 @@ int main(int argc, char *argv[]) else if (c == 0 && long_idx == 6) mm_dbg_flag |= MM_DBG_PRINT_QNAME | MM_DBG_PRINT_SEED; // --print-seed else if (c == 0 && long_idx == 7) opt.max_chain_skip = atoi(optarg); // --max-chain-skip else if (c == 0 && long_idx == 8) opt.min_ksw_len = atoi(optarg); // --min-dp-len + else if (c == 0 && long_idx == 9) mm_dbg_flag |= MM_DBG_PRINT_QNAME | MM_DBG_PRINT_ALN_SEQ; // --print-aln-seq else if (c == 'V') { puts(MM_VERSION); return 0; diff --git a/mmpriv.h b/mmpriv.h index e8a4156..dcb78d6 100644 --- a/mmpriv.h +++ b/mmpriv.h @@ -8,9 +8,10 @@ #define MM_PARENT_UNSET (-1) #define MM_PARENT_TMP_PRI (-2) -#define MM_DBG_NO_KALLOC 0x1 -#define MM_DBG_PRINT_QNAME 0x2 -#define MM_DBG_PRINT_SEED 0x4 +#define MM_DBG_NO_KALLOC 0x1 +#define MM_DBG_PRINT_QNAME 0x2 +#define MM_DBG_PRINT_SEED 0x4 +#define MM_DBG_PRINT_ALN_SEQ 0x8 #define MM_SEED_LONG_JOIN (1ULL<<40) #define MM_SEED_IGNORE (1ULL<<41)