From c02ff4662ccd72de36ec01e0aa3e8c3abd851441 Mon Sep 17 00:00:00 2001 From: Heng Li Date: Tue, 27 Jun 2017 10:36:24 -0400 Subject: [PATCH] r85: two-round z-drop --- align.c | 6 ++++-- ksw2.h | 24 ++++++------------------ ksw2_extz2_sse.c | 42 ++++++++++++++++++++++++++++-------------- main.c | 2 +- 4 files changed, 39 insertions(+), 35 deletions(-) diff --git a/align.c b/align.c index 6b1314c..e3cc89d 100644 --- a/align.c +++ b/align.c @@ -188,12 +188,14 @@ static void mm_align1(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int if (i == r->cnt - 1 || qe - qs >= opt->min_ksw_len || re - rs >= opt->min_ksw_len) { qseq = &qseq0[rev][qs]; mm_idx_getseq(mi, rid, rs, re, tseq); - ksw_extz2_sse(km, qe - qs, qseq, re - rs, tseq, 5, mat, opt->q, opt->e, bw, opt->zdrop, 0, ez); + ksw_extz2_sse(km, qe - qs, qseq, re - rs, tseq, 5, mat, opt->q, opt->e, bw, opt->zdrop, KSW_EZ_GLOBAL_ONLY, ez); + if (ez->zdropped || ez->score < 0) + ksw_extz2_sse(km, qe - qs, qseq, re - rs, tseq, 5, mat, opt->q, opt->e, bw, opt->zdrop, 0, ez); if (ez->n_cigar > 0) { mm_append_cigar(r, ez->n_cigar, ez->cigar); mm_update_extra(r->p, qseq, tseq, ez->n_cigar, ez->cigar, 0); } - if (ez->score == KSW_NEG_INF) { // truncated by Z-drop + if (ez->zdropped) { // truncated by Z-drop int j; for (j = i - 1; j >= 0; --j) if ((int32_t)a[r->as + j].x < re + ez->max_t) diff --git a/ksw2.h b/ksw2.h index a4edc2a..b5062ff 100644 --- a/ksw2.h +++ b/ksw2.h @@ -14,7 +14,8 @@ #define KSW_EZ_REV_CIGAR 0x40 // reverse CIGAR in the output typedef struct { - int max, max_q, max_t; // max extension score and coordinate + uint32_t max:31, zdropped:1; + int max_q, max_t; // max extension coordinate int mqe, mqe_t; // max score when reaching the end of query int mte, mte_q; // max score when reaching the end of target int score; // max score reaching both ends; may be KSW_NEG_INF @@ -112,24 +113,11 @@ static inline void ksw_backtrack(void *km, int is_rot, int is_rev, const uint8_t *m_cigar_ = m_cigar, *n_cigar_ = n_cigar, *cigar_ = cigar; } -static inline int ksw_cigar2score(int8_t m, const int8_t *mat, int8_t q, int8_t e, const uint8_t *query, const uint8_t *target, int n_cigar, const uint32_t *cigar) +static inline void ksw_reset_extz(ksw_extz_t *ez) { - int i, j, k, l, score; - for (k = 0, score = 0, i = j = 0; k < n_cigar; ++k) { - int op = cigar[k] & 0xf, len = cigar[k] >> 4; - if (op == 0) { - for (l = 0; l < len; ++l) - score += mat[target[i + l] * m + query[j + l]]; - i += len, j += len; - } else if (op == 1) { - score -= q + len * e; - j += len; - } else if (op == 2) { - score -= q + len * e; - i += len; - } - } - return score; + ez->max_q = ez->max_t = ez->mqe_t = ez->mte_q = -1; + ez->max = 0, ez->score = ez->mqe = ez->mte = KSW_NEG_INF; + ez->n_cigar = 0, ez->zdropped = 0; } #endif diff --git a/ksw2_extz2_sse.c b/ksw2_extz2_sse.c index 0337095..1cb7c4e 100644 --- a/ksw2_extz2_sse.c +++ b/ksw2_extz2_sse.c @@ -8,6 +8,21 @@ #include #endif +static inline int apply_zdrop(ksw_extz_t *ez, int32_t H, int r, int t, int zdrop, int8_t e) +{ + if (H > (int32_t)ez->max) { + ez->max = H, ez->max_t = t, ez->max_q = r - t; + } else if (t >= ez->max_t && r - t >= ez->max_q) { + int tl = t - ez->max_t, ql = (r - t) - ez->max_q, l; + l = tl > ql? tl - ql : ql - tl; + if (ez->max - H > zdrop + l * e) { + ez->zdropped = 1; + return 1; + } + } + return 0; +} + void ksw_extz2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat, int8_t q, int8_t e, int w, int zdrop, int flag, ksw_extz_t *ez) { #define __dp_code_block1 \ @@ -52,9 +67,7 @@ void ksw_extz2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uin sc_mis_ = _mm_set1_epi8(mat[1]); m1_ = _mm_set1_epi8(m - 1); // wildcard - ez->max_q = ez->max_t = ez->mqe_t = ez->mte_q = -1; - ez->max = 0, ez->score = ez->mqe = ez->mte = KSW_NEG_INF; - ez->n_cigar = 0; + ksw_reset_extz(ez); wl = wr = w; tlen_ = (tlen + 15) / 16; @@ -238,14 +251,7 @@ void ksw_extz2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uin ez->mte = H[en0], ez->mte_q = r - en; if (r - st0 == qlen - 1 && H[st0] > ez->mqe) ez->mqe = H[st0], ez->mqe_t = st0; - if (max_H > ez->max) { - ez->max = max_H, ez->max_t = max_t, ez->max_q = r - max_t; - } else if (max_t >= ez->max_t && r - max_t >= ez->max_q) { - int tl = max_t - ez->max_t, ql = (r - max_t) - ez->max_q, l; - l = tl > ql? tl - ql : ql - tl; - if (ez->max - max_H > zdrop + l * e) - break; - } + if (apply_zdrop(ez, max_H, r, max_t, zdrop, e)) break; if (r == qlen + tlen - 2 && en0 == tlen - 1) ez->score = H[tlen - 1]; if (flag & KSW_EZ_DYN_BAND & 0) { // FIXME: don't use - buggy! @@ -259,9 +265,17 @@ void ksw_extz2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uin } } else { if (r > 0) { - if (last_H0_t >= st0 && last_H0_t <= en0) + if (last_H0_t >= st0 && last_H0_t <= en0 && last_H0_t + 1 >= st0 && last_H0_t + 1 <= en0) { + int32_t d0 = v8[last_H0_t] - qe; + int32_t d1 = u8[last_H0_t + 1] - qe; + if (d0 > d1) H0 += d0; + else H0 += d1, ++last_H0_t; + } else if (last_H0_t >= st0 && last_H0_t <= en0) { H0 += v8[last_H0_t] - qe; - else ++last_H0_t, H0 += u8[last_H0_t] - qe; + } else { + ++last_H0_t, H0 += u8[last_H0_t] - qe; + } + if (apply_zdrop(ez, H0, r, last_H0_t, zdrop, e)) break; } else H0 = v8[0] - qe - qe, last_H0_t = 0; if (r == qlen + tlen - 2 && en0 == tlen - 1) ez->score = H0; @@ -273,7 +287,7 @@ void ksw_extz2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uin if (with_max) kfree(km, H); if (with_cigar) { // backtrack int rev_cigar = !!(flag & KSW_EZ_REV_CIGAR); - if (ez->score > KSW_NEG_INF && !(flag&KSW_EZ_EXTZ_ONLY)) + if (!ez->zdropped && !(flag&KSW_EZ_EXTZ_ONLY)) ksw_backtrack(km, 1, rev_cigar, (uint8_t*)p, off, n_col_*16, tlen-1, qlen-1, &ez->m_cigar, &ez->n_cigar, &ez->cigar); else if (ez->max_t >= 0 && ez->max_q >= 0) ksw_backtrack(km, 1, rev_cigar, (uint8_t*)p, off, n_col_*16, ez->max_t, ez->max_q, &ez->m_cigar, &ez->n_cigar, &ez->cigar); diff --git a/main.c b/main.c index 451fd19..4b8fa11 100644 --- a/main.c +++ b/main.c @@ -10,7 +10,7 @@ #include "minimap.h" #include "mmpriv.h" -#define MM_VERSION "2.0-r79-pre" +#define MM_VERSION "2.0-r85-pre" void liftrlimit() {