diff --git a/ksw2_extd2_sse.c b/ksw2_extd2_sse.c index ca4af4d..c678a11 100644 --- a/ksw2_extd2_sse.c +++ b/ksw2_extd2_sse.c @@ -61,7 +61,7 @@ void ksw_extd2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uin int with_cigar = !(flag&KSW_EZ_SCORE_ONLY), approx_max = !!(flag&KSW_EZ_APPROX_MAX); int32_t *H = 0, H0 = 0, last_H0_t = 0; uint8_t *qr, *sf, *mem, *mem2 = 0; - __m128i q_, q2_, qe_, qe2_, zero_, sc_mch_, sc_mis_, m1_; + __m128i q_, q2_, qe_, qe2_, zero_, sc_mch_, sc_mis_, m1_, sc_N_; __m128i *u, *v, *x, *y, *x2, *y2, *s, *p = 0; ksw_reset_extz(ez); @@ -76,6 +76,7 @@ void ksw_extd2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uin qe2_ = _mm_set1_epi8(q2 + e2); sc_mch_ = _mm_set1_epi8(mat[0]); sc_mis_ = _mm_set1_epi8(mat[1]); + sc_N_ = _mm_set1_epi8(-e2); m1_ = _mm_set1_epi8(m - 1); // wildcard if (w < 0) w = tlen > qlen? tlen : qlen; @@ -162,10 +163,11 @@ void ksw_extd2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uin tmp = _mm_cmpeq_epi8(sq, st); #ifdef __SSE4_1__ tmp = _mm_blendv_epi8(sc_mis_, sc_mch_, tmp); + tmp = _mm_blendv_epi8(tmp, sc_N_, mask); #else - tmp = _mm_or_si128(_mm_andnot_si128(tmp, sc_mis_), _mm_and_si128(tmp, sc_mch_)); + tmp = _mm_or_si128(_mm_andnot_si128(tmp, sc_mis_), _mm_and_si128(tmp, sc_mch_)); + tmp = _mm_or_si128(_mm_andnot_si128(mask, tmp), _mm_and_si128(mask, sc_N_)); #endif - tmp = _mm_andnot_si128(mask, tmp); _mm_storeu_si128((__m128i*)((int8_t*)s + t), tmp); } } else { diff --git a/ksw2_exts2_sse.c b/ksw2_exts2_sse.c index 66149d2..82a0369 100644 --- a/ksw2_exts2_sse.c +++ b/ksw2_exts2_sse.c @@ -59,7 +59,7 @@ void ksw_exts2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uin int with_cigar = !(flag&KSW_EZ_SCORE_ONLY), approx_max = !!(flag&KSW_EZ_APPROX_MAX); int32_t *H = 0, H0 = 0, last_H0_t = 0; uint8_t *qr, *sf, *mem, *mem2 = 0; - __m128i q_, q2_, qe_, zero_, sc_mch_, sc_mis_, m1_; + __m128i q_, q2_, qe_, zero_, sc_mch_, sc_mis_, sc_N_, m1_; __m128i *u, *v, *x, *y, *x2, *s, *p = 0, *donor, *acceptor; ksw_reset_extz(ez); @@ -71,6 +71,7 @@ void ksw_exts2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uin qe_ = _mm_set1_epi8(q + e); sc_mch_ = _mm_set1_epi8(mat[0]); sc_mis_ = _mm_set1_epi8(mat[1]); + sc_N_ = _mm_set1_epi8(-e); m1_ = _mm_set1_epi8(m - 1); // wildcard tlen_ = (tlen + 15) / 16; @@ -159,10 +160,11 @@ void ksw_exts2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uin tmp = _mm_cmpeq_epi8(sq, st); #ifdef __SSE4_1__ tmp = _mm_blendv_epi8(sc_mis_, sc_mch_, tmp); + tmp = _mm_blendv_epi8(tmp, sc_N_, mask); #else - tmp = _mm_or_si128(_mm_andnot_si128(tmp, sc_mis_), _mm_and_si128(tmp, sc_mch_)); + tmp = _mm_or_si128(_mm_andnot_si128(tmp, sc_mis_), _mm_and_si128(tmp, sc_mch_)); + tmp = _mm_or_si128(_mm_andnot_si128(mask, tmp), _mm_and_si128(mask, sc_N_)); #endif - tmp = _mm_andnot_si128(mask, tmp); _mm_storeu_si128((__m128i*)((int8_t*)s + t), tmp); } } else { diff --git a/ksw2_extz2_sse.c b/ksw2_extz2_sse.c index 3889955..e8f71a4 100644 --- a/ksw2_extz2_sse.c +++ b/ksw2_extz2_sse.c @@ -50,7 +50,7 @@ void ksw_extz2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uin int with_cigar = !(flag&KSW_EZ_SCORE_ONLY), approx_max = !!(flag&KSW_EZ_APPROX_MAX); int32_t *H = 0, H0 = 0, last_H0_t = 0; uint8_t *qr, *sf, *mem, *mem2 = 0; - __m128i q_, qe2_, zero_, flag1_, flag2_, flag8_, flag16_, sc_mch_, sc_mis_, m1_, max_sc_; + __m128i q_, qe2_, zero_, flag1_, flag2_, flag8_, flag16_, sc_mch_, sc_mis_, sc_N_, m1_, max_sc_; __m128i *u, *v, *x, *y, *s, *p = 0; ksw_reset_extz(ez); @@ -65,6 +65,7 @@ void ksw_extz2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uin flag16_ = _mm_set1_epi8(0x10); sc_mch_ = _mm_set1_epi8(mat[0]); sc_mis_ = _mm_set1_epi8(mat[1]); + sc_N_ = _mm_set1_epi8(-e); m1_ = _mm_set1_epi8(m - 1); // wildcard max_sc_ = _mm_set1_epi8(mat[0] + (q + e) * 2); @@ -130,10 +131,11 @@ void ksw_extz2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uin tmp = _mm_cmpeq_epi8(sq, st); #ifdef __SSE4_1__ tmp = _mm_blendv_epi8(sc_mis_, sc_mch_, tmp); + tmp = _mm_blendv_epi8(tmp, sc_N_, mask); #else - tmp = _mm_or_si128(_mm_andnot_si128(tmp, sc_mis_), _mm_and_si128(tmp, sc_mch_)); + tmp = _mm_or_si128(_mm_andnot_si128(tmp, sc_mis_), _mm_and_si128(tmp, sc_mch_)); + tmp = _mm_or_si128(_mm_andnot_si128(mask, tmp), _mm_and_si128(mask, sc_N_)); #endif - tmp = _mm_andnot_si128(mask, tmp); _mm_storeu_si128((__m128i*)((uint8_t*)s + t), tmp); } } else { diff --git a/main.c b/main.c index db363fd..35e395a 100644 --- a/main.c +++ b/main.c @@ -6,7 +6,7 @@ #include "mmpriv.h" #include "getopt.h" -#define MM_VERSION "2.2-r490-dirty" +#define MM_VERSION "2.2-r491-dirty" #ifdef __linux__ #include diff --git a/map.c b/map.c index 32a0685..fef7322 100644 --- a/map.c +++ b/map.c @@ -74,10 +74,12 @@ int mm_set_opt(const char *preset, mm_idxopt_t *io, mm_mapopt_t *mo) io->is_hpc = 0, io->k = 19, io->w = 19; mo->a = 1, mo->b = 19, mo->q = 39, mo->q2 = 81, mo->e = 3, mo->e2 = 1, mo->zdrop = 200; mo->min_dp_max = 200; + mo->n_best = 50; } else if (strcmp(preset, "asm10") == 0) { io->is_hpc = 0, io->k = 19, io->w = 19; mo->a = 1, mo->b = 9, mo->q = 16, mo->q2 = 41, mo->e = 2, mo->e2 = 1, mo->zdrop = 200; mo->min_dp_max = 200; + mo->n_best = 50; } else if (strcmp(preset, "short") == 0 || strcmp(preset, "sr") == 0) { io->is_hpc = 0, io->k = 21, io->w = 11; mo->flag |= MM_F_SR | MM_F_FRAG_MODE | MM_F_NO_PRINT_2ND;