This commit is contained in:
zzh 2025-09-18 15:15:30 +08:00
commit 093aeda028
6 changed files with 26 additions and 23 deletions

View File

@@ -42,8 +42,8 @@ const kswr_t g_defr = {0, -1, -1, -1, -1, -1, -1};
int i; \ int i; \
kswr_t score; \ kswr_t score; \
for (i = sp; i < ep; ++i) { \ for (i = sp; i < ep; ++i) { \
/*if (kv_A(kv_A(i_arr, i), 0) < 144) \ if (kv_A(kv_A(i_arr, i), 0) < 144) \
continue; */ \ continue; \
kswq_avx2_t *q = aln_avx2_qinit(&bmem[0], nbyte, kv_A(kv_A(i_arr, i), 0), kv_A(q_arr, i).a, 5, mat); \ kswq_avx2_t *q = aln_avx2_qinit(&bmem[0], nbyte, kv_A(kv_A(i_arr, i), 0), kv_A(q_arr, i).a, 5, mat); \
score = func(&bmem[1], q, kv_A(kv_A(i_arr, i), 1), kv_A(t_arr, i).a, 6, 1, 6, 1, xtra); \ score = func(&bmem[1], q, kv_A(kv_A(i_arr, i), 1), kv_A(t_arr, i).a, 6, 1, 6, 1, xtra); \
score_total[kernel_id] += score.score; \ score_total[kernel_id] += score.score; \
@@ -233,6 +233,7 @@ int main_align(int argc, char *argv[])
ALIGN_PERFORMANCE_TEST(1, 1, align_sse_u8, 0, align_lines); ALIGN_PERFORMANCE_TEST(1, 1, align_sse_u8, 0, align_lines);
ALIGN_PERFORMANCE_TEST_AVX2(2, 2, align_avx2_i16, 0, align_lines); ALIGN_PERFORMANCE_TEST_AVX2(2, 2, align_avx2_i16, 0, align_lines);
ALIGN_PERFORMANCE_TEST_AVX2(3, 1, align_avx2_u8, 0, align_lines); ALIGN_PERFORMANCE_TEST_AVX2(3, 1, align_avx2_u8, 0, align_lines);
#if 0 #if 0
// compare the score2 of i16 and u8 // compare the score2 of i16 and u8
{ {
@@ -262,8 +263,8 @@ int main_align(int argc, char *argv[])
} }
#ifdef SHOW_PERF #ifdef SHOW_PERF
fprintf(stderr, "[align sse i16] time: %9.6lf s; score: %ld\n", gprof[G_ALN_I16] / TIME_DIVIDE_BY, gdata[G_ALN_I16]); fprintf(stderr, "[align avx i16] time: %9.6lf s; score: %ld\n", gprof[G_ALN_I16] / TIME_DIVIDE_BY, gdata[G_ALN_I16]);
fprintf(stderr, "[align sse u8 ] time: %9.6lf s; score: %ld\n", gprof[G_ALN_U8] / TIME_DIVIDE_BY, gdata[G_ALN_U8]); fprintf(stderr, "[align avx u8 ] time: %9.6lf s; score: %ld\n", gprof[G_ALN_U8] / TIME_DIVIDE_BY, gdata[G_ALN_U8]);
fprintf(stderr, "[align avx2 i16] time: %9.6lf s; score: %ld\n", gprof[G_ALN_AVX2_I16] / TIME_DIVIDE_BY, gdata[G_ALN_AVX2_I16]); fprintf(stderr, "[align avx2 i16] time: %9.6lf s; score: %ld\n", gprof[G_ALN_AVX2_I16] / TIME_DIVIDE_BY, gdata[G_ALN_AVX2_I16]);
fprintf(stderr, "[align avx2 u8 ] time: %9.6lf s; score: %ld\n", gprof[G_ALN_AVX2_U8] / TIME_DIVIDE_BY, gdata[G_ALN_AVX2_U8]); fprintf(stderr, "[align avx2 u8 ] time: %9.6lf s; score: %ld\n", gprof[G_ALN_AVX2_U8] / TIME_DIVIDE_BY, gdata[G_ALN_AVX2_U8]);
#endif #endif

View File

@@ -51,6 +51,5 @@ kswr_t align_avx2_u8(byte_mem_t *bmem, kswq_avx2_t *q, int tlen, const uint8_t *
int _e_ins, int xtra); int _e_ins, int xtra);
kswr_t align_avx2_i16(byte_mem_t *bmem, kswq_avx2_t *q, int tlen, const uint8_t *target, int _o_del, int _e_del, int _o_ins, kswr_t align_avx2_i16(byte_mem_t *bmem, kswq_avx2_t *q, int tlen, const uint8_t *target, int _o_del, int _e_del, int _o_ins,
int _e_ins, int xtra); int _e_ins, int xtra);
int main_align(int argc, char *argv[]); int main_align(int argc, char *argv[]);
#endif #endif

View File

@@ -321,6 +321,7 @@ int extend_avx2_i16(byte_mem_t *bmem,
} }
ins[0][0] = del[0][0] = score[0][0] = h0; ins[0][0] = del[0][0] = score[0][0] = h0;
#endif #endif
w = 1000;
for (D = 1; LIKELY(D < Dloop); ++D) { for (D = 1; LIKELY(D < Dloop); ++D) {
// 边界条件一定要注意! tlen 大于,等于,小于 qlen时的情况 // 边界条件一定要注意! tlen 大于,等于,小于 qlen时的情况

View File

@@ -175,7 +175,7 @@ static const uint8_t reverse_mask[SIMD_WIDTH] = {7, 6, 5, 4, 3, 2, 1, 0, 15, 14,
max_vec = _mm256_max_epu8(max_vec, _mm256_alignr_epi8(max_vec, max_vec, 8)); \ max_vec = _mm256_max_epu8(max_vec, _mm256_alignr_epi8(max_vec, max_vec, 8)); \
max_vec = _mm256_max_epu8(max_vec, _mm256_permute2x128_si256(max_vec, max_vec, 0x01)); \ max_vec = _mm256_max_epu8(max_vec, _mm256_permute2x128_si256(max_vec, max_vec, 0x01)); \
m = MAX(m, maxVal[0]); \ m = MAX(m, maxVal[0]); \
if (maxVal[0] > 0 && m >= max) \ /*if (maxVal[0] > 0 && m >= max) \
{ \ { \
for (j = beg, i = iend; j <= end; j += SIMD_WIDTH, i -= SIMD_WIDTH) \ for (j = beg, i = iend; j <= end; j += SIMD_WIDTH, i -= SIMD_WIDTH) \
{ \ { \
@@ -189,7 +189,7 @@ static const uint8_t reverse_mask[SIMD_WIDTH] = {7, 6, 5, 4, 3, 2, 1, 0, 15, 14,
mi = i - 1 - pos; \ mi = i - 1 - pos; \
} \ } \
} \ } \
} }*/
// 每轮迭代后,交换数组 // 每轮迭代后,交换数组
#define SWAP_DATA_POINTER \ #define SWAP_DATA_POINTER \
@@ -340,6 +340,7 @@ int extend_avx2_u8(byte_mem_t *bmem,
#endif #endif
#endif #endif
w = 1000;
for (D = 1; LIKELY(D < Dloop); ++D) for (D = 1; LIKELY(D < Dloop); ++D)
{ {
// 边界条件一定要注意! tlen 大于,等于,小于 qlen时的情况 // 边界条件一定要注意! tlen 大于,等于,小于 qlen时的情况

View File

@@ -70,6 +70,7 @@ int extend_scalar(byte_mem_t *bmem, int qlen, const uint8_t *query, int tlen, co
max_ie = -1, gscore = -1; max_ie = -1, gscore = -1;
max_off = 0; max_off = 0;
beg = 0, end = qlen; beg = 0, end = qlen;
w = 1000;
for (i = 0; LIKELY(i < tlen); ++i) // 对target逐个字符进行遍历 for (i = 0; LIKELY(i < tlen); ++i) // 对target逐个字符进行遍历
{ {
int t, f = 0, h1, m = 0, mj = -1; int t, f = 0, h1, m = 0, mj = -1;