This commit is contained in:
zzh 2025-09-18 15:15:30 +08:00
commit 093aeda028
6 changed files with 26 additions and 23 deletions

View File

@ -42,8 +42,8 @@ const kswr_t g_defr = {0, -1, -1, -1, -1, -1, -1};
int i; \
kswr_t score; \
for (i = sp; i < ep; ++i) { \
/*if (kv_A(kv_A(i_arr, i), 0) < 144) \
continue; */ \
if (kv_A(kv_A(i_arr, i), 0) < 144) \
continue; \
kswq_avx2_t *q = aln_avx2_qinit(&bmem[0], nbyte, kv_A(kv_A(i_arr, i), 0), kv_A(q_arr, i).a, 5, mat); \
score = func(&bmem[1], q, kv_A(kv_A(i_arr, i), 1), kv_A(t_arr, i).a, 6, 1, 6, 1, xtra); \
score_total[kernel_id] += score.score; \
@ -233,6 +233,7 @@ int main_align(int argc, char *argv[])
ALIGN_PERFORMANCE_TEST(1, 1, align_sse_u8, 0, align_lines);
ALIGN_PERFORMANCE_TEST_AVX2(2, 2, align_avx2_i16, 0, align_lines);
ALIGN_PERFORMANCE_TEST_AVX2(3, 1, align_avx2_u8, 0, align_lines);
#if 0
// compare the score2 of i16 and u8
{
@ -262,8 +263,8 @@ int main_align(int argc, char *argv[])
}
#ifdef SHOW_PERF
fprintf(stderr, "[align sse i16] time: %9.6lf s; score: %ld\n", gprof[G_ALN_I16] / TIME_DIVIDE_BY, gdata[G_ALN_I16]);
fprintf(stderr, "[align sse u8 ] time: %9.6lf s; score: %ld\n", gprof[G_ALN_U8] / TIME_DIVIDE_BY, gdata[G_ALN_U8]);
fprintf(stderr, "[align avx i16] time: %9.6lf s; score: %ld\n", gprof[G_ALN_I16] / TIME_DIVIDE_BY, gdata[G_ALN_I16]);
fprintf(stderr, "[align avx u8 ] time: %9.6lf s; score: %ld\n", gprof[G_ALN_U8] / TIME_DIVIDE_BY, gdata[G_ALN_U8]);
fprintf(stderr, "[align avx2 i16] time: %9.6lf s; score: %ld\n", gprof[G_ALN_AVX2_I16] / TIME_DIVIDE_BY, gdata[G_ALN_AVX2_I16]);
fprintf(stderr, "[align avx2 u8 ] time: %9.6lf s; score: %ld\n", gprof[G_ALN_AVX2_U8] / TIME_DIVIDE_BY, gdata[G_ALN_AVX2_U8]);
#endif

View File

@ -51,6 +51,5 @@ kswr_t align_avx2_u8(byte_mem_t *bmem, kswq_avx2_t *q, int tlen, const uint8_t *
int _e_ins, int xtra);
kswr_t align_avx2_i16(byte_mem_t *bmem, kswq_avx2_t *q, int tlen, const uint8_t *target, int _o_del, int _e_del, int _o_ins,
int _e_ins, int xtra);
int main_align(int argc, char *argv[]);
#endif

View File

@ -321,6 +321,7 @@ int extend_avx2_i16(byte_mem_t *bmem,
}
ins[0][0] = del[0][0] = score[0][0] = h0;
#endif
w = 1000;
for (D = 1; LIKELY(D < Dloop); ++D) {
// Pay close attention to the boundary conditions: the cases where tlen is greater than, equal to, or less than qlen

View File

@ -175,7 +175,7 @@ static const uint8_t reverse_mask[SIMD_WIDTH] = {7, 6, 5, 4, 3, 2, 1, 0, 15, 14,
max_vec = _mm256_max_epu8(max_vec, _mm256_alignr_epi8(max_vec, max_vec, 8)); \
max_vec = _mm256_max_epu8(max_vec, _mm256_permute2x128_si256(max_vec, max_vec, 0x01)); \
m = MAX(m, maxVal[0]); \
if (maxVal[0] > 0 && m >= max) \
/*if (maxVal[0] > 0 && m >= max) \
{ \
for (j = beg, i = iend; j <= end; j += SIMD_WIDTH, i -= SIMD_WIDTH) \
{ \
@ -189,7 +189,7 @@ static const uint8_t reverse_mask[SIMD_WIDTH] = {7, 6, 5, 4, 3, 2, 1, 0, 15, 14,
mi = i - 1 - pos; \
} \
} \
}
}*/
// After each iteration, swap the arrays
#define SWAP_DATA_POINTER \
@ -340,6 +340,7 @@ int extend_avx2_u8(byte_mem_t *bmem,
#endif
#endif
w = 1000;
for (D = 1; LIKELY(D < Dloop); ++D)
{
// Pay close attention to the boundary conditions: the cases where tlen is greater than, equal to, or less than qlen

View File

@ -70,6 +70,7 @@ int extend_scalar(byte_mem_t *bmem, int qlen, const uint8_t *query, int tlen, co
max_ie = -1, gscore = -1;
max_off = 0;
beg = 0, end = qlen;
w = 1000;
for (i = 0; LIKELY(i < tlen); ++i) // 对target逐个字符进行遍历
{
int t, f = 0, h1, m = 0, mj = -1;