Compare commits

...

2 Commits

Author SHA1 Message Date
zzh 093aeda028 merge 2025-09-18 15:15:30 +08:00
zzh 338c56e6ed 完成avx2的align代码 2025-09-18 15:09:48 +08:00
5 changed files with 9 additions and 8 deletions

View File

@ -25,12 +25,12 @@ const kswr_t g_defr = {0, -1, -1, -1, -1, -1, -1};
int i; \
kswr_t score; \
for (i = sp; i < ep; ++i) \
{ if (kv_A(kv_A(i_arr, i), 0) < 144) continue; \
{ if (kv_A(kv_A(i_arr, i), 0) < 144) continue; \
kswq_sse_t *q = aln_sse_qinit(&bmem[0], nbyte, kv_A(kv_A(i_arr, i), 0), kv_A(q_arr, i).a, 5, mat); \
score = func(&bmem[1], q, \
score = func(&bmem[1], q, \
kv_A(kv_A(i_arr, i), 1), kv_A(t_arr, i).a, \
6, 1, 6, 1, xtra); \
score_total[kernel_id] += score.score; \
score_total[kernel_id] += score.score; \
byte_mem_clear(&bmem[0]); /* free(q); */ \
} \
PROF_END(gprof[kernel_prof_idx[kernel_id]], align); \

View File

@ -52,4 +52,4 @@ kswr_t align_avx2_u8(byte_mem_t *bmem, kswq_avx2_t *q, int tlen, const uint8_t *
kswr_t align_avx2_i16(byte_mem_t *bmem, kswq_avx2_t *q, int tlen, const uint8_t *target, int _o_del, int _e_del, int _o_ins,
int _e_ins, int xtra);
int main_align(int argc, char *argv[]);
#endif
#endif

View File

@ -106,7 +106,7 @@ end_loop8:
gmax = imax; te = i;
for (j = 0; LIKELY(j < slen); ++j)
_mm_store_si128(Hmax + j, _mm_load_si128(H1 + j));
//if (gmax >= endsc) break;
if (gmax >= endsc) break;
}
S = H1; H1 = H0; H0 = S;

View File

@ -120,7 +120,7 @@ end_loop16:
gmax = imax; te = i; // te is the end position on the target
for (j = 0; LIKELY(j < slen); ++j) // keep the H1 vector
_mm_store_si128(Hmax + j, _mm_load_si128(H1 + j));
//if (gmax + q->shift >= 255 || gmax >= endsc) break;
if (gmax + q->shift >= 255 || gmax >= endsc) break;
}
S = H1; H1 = H0; H0 = S; // swap H0 and H1
}

View File

@ -21,7 +21,8 @@ extern FILE *ins_f_arr[LIM_TYPE],
*retval_f_arr[LIM_TYPE];
// GLOBAL performance info
enum {
enum
{
G_ALL = 0,
G_EXT_SCALAR,
G_EXT_AVX2_I16,
@ -43,4 +44,4 @@ enum {
// get current milli seconds
uint64_t get_msec();
#endif
#endif