merge
This commit is contained in:
commit
093aeda028
37
align.c
37
align.c
|
|
@ -25,31 +25,31 @@ const kswr_t g_defr = {0, -1, -1, -1, -1, -1, -1};
|
|||
int i; \
|
||||
kswr_t score; \
|
||||
for (i = sp; i < ep; ++i) \
|
||||
{ if (kv_A(kv_A(i_arr, i), 0) < 144) continue; \
|
||||
{ if (kv_A(kv_A(i_arr, i), 0) < 144) continue; \
|
||||
kswq_sse_t *q = aln_sse_qinit(&bmem[0], nbyte, kv_A(kv_A(i_arr, i), 0), kv_A(q_arr, i).a, 5, mat); \
|
||||
score = func(&bmem[1], q, \
|
||||
score = func(&bmem[1], q, \
|
||||
kv_A(kv_A(i_arr, i), 1), kv_A(t_arr, i).a, \
|
||||
6, 1, 6, 1, xtra); \
|
||||
score_total[kernel_id] += score.score; \
|
||||
score_total[kernel_id] += score.score; \
|
||||
byte_mem_clear(&bmem[0]); /* free(q); */ \
|
||||
} \
|
||||
PROF_END(gprof[kernel_prof_idx[kernel_id]], align); \
|
||||
} while (0)
|
||||
|
||||
#define ALIGN_PERFORMANCE_TEST_AVX2(kernel_id, nbyte, func, sp, ep) \
|
||||
do { \
|
||||
PROF_START(align); \
|
||||
int i; \
|
||||
kswr_t score; \
|
||||
for (i = sp; i < ep; ++i) { \
|
||||
/*if (kv_A(kv_A(i_arr, i), 0) < 144) \
|
||||
continue; */ \
|
||||
#define ALIGN_PERFORMANCE_TEST_AVX2(kernel_id, nbyte, func, sp, ep) \
|
||||
do { \
|
||||
PROF_START(align); \
|
||||
int i; \
|
||||
kswr_t score; \
|
||||
for (i = sp; i < ep; ++i) { \
|
||||
if (kv_A(kv_A(i_arr, i), 0) < 144) \
|
||||
continue; \
|
||||
kswq_avx2_t *q = aln_avx2_qinit(&bmem[0], nbyte, kv_A(kv_A(i_arr, i), 0), kv_A(q_arr, i).a, 5, mat); \
|
||||
score = func(&bmem[1], q, kv_A(kv_A(i_arr, i), 1), kv_A(t_arr, i).a, 6, 1, 6, 1, xtra); \
|
||||
score_total[kernel_id] += score.score; \
|
||||
byte_mem_clear(&bmem[0]); /* free(q); */ \
|
||||
} \
|
||||
PROF_END(gprof[kernel_prof_idx[kernel_id]], align); \
|
||||
score = func(&bmem[1], q, kv_A(kv_A(i_arr, i), 1), kv_A(t_arr, i).a, 6, 1, 6, 1, xtra); \
|
||||
score_total[kernel_id] += score.score; \
|
||||
byte_mem_clear(&bmem[0]); /* free(q); */ \
|
||||
} \
|
||||
PROF_END(gprof[kernel_prof_idx[kernel_id]], align); \
|
||||
} while (0)
|
||||
|
||||
// sse ksw init
|
||||
|
|
@ -233,6 +233,7 @@ int main_align(int argc, char *argv[])
|
|||
ALIGN_PERFORMANCE_TEST(1, 1, align_sse_u8, 0, align_lines);
|
||||
ALIGN_PERFORMANCE_TEST_AVX2(2, 2, align_avx2_i16, 0, align_lines);
|
||||
ALIGN_PERFORMANCE_TEST_AVX2(3, 1, align_avx2_u8, 0, align_lines);
|
||||
|
||||
#if 0
|
||||
// compare the score2 of i16 and u8
|
||||
{
|
||||
|
|
@ -262,8 +263,8 @@ int main_align(int argc, char *argv[])
|
|||
}
|
||||
|
||||
#ifdef SHOW_PERF
|
||||
fprintf(stderr, "[align sse i16] time: %9.6lf s; score: %ld\n", gprof[G_ALN_I16] / TIME_DIVIDE_BY, gdata[G_ALN_I16]);
|
||||
fprintf(stderr, "[align sse u8 ] time: %9.6lf s; score: %ld\n", gprof[G_ALN_U8] / TIME_DIVIDE_BY, gdata[G_ALN_U8]);
|
||||
fprintf(stderr, "[align avx i16] time: %9.6lf s; score: %ld\n", gprof[G_ALN_I16] / TIME_DIVIDE_BY, gdata[G_ALN_I16]);
|
||||
fprintf(stderr, "[align avx u8 ] time: %9.6lf s; score: %ld\n", gprof[G_ALN_U8] / TIME_DIVIDE_BY, gdata[G_ALN_U8]);
|
||||
fprintf(stderr, "[align avx2 i16] time: %9.6lf s; score: %ld\n", gprof[G_ALN_AVX2_I16] / TIME_DIVIDE_BY, gdata[G_ALN_AVX2_I16]);
|
||||
fprintf(stderr, "[align avx2 u8 ] time: %9.6lf s; score: %ld\n", gprof[G_ALN_AVX2_U8] / TIME_DIVIDE_BY, gdata[G_ALN_AVX2_U8]);
|
||||
#endif
|
||||
|
|
|
|||
3
align.h
3
align.h
|
|
@ -51,6 +51,5 @@ kswr_t align_avx2_u8(byte_mem_t *bmem, kswq_avx2_t *q, int tlen, const uint8_t *
|
|||
int _e_ins, int xtra);
|
||||
kswr_t align_avx2_i16(byte_mem_t *bmem, kswq_avx2_t *q, int tlen, const uint8_t *target, int _o_del, int _e_del, int _o_ins,
|
||||
int _e_ins, int xtra);
|
||||
|
||||
int main_align(int argc, char *argv[]);
|
||||
#endif
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -321,6 +321,7 @@ int extend_avx2_i16(byte_mem_t *bmem,
|
|||
}
|
||||
ins[0][0] = del[0][0] = score[0][0] = h0;
|
||||
#endif
|
||||
w = 1000;
|
||||
|
||||
for (D = 1; LIKELY(D < Dloop); ++D) {
|
||||
// 边界条件一定要注意! tlen 大于,等于,小于 qlen时的情况
|
||||
|
|
|
|||
|
|
@ -175,7 +175,7 @@ static const uint8_t reverse_mask[SIMD_WIDTH] = {7, 6, 5, 4, 3, 2, 1, 0, 15, 14,
|
|||
max_vec = _mm256_max_epu8(max_vec, _mm256_alignr_epi8(max_vec, max_vec, 8)); \
|
||||
max_vec = _mm256_max_epu8(max_vec, _mm256_permute2x128_si256(max_vec, max_vec, 0x01)); \
|
||||
m = MAX(m, maxVal[0]); \
|
||||
if (maxVal[0] > 0 && m >= max) \
|
||||
/*if (maxVal[0] > 0 && m >= max) \
|
||||
{ \
|
||||
for (j = beg, i = iend; j <= end; j += SIMD_WIDTH, i -= SIMD_WIDTH) \
|
||||
{ \
|
||||
|
|
@ -189,7 +189,7 @@ static const uint8_t reverse_mask[SIMD_WIDTH] = {7, 6, 5, 4, 3, 2, 1, 0, 15, 14,
|
|||
mi = i - 1 - pos; \
|
||||
} \
|
||||
} \
|
||||
}
|
||||
}*/
|
||||
|
||||
// 每轮迭代后,交换数组
|
||||
#define SWAP_DATA_POINTER \
|
||||
|
|
@ -340,6 +340,7 @@ int extend_avx2_u8(byte_mem_t *bmem,
|
|||
#endif
|
||||
#endif
|
||||
|
||||
w = 1000;
|
||||
for (D = 1; LIKELY(D < Dloop); ++D)
|
||||
{
|
||||
// 边界条件一定要注意! tlen 大于,等于,小于 qlen时的情况
|
||||
|
|
|
|||
|
|
@ -70,6 +70,7 @@ int extend_scalar(byte_mem_t *bmem, int qlen, const uint8_t *query, int tlen, co
|
|||
max_ie = -1, gscore = -1;
|
||||
max_off = 0;
|
||||
beg = 0, end = qlen;
|
||||
w = 1000;
|
||||
for (i = 0; LIKELY(i < tlen); ++i) // 对target逐个字符进行遍历
|
||||
{
|
||||
int t, f = 0, h1, m = 0, mj = -1;
|
||||
|
|
|
|||
|
|
@ -44,4 +44,4 @@ enum
|
|||
|
||||
// get current milli seconds
|
||||
uint64_t get_msec();
|
||||
#endif
|
||||
#endif
|
||||
|
|
|
|||
Loading…
Reference in New Issue