merge
This commit is contained in:
commit
093aeda028
9
align.c
9
align.c
|
|
@ -42,8 +42,8 @@ const kswr_t g_defr = {0, -1, -1, -1, -1, -1, -1};
|
||||||
int i; \
|
int i; \
|
||||||
kswr_t score; \
|
kswr_t score; \
|
||||||
for (i = sp; i < ep; ++i) { \
|
for (i = sp; i < ep; ++i) { \
|
||||||
/*if (kv_A(kv_A(i_arr, i), 0) < 144) \
|
if (kv_A(kv_A(i_arr, i), 0) < 144) \
|
||||||
continue; */ \
|
continue; \
|
||||||
kswq_avx2_t *q = aln_avx2_qinit(&bmem[0], nbyte, kv_A(kv_A(i_arr, i), 0), kv_A(q_arr, i).a, 5, mat); \
|
kswq_avx2_t *q = aln_avx2_qinit(&bmem[0], nbyte, kv_A(kv_A(i_arr, i), 0), kv_A(q_arr, i).a, 5, mat); \
|
||||||
score = func(&bmem[1], q, kv_A(kv_A(i_arr, i), 1), kv_A(t_arr, i).a, 6, 1, 6, 1, xtra); \
|
score = func(&bmem[1], q, kv_A(kv_A(i_arr, i), 1), kv_A(t_arr, i).a, 6, 1, 6, 1, xtra); \
|
||||||
score_total[kernel_id] += score.score; \
|
score_total[kernel_id] += score.score; \
|
||||||
|
|
@ -233,6 +233,7 @@ int main_align(int argc, char *argv[])
|
||||||
ALIGN_PERFORMANCE_TEST(1, 1, align_sse_u8, 0, align_lines);
|
ALIGN_PERFORMANCE_TEST(1, 1, align_sse_u8, 0, align_lines);
|
||||||
ALIGN_PERFORMANCE_TEST_AVX2(2, 2, align_avx2_i16, 0, align_lines);
|
ALIGN_PERFORMANCE_TEST_AVX2(2, 2, align_avx2_i16, 0, align_lines);
|
||||||
ALIGN_PERFORMANCE_TEST_AVX2(3, 1, align_avx2_u8, 0, align_lines);
|
ALIGN_PERFORMANCE_TEST_AVX2(3, 1, align_avx2_u8, 0, align_lines);
|
||||||
|
|
||||||
#if 0
|
#if 0
|
||||||
// compare the score2 of i16 and u8
|
// compare the score2 of i16 and u8
|
||||||
{
|
{
|
||||||
|
|
@ -262,8 +263,8 @@ int main_align(int argc, char *argv[])
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef SHOW_PERF
|
#ifdef SHOW_PERF
|
||||||
fprintf(stderr, "[align sse i16] time: %9.6lf s; score: %ld\n", gprof[G_ALN_I16] / TIME_DIVIDE_BY, gdata[G_ALN_I16]);
|
fprintf(stderr, "[align avx i16] time: %9.6lf s; score: %ld\n", gprof[G_ALN_I16] / TIME_DIVIDE_BY, gdata[G_ALN_I16]);
|
||||||
fprintf(stderr, "[align sse u8 ] time: %9.6lf s; score: %ld\n", gprof[G_ALN_U8] / TIME_DIVIDE_BY, gdata[G_ALN_U8]);
|
fprintf(stderr, "[align avx u8 ] time: %9.6lf s; score: %ld\n", gprof[G_ALN_U8] / TIME_DIVIDE_BY, gdata[G_ALN_U8]);
|
||||||
fprintf(stderr, "[align avx2 i16] time: %9.6lf s; score: %ld\n", gprof[G_ALN_AVX2_I16] / TIME_DIVIDE_BY, gdata[G_ALN_AVX2_I16]);
|
fprintf(stderr, "[align avx2 i16] time: %9.6lf s; score: %ld\n", gprof[G_ALN_AVX2_I16] / TIME_DIVIDE_BY, gdata[G_ALN_AVX2_I16]);
|
||||||
fprintf(stderr, "[align avx2 u8 ] time: %9.6lf s; score: %ld\n", gprof[G_ALN_AVX2_U8] / TIME_DIVIDE_BY, gdata[G_ALN_AVX2_U8]);
|
fprintf(stderr, "[align avx2 u8 ] time: %9.6lf s; score: %ld\n", gprof[G_ALN_AVX2_U8] / TIME_DIVIDE_BY, gdata[G_ALN_AVX2_U8]);
|
||||||
#endif
|
#endif
|
||||||
|
|
|
||||||
1
align.h
1
align.h
|
|
@ -51,6 +51,5 @@ kswr_t align_avx2_u8(byte_mem_t *bmem, kswq_avx2_t *q, int tlen, const uint8_t *
|
||||||
int _e_ins, int xtra);
|
int _e_ins, int xtra);
|
||||||
kswr_t align_avx2_i16(byte_mem_t *bmem, kswq_avx2_t *q, int tlen, const uint8_t *target, int _o_del, int _e_del, int _o_ins,
|
kswr_t align_avx2_i16(byte_mem_t *bmem, kswq_avx2_t *q, int tlen, const uint8_t *target, int _o_del, int _e_del, int _o_ins,
|
||||||
int _e_ins, int xtra);
|
int _e_ins, int xtra);
|
||||||
|
|
||||||
int main_align(int argc, char *argv[]);
|
int main_align(int argc, char *argv[]);
|
||||||
#endif
|
#endif
|
||||||
|
|
@ -321,6 +321,7 @@ int extend_avx2_i16(byte_mem_t *bmem,
|
||||||
}
|
}
|
||||||
ins[0][0] = del[0][0] = score[0][0] = h0;
|
ins[0][0] = del[0][0] = score[0][0] = h0;
|
||||||
#endif
|
#endif
|
||||||
|
w = 1000;
|
||||||
|
|
||||||
for (D = 1; LIKELY(D < Dloop); ++D) {
|
for (D = 1; LIKELY(D < Dloop); ++D) {
|
||||||
// 边界条件一定要注意! tlen 大于,等于,小于 qlen时的情况
|
// 边界条件一定要注意! tlen 大于,等于,小于 qlen时的情况
|
||||||
|
|
|
||||||
|
|
@ -175,7 +175,7 @@ static const uint8_t reverse_mask[SIMD_WIDTH] = {7, 6, 5, 4, 3, 2, 1, 0, 15, 14,
|
||||||
max_vec = _mm256_max_epu8(max_vec, _mm256_alignr_epi8(max_vec, max_vec, 8)); \
|
max_vec = _mm256_max_epu8(max_vec, _mm256_alignr_epi8(max_vec, max_vec, 8)); \
|
||||||
max_vec = _mm256_max_epu8(max_vec, _mm256_permute2x128_si256(max_vec, max_vec, 0x01)); \
|
max_vec = _mm256_max_epu8(max_vec, _mm256_permute2x128_si256(max_vec, max_vec, 0x01)); \
|
||||||
m = MAX(m, maxVal[0]); \
|
m = MAX(m, maxVal[0]); \
|
||||||
if (maxVal[0] > 0 && m >= max) \
|
/*if (maxVal[0] > 0 && m >= max) \
|
||||||
{ \
|
{ \
|
||||||
for (j = beg, i = iend; j <= end; j += SIMD_WIDTH, i -= SIMD_WIDTH) \
|
for (j = beg, i = iend; j <= end; j += SIMD_WIDTH, i -= SIMD_WIDTH) \
|
||||||
{ \
|
{ \
|
||||||
|
|
@ -189,7 +189,7 @@ static const uint8_t reverse_mask[SIMD_WIDTH] = {7, 6, 5, 4, 3, 2, 1, 0, 15, 14,
|
||||||
mi = i - 1 - pos; \
|
mi = i - 1 - pos; \
|
||||||
} \
|
} \
|
||||||
} \
|
} \
|
||||||
}
|
}*/
|
||||||
|
|
||||||
// 每轮迭代后,交换数组
|
// 每轮迭代后,交换数组
|
||||||
#define SWAP_DATA_POINTER \
|
#define SWAP_DATA_POINTER \
|
||||||
|
|
@ -340,6 +340,7 @@ int extend_avx2_u8(byte_mem_t *bmem,
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
w = 1000;
|
||||||
for (D = 1; LIKELY(D < Dloop); ++D)
|
for (D = 1; LIKELY(D < Dloop); ++D)
|
||||||
{
|
{
|
||||||
// 边界条件一定要注意! tlen 大于,等于,小于 qlen时的情况
|
// 边界条件一定要注意! tlen 大于,等于,小于 qlen时的情况
|
||||||
|
|
|
||||||
|
|
@ -70,6 +70,7 @@ int extend_scalar(byte_mem_t *bmem, int qlen, const uint8_t *query, int tlen, co
|
||||||
max_ie = -1, gscore = -1;
|
max_ie = -1, gscore = -1;
|
||||||
max_off = 0;
|
max_off = 0;
|
||||||
beg = 0, end = qlen;
|
beg = 0, end = qlen;
|
||||||
|
w = 1000;
|
||||||
for (i = 0; LIKELY(i < tlen); ++i) // 对target逐个字符进行遍历
|
for (i = 0; LIKELY(i < tlen); ++i) // 对target逐个字符进行遍历
|
||||||
{
|
{
|
||||||
int t, f = 0, h1, m = 0, mj = -1;
|
int t, f = 0, h1, m = 0, mj = -1;
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue