2023-08-10 15:28:45 +08:00
|
|
|
|
#include <stdlib.h>
|
|
|
|
|
|
#include <stdio.h>
|
|
|
|
|
|
#include <string.h>
|
|
|
|
|
|
#include <stdint.h>
|
|
|
|
|
|
#include <assert.h>
|
2023-08-18 20:27:55 +08:00
|
|
|
|
#include <time.h>
|
2023-08-10 15:28:45 +08:00
|
|
|
|
#include "sys/time.h"
|
2023-08-25 14:47:30 +08:00
|
|
|
|
#include "thread_mem.h"
|
2023-08-26 00:38:38 +08:00
|
|
|
|
#include "ksw_ext.h"
|
|
|
|
|
|
#include "utils.h"
|
2023-08-10 15:28:45 +08:00
|
|
|
|
|
|
|
|
|
|
// Kernel-family selectors. They are not referenced by the code visible in
// this file. NOTE(review): presumably consumed by other translation units
// or by build configuration; confirm before removing.
#define SW_NORMAL 0
#define SW_AVX2 1
#define SW_CUDA 2
#define SW_ALL 3

// Soft limit, in bytes, on the target data loaded per benchmark block:
// main() stops adding target lines once this many bytes have been read.
#define BLOCK_BUF_SIZE 1048576
// Size of the line buffer handed to fgets(); one input line holds at most
// READ_BUF_SIZE - 1 characters.
#define READ_BUF_SIZE 2048
// Allocation size of the query/target sequence buffers: the block limit plus
// head-room for the one line that may straddle the limit.
#define SEQ_BUF_SIZE (BLOCK_BUF_SIZE + READ_BUF_SIZE)

// Initial size passed to thread_mem_init_alloc() for each kernel's scratch
// memory.
#define INIT_ALLOC_SIZE 4096

// Divisor that turns accumulated clock() ticks into seconds (as a double).
#define DIVIDE_BY (CLOCKS_PER_SEC * 1.0)

// Number of kernels benchmarked; sizes the per-kernel arrays (scratch
// memory, scores, timings).
#define KERNEL_NUM 7
|
|
|
|
|
|
|
2023-08-10 15:28:45 +08:00
|
|
|
|
#ifdef SHOW_PERF
|
|
|
|
|
|
// Debug helper used to time the regions of interest.
//
// Despite its name, this returns the processor time consumed so far as raw
// clock() ticks, not milliseconds; divide a tick difference by
// CLOCKS_PER_SEC to obtain seconds.
int64_t get_mseconds(void)
{
    return (int64_t)clock();
}
|
|
|
|
|
|
|
2023-08-26 00:38:38 +08:00
|
|
|
|
// Accumulated get_mseconds() tick deltas per kernel, indexed by kernel number.
int64_t time_sw[KERNEL_NUM] = {0};
|
2023-08-10 15:28:45 +08:00
|
|
|
|
|
|
|
|
|
|
#endif
|
|
|
|
|
|
|
2023-08-26 00:38:38 +08:00
|
|
|
|
// Runs kernel `func` once per row of the current block and accumulates the
// returned scores into score_total[kernel_num].
//
// The macro deliberately captures these variables from the expanding scope:
//   i, block_line_num, cur_query_pos, cur_target_pos, score, score_total,
//   tmem, info_arr, query_arr, target_arr, mat, qle, tle, gtle, gscore,
//   max_off.
// For row i, info_arr[i][0] and info_arr[i][1] are passed to the kernel and
// are also the number of bytes by which the query/target cursors advance;
// info_arr[i][2] is passed as the 15th argument.
// NOTE(review): the literal arguments (5, mat, 6, 1, 6, 1, 100, 5, 100)
// presumably follow the BWA ksw_extend2 order (m, mat, o_del, e_del, o_ins,
// e_ins, w, end_bonus, zdrop) — confirm against ksw_ext.h.
//
// Wrapped in do { } while (0) so the expansion is a single statement.
#define _PERFORMANCE_TEST_NORMAL(kernel_num, func)         \
    do                                                     \
    {                                                      \
        cur_query_pos = 0;                                 \
        cur_target_pos = 0;                                \
        for (i = 0; i < block_line_num; ++i)               \
        {                                                  \
            score[kernel_num] = func(                      \
                &tmem[kernel_num],                         \
                info_arr[i][0],                            \
                (uint8_t *)query_arr + cur_query_pos,      \
                info_arr[i][1],                            \
                (uint8_t *)target_arr + cur_target_pos,    \
                5, mat, 6, 1, 6, 1, 100, 5, 100,           \
                info_arr[i][2],                            \
                &qle, &tle, &gtle, &gscore, &max_off[0]);  \
            score_total[kernel_num] += score[kernel_num];  \
            cur_query_pos += info_arr[i][0];               \
            cur_target_pos += info_arr[i][1];              \
        }                                                  \
    } while (0)
|
2023-08-15 22:19:39 +08:00
|
|
|
|
|
2023-08-26 00:38:38 +08:00
|
|
|
|
// Runs AVX2-style kernel `func` once per row of the current block and
// accumulates the returned scores into score_total[kernel_num].
//
// The macro deliberately captures these variables from the expanding scope:
//   i, block_line_num, cur_query_pos, cur_target_pos, score, score_total,
//   tmem, info_arr, query_arr, target_arr, qle, tle, gtle, gscore, max_off.
// For row i, info_arr[i][0] and info_arr[i][1] are passed to the kernel and
// are also the number of bytes by which the query/target cursors advance;
// info_arr[i][2] is passed as the 15th argument.
// NOTE(review): the literal arguments (0, 6, 1, 6, 1, 1, 4, 100, 5) are
// presumably (is_left, o_del, e_del, o_ins, e_ins, match, mismatch, w,
// end_bonus) — confirm against ksw_ext.h.
//
// Wrapped in do { } while (0) so the expansion is a single statement.
#define _PERFORMANCE_TEST_AVX2(kernel_num, func)           \
    do                                                     \
    {                                                      \
        cur_query_pos = 0;                                 \
        cur_target_pos = 0;                                \
        for (i = 0; i < block_line_num; ++i)               \
        {                                                  \
            score[kernel_num] = func(                      \
                &tmem[kernel_num],                         \
                info_arr[i][0],                            \
                (uint8_t *)query_arr + cur_query_pos,      \
                info_arr[i][1],                            \
                (uint8_t *)target_arr + cur_target_pos,    \
                0, 6, 1, 6, 1,                             \
                1, 4,                                      \
                100, 5,                                    \
                info_arr[i][2],                            \
                &qle, &tle, &gtle, &gscore, &max_off[0]);  \
            score_total[kernel_num] += score[kernel_num];  \
            cur_query_pos += info_arr[i][0];               \
            cur_target_pos += info_arr[i][1];              \
        }                                                  \
    } while (0)
|
|
|
|
|
|
|
|
|
|
|
|
// Public benchmark entry macros.
//
// With SHOW_PERF defined, each run is bracketed by get_mseconds() calls and
// the elapsed ticks are added to time_sw[kernel_num]; the expanding scope
// must provide an int64_t `start_time`. Without SHOW_PERF they forward
// directly to the untimed _PERFORMANCE_TEST_* macros.
#ifdef SHOW_PERF
// Wrapped in do { } while (0) so the three statements behave as one (safe
// after an unbraced if/else).
#define PERFORMANCE_TEST_NORMAL(kernel_num, func)            \
    do                                                       \
    {                                                        \
        start_time = get_mseconds();                         \
        _PERFORMANCE_TEST_NORMAL(kernel_num, func);          \
        time_sw[kernel_num] += get_mseconds() - start_time;  \
    } while (0)

#define PERFORMANCE_TEST_AVX2(kernel_num, func)              \
    do                                                       \
    {                                                        \
        start_time = get_mseconds();                         \
        _PERFORMANCE_TEST_AVX2(kernel_num, func);            \
        time_sw[kernel_num] += get_mseconds() - start_time;  \
    } while (0)
#else
#define PERFORMANCE_TEST_NORMAL(kernel_num, func) _PERFORMANCE_TEST_NORMAL(kernel_num, func)
#define PERFORMANCE_TEST_AVX2(kernel_num, func) _PERFORMANCE_TEST_AVX2(kernel_num, func)
#endif
|
|
|
|
|
|
|
|
|
|
|
|
// 读取一行序列数据
|
|
|
|
|
|
int read_seq_line(char *read_buf, FILE *f_ptr, char *out_arr)
|
2023-08-25 14:47:30 +08:00
|
|
|
|
{
|
2023-08-26 00:38:38 +08:00
|
|
|
|
if (fgets(read_buf, READ_BUF_SIZE, f_ptr) == NULL)
|
|
|
|
|
|
return 0;
|
|
|
|
|
|
int line_size = strlen(read_buf);
|
|
|
|
|
|
assert(line_size < READ_BUF_SIZE);
|
|
|
|
|
|
if (read_buf[line_size - 1] == '\n')
|
|
|
|
|
|
{
|
|
|
|
|
|
read_buf[line_size - 1] = '\0';
|
|
|
|
|
|
line_size--;
|
|
|
|
|
|
}
|
|
|
|
|
|
convert_char_to_2bit(read_buf);
|
|
|
|
|
|
strncpy(out_arr, read_buf, line_size);
|
|
|
|
|
|
return line_size;
|
2023-08-25 14:47:30 +08:00
|
|
|
|
}
|
|
|
|
|
|
|
2023-08-10 15:28:45 +08:00
|
|
|
|
// 程序执行入口
|
|
|
|
|
|
int main(int argc, char *argv[])
|
|
|
|
|
|
{
|
2023-08-26 00:38:38 +08:00
|
|
|
|
const char *qf_path = argv[1];
|
|
|
|
|
|
const char *tf_path = argv[2];
|
|
|
|
|
|
const char *if_path = argv[3];
|
|
|
|
|
|
|
2023-08-10 15:28:45 +08:00
|
|
|
|
// 初始化一些全局参数
|
|
|
|
|
|
int8_t mat[25] = {1, -4, -4, -4, -1,
|
|
|
|
|
|
-4, 1, -4, -4, -1,
|
|
|
|
|
|
-4, -4, 1, -4, -1,
|
|
|
|
|
|
-4, -4, -4, 1, -1,
|
|
|
|
|
|
-1, -1, -1, -1, -1};
|
|
|
|
|
|
int max_off[2];
|
|
|
|
|
|
int qle, tle, gtle, gscore;
|
2023-08-26 00:38:38 +08:00
|
|
|
|
thread_mem_t tmem[KERNEL_NUM];
|
|
|
|
|
|
int i, j;
|
|
|
|
|
|
for (i = 0; i < KERNEL_NUM; ++i)
|
|
|
|
|
|
{
|
|
|
|
|
|
thread_mem_init_alloc(tmem + i, INIT_ALLOC_SIZE);
|
|
|
|
|
|
}
|
|
|
|
|
|
// 记录计算出的分数
|
|
|
|
|
|
int score[KERNEL_NUM] = {0};
|
|
|
|
|
|
int score_total[KERNEL_NUM] = {0};
|
2023-08-10 15:28:45 +08:00
|
|
|
|
|
|
|
|
|
|
// 读取测试数据
|
|
|
|
|
|
char *query_arr = (char *)malloc(SEQ_BUF_SIZE);
|
|
|
|
|
|
char *target_arr = (char *)malloc(SEQ_BUF_SIZE);
|
2023-08-18 20:27:55 +08:00
|
|
|
|
int *info_buf = (int *)malloc(SEQ_BUF_SIZE * sizeof(int));
|
|
|
|
|
|
int **info_arr = (int **)malloc(SEQ_BUF_SIZE * sizeof(int *));
|
2023-08-26 00:38:38 +08:00
|
|
|
|
|
2023-08-10 15:28:45 +08:00
|
|
|
|
FILE *query_f = 0, *target_f = 0, *info_f = 0;
|
2023-08-26 00:38:38 +08:00
|
|
|
|
FILE *normal_out_f = 0, *avx2_out_f = 0, *avx2_u8_out_f = 0;
|
|
|
|
|
|
|
2023-08-10 15:28:45 +08:00
|
|
|
|
query_f = fopen(qf_path, "r");
|
|
|
|
|
|
target_f = fopen(tf_path, "r");
|
|
|
|
|
|
info_f = fopen(if_path, "r");
|
|
|
|
|
|
|
2023-08-15 22:19:39 +08:00
|
|
|
|
// 将每次比对的得分等信息写入文件,进行debug
|
2023-08-26 00:38:38 +08:00
|
|
|
|
// normal_out_f = fopen("normal_out.txt", "w");
|
|
|
|
|
|
// avx2_out_f = fopen("avx2_out.txt", "w");
|
|
|
|
|
|
// avx2_u8_out_f = fopen("avx2_u8_out.txt", "w");
|
2023-08-15 22:19:39 +08:00
|
|
|
|
|
2023-08-10 15:28:45 +08:00
|
|
|
|
// 每次读取一定量的数据,然后执行,直到处理完所有数据
|
|
|
|
|
|
int total_line_num = 0; // 目前处理的总的数据行数
|
|
|
|
|
|
int block_line_num = 0; // 当前循环包含的数据行数
|
2023-08-26 00:38:38 +08:00
|
|
|
|
int cur_query_pos, cur_target_pos;
|
|
|
|
|
|
int64_t start_time;
|
2023-08-10 15:28:45 +08:00
|
|
|
|
char read_buf[READ_BUF_SIZE]; // 读文件缓存
|
2023-08-26 00:38:38 +08:00
|
|
|
|
|
2023-08-10 15:28:45 +08:00
|
|
|
|
// 初始化info_arr数组
|
|
|
|
|
|
i = 0;
|
|
|
|
|
|
j = 0;
|
|
|
|
|
|
while (1)
|
|
|
|
|
|
{
|
|
|
|
|
|
if (j > BLOCK_BUF_SIZE)
|
|
|
|
|
|
break;
|
|
|
|
|
|
info_arr[i] = &info_buf[j];
|
|
|
|
|
|
i += 1;
|
|
|
|
|
|
j += 3;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
while (!feof(target_f))
|
|
|
|
|
|
{
|
2023-08-26 00:38:38 +08:00
|
|
|
|
block_line_num = 0; // 记录每次读取的行数
|
2023-08-10 15:28:45 +08:00
|
|
|
|
// target序列一般占用存储最多,先读取target,看一个buf能读多少行,query和info就按照这个行数来读
|
|
|
|
|
|
int cur_read_size = 0;
|
|
|
|
|
|
while (!feof(target_f) && cur_read_size < BLOCK_BUF_SIZE)
|
|
|
|
|
|
{
|
2023-08-26 00:38:38 +08:00
|
|
|
|
int line_size = read_seq_line(read_buf, target_f, target_arr + cur_read_size);
|
|
|
|
|
|
if (line_size == 0)
|
2023-08-10 15:28:45 +08:00
|
|
|
|
break;
|
2023-08-26 00:38:38 +08:00
|
|
|
|
cur_read_size += line_size;
|
2023-08-10 15:28:45 +08:00
|
|
|
|
++block_line_num;
|
|
|
|
|
|
++total_line_num;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// 读query
|
|
|
|
|
|
cur_read_size = 0;
|
|
|
|
|
|
for (i = 0; i < block_line_num; ++i)
|
|
|
|
|
|
{
|
2023-08-26 00:38:38 +08:00
|
|
|
|
int line_size = read_seq_line(read_buf, query_f, query_arr + cur_read_size);
|
|
|
|
|
|
if (line_size == 0)
|
2023-08-10 15:28:45 +08:00
|
|
|
|
break;
|
|
|
|
|
|
cur_read_size += line_size;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// 读info
|
|
|
|
|
|
cur_read_size = 0;
|
|
|
|
|
|
for (i = 0; i < block_line_num; ++i)
|
|
|
|
|
|
{
|
|
|
|
|
|
if (fgets(read_buf, READ_BUF_SIZE, info_f) == NULL)
|
|
|
|
|
|
break;
|
|
|
|
|
|
const int line_size = strlen(read_buf);
|
|
|
|
|
|
assert(line_size < READ_BUF_SIZE);
|
|
|
|
|
|
sscanf(read_buf, "%d %d %d\n", &info_arr[i][0], &info_arr[i][1], &info_arr[i][2]);
|
|
|
|
|
|
cur_read_size += line_size;
|
|
|
|
|
|
// fprintf(stderr, "%-8d%-8d%-8d\n", info_arr[i][0], info_arr[i][1], info_arr[i][2]);
|
|
|
|
|
|
// fprintf(stderr, "%s\n", read_buf);
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// 性能测试
|
|
|
|
|
|
|
2023-08-26 00:38:38 +08:00
|
|
|
|
// normal sw
|
|
|
|
|
|
PERFORMANCE_TEST_NORMAL(0, ksw_extend_normal);
|
2023-08-25 14:47:30 +08:00
|
|
|
|
|
2023-08-26 00:38:38 +08:00
|
|
|
|
// avx2
|
|
|
|
|
|
PERFORMANCE_TEST_AVX2(1, ksw_extend_avx2);
|
2023-08-25 14:47:30 +08:00
|
|
|
|
|
2023-08-26 00:38:38 +08:00
|
|
|
|
// avx2 u8
|
|
|
|
|
|
PERFORMANCE_TEST_AVX2(2, ksw_extend_avx2_u8);
|
2023-08-25 14:47:30 +08:00
|
|
|
|
|
2023-08-26 00:38:38 +08:00
|
|
|
|
// avx2 heuristics
|
|
|
|
|
|
PERFORMANCE_TEST_AVX2(3, ksw_extend_avx2_heuristics);
|
2023-08-10 15:28:45 +08:00
|
|
|
|
|
2023-08-26 00:38:38 +08:00
|
|
|
|
// avx2 u8 heuristics
|
|
|
|
|
|
PERFORMANCE_TEST_AVX2(4, ksw_extend_avx2_u8_heuristics);
|
|
|
|
|
|
|
|
|
|
|
|
// avx2 mem aligned
|
|
|
|
|
|
PERFORMANCE_TEST_AVX2(5, ksw_extend_avx2_aligned);
|
|
|
|
|
|
|
|
|
|
|
|
// avx2 u8 mem aligned
|
|
|
|
|
|
PERFORMANCE_TEST_AVX2(6, ksw_extend_avx2_u8_aligned);
|
|
|
|
|
|
}
|
2023-08-10 15:28:45 +08:00
|
|
|
|
|
|
|
|
|
|
#ifdef SHOW_PERF
|
2023-08-26 00:38:38 +08:00
|
|
|
|
char *kernel_names[7] = {
|
|
|
|
|
|
"normal",
|
|
|
|
|
|
"avx2",
|
|
|
|
|
|
"avx2_u8",
|
|
|
|
|
|
"avx2_heuristics",
|
|
|
|
|
|
"avx2_u8_heuristics",
|
|
|
|
|
|
"avx2_aligned",
|
|
|
|
|
|
"avx2_u8_aligned"};
|
|
|
|
|
|
|
|
|
|
|
|
for (i = 0; i < KERNEL_NUM; ++i)
|
|
|
|
|
|
{
|
|
|
|
|
|
fprintf(stderr, "[%18s] time: %9.6f s; score: %d\n", kernel_names[i], time_sw[i] / DIVIDE_BY, score_total[i]);
|
|
|
|
|
|
}
|
2023-08-10 15:28:45 +08:00
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
|
|
if (query_f != 0)
|
|
|
|
|
|
fclose(query_f);
|
|
|
|
|
|
if (target_f != 0)
|
|
|
|
|
|
fclose(target_f);
|
|
|
|
|
|
if (info_f != 0)
|
|
|
|
|
|
fclose(info_f);
|
2023-08-15 22:19:39 +08:00
|
|
|
|
if (avx2_out_f != 0)
|
|
|
|
|
|
fclose(avx2_out_f);
|
|
|
|
|
|
if (avx2_u8_out_f != 0)
|
|
|
|
|
|
fclose(avx2_u8_out_f);
|
|
|
|
|
|
if (normal_out_f != 0)
|
|
|
|
|
|
fclose(normal_out_f);
|
2023-08-18 20:27:55 +08:00
|
|
|
|
}
|