/*
 * discrete_mem_main.c
 *
 * Benchmark driver that reads every query/target sequence into its own heap
 * allocation ("discrete" memory) and runs the three CPU sw kernels over them,
 * to see how that memory layout affects sw performance.
 *
 * Recovered from patch 5a41d5e2076959d15273326d90f13d0c079911b8
 * (author: zzh, Fri, 11 Aug 2023 00:35:43 +0800), subject: "Test the impact
 * of different memory allocation strategies".
 */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "sys/time.h"

#define SW_NORMAL 0
#define SW_AVX2 1
#define SW_CUDA 2
#define SW_ALL 3

#define BLOCK_BUF_SIZE 1048576
#define READ_BUF_SIZE 2048
#define SEQ_BUF_SIZE (BLOCK_BUF_SIZE + READ_BUF_SIZE)
// Upper bound on the number of tasks held in memory at once. A block is closed
// as soon as either this many lines or BLOCK_BUF_SIZE bytes of target data
// have been read, so the task array can never overflow.
#define BLOCK_MAX_LINES 65536

#ifdef SHOW_PERF
// Debug aid: measures the run time of the parts we are interested in.
// Returns the current wall-clock time in milliseconds.
int64_t get_mseconds(void)
{
    struct timeval tv;
    gettimeofday(&tv, NULL);
    // Integer arithmetic avoids the rounding of a double round-trip.
    return (int64_t)tv.tv_sec * 1000 + (int64_t)tv.tv_usec / 1000;
}

// Accumulated time (milliseconds) spent inside each kernel.
int64_t time_sw_normal = 0,
        time_sw_avx2 = 0,
        time_sw_avx2_u8 = 0;

#endif

extern int ksw_normal(int qlen, const uint8_t *query, int tlen, const uint8_t *target, int m, const int8_t *mat, int o_del, int e_del, int o_ins, int e_ins, int w, int end_bonus, int zdrop, int h0, int *_qle, int *_tle, int *_gtle, int *_gscore, int *_max_off);
extern int ksw_avx2(int qlen, const uint8_t *query, int tlen, const uint8_t *target, int is_left, int m, const int8_t *mat, int o_del, int e_del,
                    int o_ins, int e_ins, int a, int b, int w, int end_bonus, int zdrop, int h0, int *_qle, int *_tle, int *_gtle, int *_gscore, int *_max_off);
extern int ksw_avx2_u8(int qlen, const uint8_t *query, int tlen, const uint8_t *target, int is_left, int m, const int8_t *mat, int o_del, int e_del,
                       int o_ins, int e_ins, int a, int b, int w, int end_bonus, int zdrop, int h0, int *_qle, int *_tle, int *_gtle, int *_gscore, int *_max_off);

// One kernel invocation. query and target each own a separate heap buffer,
// which is the "discrete memory" layout this benchmark exercises.
struct sw_task
{
    char *query;        // NUL-terminated copy of one line of the query file
    size_t query_size;  // bytes stored in query, excluding the terminator
    char *target;       // NUL-terminated copy of one line of the target file
    size_t target_size; // bytes stored in target, excluding the terminator
    int qlen;           // 1st field of the info line, passed as qlen
    int tlen;           // 2nd field of the info line, passed as tlen
    int h0;             // 3rd field of the info line, passed as h0
};

// Prints an error for the given input file and terminates the program.
static void die(const char *path, const char *reason)
{
    fprintf(stderr, "error: %s: %s\n", path, reason);
    exit(EXIT_FAILURE);
}

// Opens path for reading; terminates the program if that fails.
static FILE *open_input(const char *path)
{
    FILE *fp = fopen(path, "r");
    if (fp == NULL)
    {
        perror(path);
        exit(EXIT_FAILURE);
    }
    return fp;
}

// Reads the next line of fp (trailing newline included) into a freshly
// allocated NUL-terminated buffer and stores its length in *size_out.
// Returns NULL at end of file. Lines that fill the read buffer without a
// newline are rejected, because fgets would otherwise split them and the
// three input files would get out of step.
static char *read_line_copy(FILE *fp, const char *path, size_t *size_out)
{
    char read_buf[READ_BUF_SIZE];

    if (fgets(read_buf, sizeof read_buf, fp) == NULL)
    {
        if (ferror(fp))
        {
            die(path, "read error");
        }
        return NULL;
    }

    const size_t size = strlen(read_buf);
    if (size == sizeof read_buf - 1 && read_buf[size - 1] != '\n')
    {
        die(path, "line does not fit into READ_BUF_SIZE");
    }

    char *copy = malloc(size + 1);
    if (copy == NULL)
    {
        die(path, "out of memory");
    }
    memcpy(copy, read_buf, size + 1);
    *size_out = size;
    return copy;
}

// Parses the next "qlen tlen h0" line of the info file into task and checks
// that both lengths stay inside the sequence buffers already stored in task,
// so the kernels never read past an allocation.
static void read_task_info(FILE *fp, const char *path, struct sw_task *task)
{
    char read_buf[READ_BUF_SIZE];

    if (fgets(read_buf, sizeof read_buf, fp) == NULL)
    {
        die(path, "fewer lines than the target file");
    }
    if (sscanf(read_buf, "%d %d %d", &task->qlen, &task->tlen, &task->h0) != 3)
    {
        die(path, "expected three integers per line");
    }
    if (task->qlen < 0 || (size_t)task->qlen > task->query_size)
    {
        die(path, "query length does not fit the stored query line");
    }
    if (task->tlen < 0 || (size_t)task->tlen > task->target_size)
    {
        die(path, "target length does not fit the stored target line");
    }
}

// Releases the per-line sequence buffers of the first count tasks.
static void free_tasks(struct sw_task *tasks, size_t count)
{
    for (size_t i = 0; i < count; ++i)
    {
        free(tasks[i].query);
        free(tasks[i].target);
        tasks[i].query = NULL;
        tasks[i].target = NULL;
    }
}

// Runs ksw_normal on every task and returns the sum of the returned scores.
static long long run_normal(const struct sw_task *tasks, size_t count, const int8_t *mat)
{
    int max_off[2];
    int qle, tle, gtle, gscore;
    long long score = 0;

    for (size_t i = 0; i < count; ++i)
    {
#ifdef SHOW_PERF
        int64_t start_time = get_mseconds();
#endif
        score += ksw_normal(
            tasks[i].qlen,
            (const uint8_t *)tasks[i].query,
            tasks[i].tlen,
            (const uint8_t *)tasks[i].target,
            5, mat, 6, 1, 6, 1, 100, 5, 100,
            tasks[i].h0,
            &qle, &tle, &gtle, &gscore, &max_off[0]);
#ifdef SHOW_PERF
        time_sw_normal += get_mseconds() - start_time;
#endif
    }
    return score;
}

// Runs ksw_avx2 on every task and returns the sum of the returned scores.
static long long run_avx2(const struct sw_task *tasks, size_t count, const int8_t *mat)
{
    int max_off[2];
    int qle, tle, gtle, gscore;
    long long score = 0;

    for (size_t i = 0; i < count; ++i)
    {
#ifdef SHOW_PERF
        int64_t start_time = get_mseconds();
#endif
        score += ksw_avx2(
            tasks[i].qlen,
            (const uint8_t *)tasks[i].query,
            tasks[i].tlen,
            (const uint8_t *)tasks[i].target,
            0, 5, mat, 6, 1, 6, 1,
            1, 4,
            100, 5, 100,
            tasks[i].h0,
            &qle, &tle, &gtle, &gscore, &max_off[0]);
#ifdef SHOW_PERF
        time_sw_avx2 += get_mseconds() - start_time;
#endif
    }
    return score;
}

// Runs ksw_avx2_u8 on every task and returns the sum of the returned scores.
static long long run_avx2_u8(const struct sw_task *tasks, size_t count, const int8_t *mat)
{
    int max_off[2];
    int qle, tle, gtle, gscore;
    long long score = 0;

    for (size_t i = 0; i < count; ++i)
    {
#ifdef SHOW_PERF
        int64_t start_time = get_mseconds();
#endif
        score += ksw_avx2_u8(
            tasks[i].qlen,
            (const uint8_t *)tasks[i].query,
            tasks[i].tlen,
            (const uint8_t *)tasks[i].target,
            0, 5, mat, 6, 1, 6, 1,
            1, 4,
            100, 5, 100,
            tasks[i].h0,
            &qle, &tle, &gtle, &gscore, &max_off[0]);
#ifdef SHOW_PERF
        time_sw_avx2_u8 += get_mseconds() - start_time;
#endif
    }
    return score;
}

/*
 * Program entry point.
 *
 * The original design note describes one command-line argument
 * (normal/avx2/cuda) that selects the sw algorithm. The arguments are
 * currently ignored and the normal, avx2 and avx2_u8 kernels are all run.
 *
 * The three input files are processed in blocks: target lines are read until
 * roughly BLOCK_BUF_SIZE bytes (or BLOCK_MAX_LINES lines) are buffered, the
 * same number of query and info lines is read, every kernel is run over the
 * block, and the block's buffers are released before the next one is read.
 *
 * NOTE(review): sequence bytes are handed to the kernels exactly as read from
 * the files; presumably the files already hold the symbol encoding that the
 * 5x5 score matrix expects - confirm against the data files.
 */
int main(int argc, char *argv[])
{
    (void)argc;
    (void)argv;

    // Score matrix handed to the kernels together with m = 5.
    static const int8_t mat[25] = {1, -4, -4, -4, -1,
                                   -4, 1, -4, -4, -1,
                                   -4, -4, 1, -4, -1,
                                   -4, -4, -4, 1, -1,
                                   -1, -1, -1, -1, -1};

    // Test data. A smaller data set (q_s.fa / t_s.fa / i_s.txt) lives in the
    // same directory.
    const char *qf_path = "/public/home/zzh/data/sw/q_m.fa";
    const char *tf_path = "/public/home/zzh/data/sw/t_m.fa";
    const char *if_path = "/public/home/zzh/data/sw/i_m.txt";

    FILE *query_f = open_input(qf_path);
    FILE *target_f = open_input(tf_path);
    FILE *info_f = open_input(if_path);

    struct sw_task *tasks = calloc(BLOCK_MAX_LINES, sizeof *tasks);
    if (tasks == NULL)
    {
        die("task table", "out of memory");
    }

    long long score_normal = 0, score_avx2 = 0, score_avx2_u8 = 0;

    // Read a bounded amount of data, process it, and repeat until the target
    // file is exhausted.
    for (;;)
    {
        // Target sequences usually take the most space, so read them first to
        // find out how many lines fit into one block; the query and info
        // files are then read for exactly that many lines.
        size_t block_line_num = 0;
        size_t cur_read_size = 0;
        while (block_line_num < BLOCK_MAX_LINES && cur_read_size < BLOCK_BUF_SIZE)
        {
            struct sw_task *task = &tasks[block_line_num];
            char *line = read_line_copy(target_f, tf_path, &task->target_size);
            if (line == NULL)
            {
                break;
            }
            task->target = line;
            cur_read_size += task->target_size;
            ++block_line_num;
        }
        if (block_line_num == 0)
        {
            break;
        }

        // Read the matching query lines.
        for (size_t i = 0; i < block_line_num; ++i)
        {
            tasks[i].query = read_line_copy(query_f, qf_path, &tasks[i].query_size);
            if (tasks[i].query == NULL)
            {
                die(qf_path, "fewer lines than the target file");
            }
        }

        // Read the matching info lines.
        for (size_t i = 0; i < block_line_num; ++i)
        {
            read_task_info(info_f, if_path, &tasks[i]);
        }

        // Performance test: run every kernel over the same block.
        score_normal += run_normal(tasks, block_line_num, mat);
        score_avx2 += run_avx2(tasks, block_line_num, mat);
        score_avx2_u8 += run_avx2_u8(tasks, block_line_num, mat);

        free_tasks(tasks, block_line_num);
    }

#ifdef SHOW_PERF
    fprintf(stderr, "time_sw_normal: %f s; score: %lld\n", time_sw_normal / 1000.0, score_normal);
    fprintf(stderr, "time_sw_avx2: %f s; score: %lld\n", time_sw_avx2 / 1000.0, score_avx2);
    fprintf(stderr, "time_sw_avx2_u8: %f s; score: %lld\n", time_sw_avx2_u8 / 1000.0, score_avx2_u8);
#else
    // Scores are only reported when SHOW_PERF is defined.
    (void)score_normal;
    (void)score_avx2;
    (void)score_avx2_u8;
#endif

    free(tasks);
    fclose(query_f);
    fclose(target_f);
    fclose(info_f);
    return 0;
}