/********************************************************************************************* Description: The entry for sw performance tests Copyright : All right reserved by NCIC.ICT Author : Zhang Zhonghai Date : 2023/08/20 ***********************************************************************************************/ #include #include #include #include #include #include #include "sys/time.h" #include "thread_mem.h" #include "ksw_ext.h" #include "utils.h" #include "common.h" #define BLOCK_BUF_SIZE 1048576 #define READ_BUF_SIZE 2048 #define SEQ_BUF_SIZE (BLOCK_BUF_SIZE + READ_BUF_SIZE) #define INIT_ALLOC_SIZE 4096 #define DIVIDE_BY (CLOCKS_PER_SEC * 1.0) #ifdef SHOW_PERF // 用来调试,计算感兴趣部分的运行时间 // 获取当前毫秒数 int64_t get_mseconds() { // struct timeval tv; // gettimeofday(&tv, NULL); // return (int64_t)1000 * (tv.tv_sec + ((1e-6) * tv.tv_usec)); return clock(); } int64_t time_sw[KERNEL_NUM] = {0}; #endif #ifdef DEBUG_RETURN_VALUE #define OUTPUT_RETVAL_1(kernel_num) \ fprintf(retval_f_arr[kernel_num], "%d\t%d\t%d\t%d\t", score[kernel_num], info_arr[i][0], info_arr[i][1], info_arr[i][2]); \ for (j = cur_query_pos - info_arr[i][0]; j < cur_query_pos; ++j) \ { \ fprintf(retval_f_arr[kernel_num], "%c", t_2bit2char[(uint8_t)query_arr[j]]); \ } \ fprintf(retval_f_arr[kernel_num], "\t"); \ for (j = cur_target_pos - info_arr[i][1]; j < cur_target_pos; ++j) \ { \ fprintf(retval_f_arr[kernel_num], "%c", t_2bit2char[(uint8_t)target_arr[j]]); \ } \ fprintf(retval_f_arr[kernel_num], "\n"); #define OUTPUT_RETVAL_SCORE \ fprintf(retval_f_arr[kernel_num], "%d\t%d\t%d\t%d\t%d\t%d\n", score[kernel_num], qle, tle, gtle, gscore, max_off[0]) // fprintf(retval_f_arr[kernel_num], " %d %d\n", cur_query_pos, info_arr[i][0]); // fprintf(retval_f_arr[kernel_num], "%d\t%d\t%d\t%d\n", score[kernel_num], info_arr[i][0], info_arr[i][1], info_arr[i][2]) #define OUTPUT_RETVAL_target(kernel_num) \ for (j = cur_target_pos - info_arr[i][1]; j < cur_target_pos; ++j) \ { \ fprintf(retval_f_arr[kernel_num], "%c", t_2bit2char[(uint8_t)target_arr[j]]); \ } \ fprintf(retval_f_arr[kernel_num], "\n"); #define OUTPUT_RETVAL_query(kernel_num) \ for (j = cur_query_pos - info_arr[i][0]; j < cur_query_pos; ++j) \ { \ fprintf(retval_f_arr[kernel_num], "%c", t_2bit2char[(uint8_t)query_arr[j]]); \ } \ fprintf(retval_f_arr[kernel_num], "\n"); #define OUTPUT_RETVAL_INFO(kernel_num) \ fprintf(retval_f_arr[kernel_num], "%-8d%-8d%-8d\n", info_arr[i][0], info_arr[i][1], info_arr[i][2]); #define OUTPUT_RETVAL(kernel_num) OUTPUT_RETVAL_target(kernel_num) #else #define OUTPUT_RETVAL(kernel_num) #endif #define _PERFORMANCE_TEST_NORMAL(kernel_num, func) \ cur_query_pos = 0; \ cur_target_pos = 0; \ for (i = 0; i < block_line_num; ++i) \ { \ score[kernel_num] = func( \ &tmem[kernel_num], \ info_arr[i][0], \ (uint8_t *)query_arr + cur_query_pos, \ info_arr[i][1], \ (uint8_t *)target_arr + cur_target_pos, \ 5, mat, 6, 1, 6, 1, 100, 5, 100, \ info_arr[i][2], \ &qle, &tle, >le, &gscore, &max_off[0]); \ score_total[kernel_num] += score[kernel_num]; \ cur_query_pos += info_arr[i][0]; \ cur_target_pos += info_arr[i][1]; \ OUTPUT_RETVAL(0); \ } #define _PERFORMANCE_TEST_AVX2(kernel_num, func) \ cur_query_pos = 0; \ cur_target_pos = 0; \ for (i = 0; i < block_line_num; ++i) \ { \ score[kernel_num] = func( \ &tmem[kernel_num], \ info_arr[i][0], \ (uint8_t *)query_arr + cur_query_pos, \ info_arr[i][1], \ (uint8_t *)target_arr + cur_target_pos, \ 0, 6, 1, 6, 1, \ 1, 4, \ 100, 5, \ info_arr[i][2], \ &qle, &tle, >le, &gscore, &max_off[0]); \ score_total[kernel_num] += score[kernel_num]; \ cur_query_pos += info_arr[i][0]; \ cur_target_pos += info_arr[i][1]; \ OUTPUT_RETVAL(kernel_num); \ } #define _PERFORMANCE_TEST_AVX2_T1(kernel_num, func) \ cur_query_pos = 0; \ cur_target_pos = 0; \ for (i = 0; i < block_line_num; ++i) \ { \ cur_query_pos += info_arr[i][0]; \ cur_target_pos += info_arr[i][1]; \ OUTPUT_RETVAL(kernel_num); \ } #ifdef SHOW_PERF #define PERFORMANCE_TEST_NORMAL(kernel_num, func) \ start_time = get_mseconds(); \ _PERFORMANCE_TEST_NORMAL(kernel_num, func); \ time_sw[kernel_num] += get_mseconds() - start_time #define PERFORMANCE_TEST_AVX2(kernel_num, func) \ start_time = get_mseconds(); \ _PERFORMANCE_TEST_AVX2_T1(kernel_num, func); \ time_sw[kernel_num] += get_mseconds() - start_time #else #define PERFORMANCE_TEST_NORMAL(kernel_num, func) _PERFORMANCE_TEST_NORMAL(kernel_num, func) #define PERFORMANCE_TEST_AVX2(kernel_num, func) _PERFORMANCE_TEST_AVX2(kernel_num, func) #endif // 读取一行序列数据 int read_seq_line(char *read_buf, FILE *f_ptr, char *out_arr) { if (fgets(read_buf, READ_BUF_SIZE, f_ptr) == NULL) return -1; int line_size = strlen(read_buf); assert(line_size < READ_BUF_SIZE); if (read_buf[line_size - 1] == '\n') { read_buf[line_size - 1] = '\0'; line_size--; } convert_char_to_2bit(read_buf); memcpy(out_arr, read_buf, line_size); return line_size; } // 全局变量 // 将每次比对的得分等信息写入文件,进行debug FILE *ins_ext_f_arr[KERNEL_NUM] = {0}, *del_ext_f_arr[KERNEL_NUM] = {0}, *score_f_arr[KERNEL_NUM] = {0}, *retval_f_arr[KERNEL_NUM] = {0}; // 程序执行入口 int main(int argc, char *argv[]) { const char *qf_path = argv[1]; const char *tf_path = argv[2]; const char *if_path = argv[3]; // 初始化一些全局参数 int8_t mat[25] = {1, -4, -4, -4, -1, -4, 1, -4, -4, -1, -4, -4, 1, -4, -1, -4, -4, -4, 1, -1, -1, -1, -1, -1, -1}; int max_off[2]; int qle, tle, gtle, gscore; thread_mem_t tmem[KERNEL_NUM]; int i, j; for (i = 0; i < KERNEL_NUM; ++i) { thread_mem_init_alloc(tmem + i, INIT_ALLOC_SIZE); } // 记录计算出的分数 int score[KERNEL_NUM] = {0}; int64_t score_total[KERNEL_NUM] = {0}; // 读取测试数据 char *query_arr = (char *)malloc(SEQ_BUF_SIZE); char *target_arr = (char *)malloc(SEQ_BUF_SIZE); int *info_buf = (int *)malloc(SEQ_BUF_SIZE * sizeof(int)); int **info_arr = (int **)malloc(SEQ_BUF_SIZE * sizeof(int *)); FILE *query_f = 0, *target_f = 0, *info_f = 0; // 每次读取一定量的数据,然后执行,直到处理完所有数据 int64_t total_line_num = 0; // 目前处理的总的数据行数 int block_line_num = 0; // 当前循环包含的数据行数 int cur_query_pos, cur_target_pos; int64_t start_time; char read_buf[READ_BUF_SIZE]; // 读文件缓存 #ifdef DEBUG_OUT for (i = 0; i < KERNEL_NUM; ++i) { char out_path[64]; sprintf(out_path, "/home/zzh/work/sw_perf/output/ins_%d.txt", i); ins_ext_f_arr[i] = fopen(out_path, "w"); sprintf(out_path, "/home/zzh/work/sw_perf/output/del_%d.txt", i); del_ext_f_arr[i] = fopen(out_path, "w"); sprintf(out_path, "/home/zzh/work/sw_perf/output/score_%d.txt", i); score_f_arr[i] = fopen(out_path, "w"); } #endif #ifdef DEBUG_RETURN_VALUE for (i = 0; i < KERNEL_NUM; ++i) { char out_path[64]; sprintf(out_path, "/home/zzh/work/sw_perf/output/retval_%d.txt", i); retval_f_arr[i] = fopen(out_path, "w"); } #endif query_f = fopen(qf_path, "r"); target_f = fopen(tf_path, "r"); info_f = fopen(if_path, "r"); // 初始化info_arr数组 i = 0; j = 0; while (1) { if (j > BLOCK_BUF_SIZE) break; info_arr[i] = &info_buf[j]; i += 1; j += 3; } while (!feof(target_f)) { block_line_num = 0; // 记录每次读取的行数 // target序列一般占用存储最多,先读取target,看一个buf能读多少行,query和info就按照这个行数来读 int cur_read_size = 0; while (!feof(target_f) && cur_read_size < BLOCK_BUF_SIZE) { int line_size = read_seq_line(read_buf, target_f, target_arr + cur_read_size); // for (j = 0; j < line_size; ++j) //{ // // fprintf(stderr, "%c", t_2bit2char[(uint8_t)read_buf[j]]); // fprintf(stderr, "%c", t_2bit2char[(uint8_t)target_arr[j + cur_read_size]]); // } // fprintf(stderr, "\n"); // fprintf(retval_f_arr[1], "%d\n", line_size); if (line_size == -1) break; cur_read_size += line_size; ++block_line_num; ++total_line_num; } // 读query cur_read_size = 0; for (i = 0; i < block_line_num; ++i) { int line_size = read_seq_line(read_buf, query_f, query_arr + cur_read_size); // int j; // for (j = cur_read_size; j < cur_read_size + line_size; ++j) //{ // fprintf(retval_f_arr[0], "%c", t_2bit2char[(uint8_t)query_arr[j]]); // } // fprintf(retval_f_arr[0], "\n"); if (line_size == -1) break; cur_read_size += line_size; } // 读info cur_read_size = 0; for (i = 0; i < block_line_num; ++i) { if (fgets(read_buf, READ_BUF_SIZE, info_f) == NULL) break; const int line_size = strlen(read_buf); assert(line_size < READ_BUF_SIZE); sscanf(read_buf, "%d %d %d\n", &info_arr[i][0], &info_arr[i][1], &info_arr[i][2]); cur_read_size += line_size; // fprintf(stderr, "%-8d%-8d%-8d\n", info_arr[i][0], info_arr[i][1], info_arr[i][2]); // fprintf(stderr, "%s\n", read_buf); } cur_read_size = 0; for (i = 0; i < block_line_num; ++i) { for (j = cur_read_size; j < cur_read_size + info_arr[i][1]; ++j) { fprintf(retval_f_arr[0], "%c", t_2bit2char[(uint8_t)target_arr[j]]); } fprintf(retval_f_arr[0], "\n"); cur_read_size += info_arr[i][1]; } // for (i = 0; i < block_line_num; ++i) //{ // fprintf(retval_f_arr[0], "%d\n", info_arr[i][1]); //} // 性能测试 // normal sw // PERFORMANCE_TEST_NORMAL(0, ksw_extend_normal); // avx2 // PERFORMANCE_TEST_AVX2(1, ksw_extend_avx2); // avx2 heuristics // PERFORMANCE_TEST_AVX2(2, ksw_extend_avx2_heuristics); // avx2 mem aligned // PERFORMANCE_TEST_AVX2(3, ksw_extend_avx2_aligned); // avx2 u8 // PERFORMANCE_TEST_AVX2(4, ksw_extend_avx2_u8); // avx2 u8 heuristics // PERFORMANCE_TEST_AVX2(5, ksw_extend_avx2_u8_heuristics); // avx2 u8 mem aligned // PERFORMANCE_TEST_AVX2(6, ksw_extend_avx2_u8_aligned); } fprintf(stderr, "%ld\n", total_line_num); #ifdef SHOW_PERF char *kernel_names[7] = { "normal", "avx2", "avx2_heuristics", "avx2_aligned", "avx2_u8", "avx2_u8_heuristics", "avx2_u8_aligned"}; for (i = 0; i < KERNEL_NUM; ++i) { fprintf(stderr, "[%18s] time: %9.6f s; score: %ld\n", kernel_names[i], time_sw[i] / DIVIDE_BY, score_total[i]); } #endif if (query_f != 0) fclose(query_f); if (target_f != 0) fclose(target_f); if (info_f != 0) fclose(info_f); for (i = 0; i < KERNEL_NUM; ++i) { if (ins_ext_f_arr[i] != 0) fclose(ins_ext_f_arr[i]); if (del_ext_f_arr[i] != 0) fclose(del_ext_f_arr[i]); if (score_f_arr[i] != 0) fclose(score_f_arr[i]); if (retval_f_arr[i] != 0) fclose(retval_f_arr[i]); } }