#include #include #include #include #include #include "sys/time.h" #define SW_NORMAL 0 #define SW_AVX2 1 #define SW_CUDA 2 #define SW_ALL 3 #define BLOCK_BUF_SIZE 1048576 #define READ_BUF_SIZE 2048 #define SEQ_BUF_SIZE (BLOCK_BUF_SIZE + READ_BUF_SIZE) #ifdef SHOW_PERF // 用来调试,计算感兴趣部分的运行时间 // 获取当前毫秒数 int64_t get_mseconds() { struct timeval tv; gettimeofday(&tv, NULL); return (int64_t)1000 * (tv.tv_sec + ((1e-6) * tv.tv_usec)); } int64_t time_sw_normal = 0, time_sw_avx2 = 0, time_sw_avx2_u8 = 0; #endif extern int ksw_normal(int qlen, const uint8_t *query, int tlen, const uint8_t *target, int m, const int8_t *mat, int o_del, int e_del, int o_ins, int e_ins, int w, int end_bonus, int zdrop, int h0, int *_qle, int *_tle, int *_gtle, int *_gscore, int *_max_off); extern int ksw_avx2(int qlen, const uint8_t *query, int tlen, const uint8_t *target, int is_left, int m, const int8_t *mat, int o_del, int e_del, int o_ins, int e_ins, int a, int b, int w, int end_bonus, int zdrop, int h0, int *_qle, int *_tle, int *_gtle, int *_gscore, int *_max_off); extern int ksw_avx2_u8(int qlen, const uint8_t *query, int tlen, const uint8_t *target, int is_left, int m, const int8_t *mat, int o_del, int e_del, int o_ins, int e_ins, int a, int b, int w, int end_bonus, int zdrop, int h0, int *_qle, int *_tle, int *_gtle, int *_gscore, int *_max_off); unsigned char nst_nt4_table[256] = { 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5 /*'-'*/, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 0, 4, 1, 4, 4, 4, 2, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 0, 4, 1, 4, 4, 4, 2, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4}; char t_2bit2char[5] = {'A', 'C', 'G', 'T'}; // 将碱基字符转成2位编码 void convert_char_to_2bit(char *str) { int i; for (i = 0; i < strlen(str); ++i) str[i] = nst_nt4_table[str[i]]; } /* * 包含一个参数,用来区分调用那个sw算法 * 参数为 normal/avx2/cuda */ // 程序执行入口 int main(int argc, char *argv[]) { /* int sw_algo = SW_NORMAL; // 判断执行的sw的实现类型 if (argc > 1) { if (strcmp(argv[1], "normal") == 0) { sw_algo = SW_NORMAL; } else if (strcmp(argv[1], "avx2") == 0) { sw_algo = SW_AVX2; } else if (strcmp(argv[1], "cuda") == 0) { sw_algo = SW_CUDA; } else { sw_algo = SW_ALL; } } */ // 初始化一些全局参数 int8_t mat[25] = {1, -4, -4, -4, -1, -4, 1, -4, -4, -1, -4, -4, 1, -4, -1, -4, -4, -4, 1, -1, -1, -1, -1, -1, -1}; int max_off[2]; int qle, tle, gtle, gscore; // 读取测试数据 char *query_arr = (char *)malloc(SEQ_BUF_SIZE); char *target_arr = (char *)malloc(SEQ_BUF_SIZE); int *info_buf = (int *)malloc(SEQ_BUF_SIZE); int **info_arr = (int **)malloc(SEQ_BUF_SIZE); FILE *query_f = 0, *target_f = 0, *info_f = 0; // const char *qf_path = "q.fa"; // const char *tf_path = "t.fa"; // const char *if_path = "i.txt"; const char *qf_path = "bug_q.fa"; const char *tf_path = "bug_t.fa"; const char *if_path = "bug_i.txt"; // const char *qf_path = "/public/home/zzh/data/sw/q_s.fa"; // const char *tf_path = "/public/home/zzh/data/sw/t_s.fa"; // const char *if_path = "/public/home/zzh/data/sw/i_s.txt"; // const char *qf_path = "/public/home/zzh/data/sw/q_m.fa"; // const char *tf_path = "/public/home/zzh/data/sw/t_m.fa"; // const char *if_path = "/public/home/zzh/data/sw/i_m.txt"; // const char *qf_path = "/public/home/zzh/data/sw/q_l.fa"; // const char *tf_path = "/public/home/zzh/data/sw/t_l.fa"; // const char *if_path = "/public/home/zzh/data/sw/i_l.txt"; // const char *qf_path = "/public/home/zzh/data/sw/query.fa"; // const char *tf_path = "/public/home/zzh/data/sw/target.fa"; // const char *if_path = "/public/home/zzh/data/sw/info.txt"; query_f = fopen(qf_path, "r"); target_f = fopen(tf_path, "r"); info_f = fopen(if_path, "r"); // 将每次比对的得分等信息写入文件,进行debug FILE *normal_out_f = fopen("normal_out.txt", "w"); FILE *avx2_out_f = fopen("avx2_out.txt", "w"); FILE *avx2_u8_out_f = fopen("avx2_u8_out.txt", "w"); // 每次读取一定量的数据,然后执行,直到处理完所有数据 int total_line_num = 0; // 目前处理的总的数据行数 int block_line_num = 0; // 当前循环包含的数据行数 int i, j; // const int max_read = READ_BUF_SIZE; // 每次最多读取的字符 char read_buf[READ_BUF_SIZE]; // 读文件缓存 // int ret_code = 0; // 初始化info_arr数组 i = 0; j = 0; while (1) { if (j > BLOCK_BUF_SIZE) break; info_arr[i] = &info_buf[j]; i += 1; j += 3; } int score_normal = 0, score_avx2 = 0, score_avx2_u8 = 0; int score_normal_total = 0, score_avx2_total = 0, score_avx2_u8_total = 0; while (!feof(target_f)) { block_line_num = 0; // target序列一般占用存储最多,先读取target,看一个buf能读多少行,query和info就按照这个行数来读 int cur_read_size = 0; while (!feof(target_f) && cur_read_size < BLOCK_BUF_SIZE) { if (fgets(read_buf, READ_BUF_SIZE, target_f) == NULL) break; int line_size = strlen(read_buf); assert(line_size < READ_BUF_SIZE); if (read_buf[line_size - 1] == '\n') { read_buf[line_size - 1] = '\0'; line_size--; } convert_char_to_2bit(read_buf); ++block_line_num; ++total_line_num; strncpy(target_arr + cur_read_size, read_buf, line_size); cur_read_size += line_size; // fprintf(stderr, "%d %d \n", line_size, cur_read_size); } // 读query cur_read_size = 0; for (i = 0; i < block_line_num; ++i) { if (fgets(read_buf, READ_BUF_SIZE, query_f) == NULL) break; int line_size = strlen(read_buf); assert(line_size < READ_BUF_SIZE); if (read_buf[line_size - 1] == '\n') { read_buf[line_size - 1] = '\0'; line_size--; } convert_char_to_2bit(read_buf); strncpy(query_arr + cur_read_size, read_buf, line_size); cur_read_size += line_size; } // 读info cur_read_size = 0; for (i = 0; i < block_line_num; ++i) { if (fgets(read_buf, READ_BUF_SIZE, info_f) == NULL) break; const int line_size = strlen(read_buf); assert(line_size < READ_BUF_SIZE); sscanf(read_buf, "%d %d %d\n", &info_arr[i][0], &info_arr[i][1], &info_arr[i][2]); cur_read_size += line_size; // fprintf(stderr, "%-8d%-8d%-8d\n", info_arr[i][0], info_arr[i][1], info_arr[i][2]); // fprintf(stderr, "%s\n", read_buf); } // 性能测试 // 普通 sw int cur_query_pos = 0; int cur_target_pos = 0; for (i = 0; i < block_line_num; ++i) { #ifdef SHOW_PERF int64_t start_time = get_mseconds(); #endif score_normal = ksw_normal( info_arr[i][0], (uint8_t *)query_arr + cur_query_pos, info_arr[i][1], (uint8_t *)target_arr + cur_target_pos, 5, mat, 6, 1, 6, 1, 100, 5, 100, info_arr[i][2], &qle, &tle, >le, &gscore, &max_off[0]); #ifdef SHOW_PERF time_sw_normal += get_mseconds() - start_time; #endif score_normal_total += score_normal; // fprintf(normal_out_f, "%d %d\n", info_arr[i][2], score_normal); fprintf(normal_out_f, "%d %d %d %d %d %d %d\n", info_arr[i][2], score_normal, qle, tle, gtle, gscore, max_off[0]); #ifdef SHOW_PERF start_time = get_mseconds(); #endif score_avx2 = ksw_avx2( info_arr[i][0], (uint8_t *)query_arr + cur_query_pos, info_arr[i][1], (uint8_t *)target_arr + cur_target_pos, 0, 5, mat, 6, 1, 6, 1, 1, 4, 100, 5, 100, info_arr[i][2], &qle, &tle, >le, &gscore, &max_off[0]); #ifdef SHOW_PERF time_sw_avx2 += get_mseconds() - start_time; #endif score_avx2_total += score_avx2; // fprintf(avx2_out_f, "%d %d\n", info_arr[i][2], score_avx2); fprintf(avx2_out_f, "%d %d %d %d %d %d %d\n", info_arr[i][2], score_avx2, qle, tle, gtle, gscore, max_off[0]); #ifdef SHOW_PERF start_time = get_mseconds(); #endif score_avx2_u8 = ksw_avx2_u8( info_arr[i][0], (uint8_t *)query_arr + cur_query_pos, info_arr[i][1], (uint8_t *)target_arr + cur_target_pos, 0, 5, mat, 6, 1, 6, 1, 1, 4, 100, 5, 100, info_arr[i][2], &qle, &tle, >le, &gscore, &max_off[0]); #ifdef SHOW_PERF time_sw_avx2_u8 += get_mseconds() - start_time; #endif score_avx2_u8_total += score_avx2_u8; fprintf(avx2_u8_out_f, "%d %d %d %d %d %d\n", score_avx2_u8, qle, tle, gtle, gscore, max_off[0]); // 更新query和target位置信息 cur_query_pos += info_arr[i][0]; cur_target_pos += info_arr[i][1]; // fprintf(stderr, "%d %d %d %d %d %d %d\n", score_normal, qle, tle, gtle, gscore, max_off[0], max_off[1]); } /* // avx2 sw cur_query_pos = 0; cur_target_pos = 0; for (i = 0; i < block_line_num; ++i) { #ifdef SHOW_PERF int64_t start_time = get_mseconds(); #endif score_avx2 += ksw_avx2( info_arr[i][0], (uint8_t *)query_arr + cur_query_pos, info_arr[i][1], (uint8_t *)target_arr + cur_target_pos, 0, 5, mat, 6, 1, 6, 1, 1, 4, 100, 5, 100, info_arr[i][2], &qle, &tle, >le, &gscore, &max_off[0]); #ifdef SHOW_PERF time_sw_avx2 += get_mseconds() - start_time; #endif // 更新query和target位置信息 cur_query_pos += info_arr[i][0]; cur_target_pos += info_arr[i][1]; // fprintf(stderr, "%d %d %d %d %d %d %d\n", score_avx2, qle, tle, gtle, gscore, max_off[0], max_off[1]); } // avx2 u8 sw cur_query_pos = 0; cur_target_pos = 0; for (i = 0; i < block_line_num; ++i) { #ifdef SHOW_PERF int64_t start_time = get_mseconds(); #endif score_avx2_u8 += ksw_avx2_u8( info_arr[i][0], (uint8_t *)query_arr + cur_query_pos, info_arr[i][1], (uint8_t *)target_arr + cur_target_pos, 0, 5, mat, 6, 1, 6, 1, 1, 4, 100, 5, 100, info_arr[i][2], &qle, &tle, >le, &gscore, &max_off[0]); #ifdef SHOW_PERF time_sw_avx2_u8 += get_mseconds() - start_time; #endif // 更新query和target位置信息 cur_query_pos += info_arr[i][0]; cur_target_pos += info_arr[i][1]; // fprintf(stderr, "%d %d %d %d %d %d %d\n", score_normal, qle, tle, gtle, gscore, max_off[0], max_off[1]); } */ // fprintf(stderr, "%d %d \n", block_line_num, total_line_num); } // fprintf(stderr, "%d \n", score_normal); #ifdef SHOW_PERF fprintf(stderr, "time_sw_normal: %f s; score: %d\n", time_sw_normal / 1000.0, score_normal_total); fprintf(stderr, "time_sw_avx2: %f s; score: %d\n", time_sw_avx2 / 1000.0, score_avx2_total); fprintf(stderr, "time_sw_avx2_u8: %f s; score: %d\n", time_sw_avx2_u8 / 1000.0, score_avx2_u8_total); #endif if (query_f != 0) fclose(query_f); if (target_f != 0) fclose(target_f); if (info_f != 0) fclose(info_f); if (avx2_out_f != 0) fclose(avx2_out_f); if (avx2_u8_out_f != 0) fclose(avx2_u8_out_f); if (normal_out_f != 0) fclose(normal_out_f); }