sw_perf/main.c

297 lines
9.2 KiB
C
Raw Blame History

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

/*********************************************************************************************
Description: Entry point for the sw performance tests
Copyright : All rights reserved by NCIC.ICT
Author : Zhang Zhonghai
Date : 2023/08/20
***********************************************************************************************/
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <stdint.h>
#include <assert.h>
#include <time.h>
#include "sys/time.h"
#include "thread_mem.h"
#include "bsw.h"
#include "utils.h"
#include "common.h"
// Byte budget of target sequence data gathered into one processing block.
#define BLOCK_BUF_SIZE 1048576
// Size of the buffer used to read a single input line with fgets().
#define READ_BUF_SIZE 2048
// Allocation size of the per-block sequence buffers: the block budget plus
// room for one more full line, because a block is only closed after the
// line that crosses the budget has been stored.
#define SEQ_BUF_SIZE (BLOCK_BUF_SIZE + READ_BUF_SIZE)
// Initial size handed to thread_mem_init_alloc() for each kernel's thread_mem_t.
#define INIT_ALLOC_SIZE 4096
// Divisor that converts clock() tick differences into seconds (as a double).
#define DIVIDE_BY (CLOCKS_PER_SEC * 1.0)
#ifdef SHOW_PERF
// Debug aid for timing the regions of interest.
// Despite the name, the value returned is the raw clock() tick count, not
// milliseconds; callers divide differences by CLOCKS_PER_SEC to get seconds.
// A gettimeofday()-based millisecond variant is kept here, disabled:
//   struct timeval tv;
//   gettimeofday(&tv, NULL);
//   return (int64_t)1000 * (tv.tv_sec + ((1e-6) * tv.tv_usec));
int64_t get_mseconds()
{
    const clock_t ticks = clock();
    return ticks;
}
// Accumulated get_mseconds() deltas spent in each kernel, indexed by kernel number.
int64_t time_sw[KERNEL_NUM] = {0};
#endif
// _PERFORMANCE_TEST(kernel_num, func)
//
// Runs the alignment kernel `func` once for every record of the current block
// and accumulates its return value into score_total[kernel_num].
//
// The macro deliberately works on variables of the calling scope, which must
// all be declared there:
//   i, block_line_num            - loop index / number of records in the block
//   cur_query_pos, cur_target_pos- running byte offsets into the packed buffers
//   query_arr, target_arr        - packed query / target sequences
//   info_arr                     - per record {query length, target length, h0-like value}
//   tmem, mat, score, score_total, qle, tle, gtle, gscore, max_off
//
// The offsets advance by info_arr[i][0] / info_arr[i][1], so those values must
// equal the lengths of the sequences as they were packed into the buffers.
//
// The literal arguments (5, mat, 6, 1, 6, 1, 100, 5, 100) are forwarded as-is;
// presumably alphabet size, scoring matrix, gap open/extend penalties, band
// width, end bonus and z-drop -- TODO confirm against the kernel prototypes.
//
// The body is wrapped in do { } while (0) so that an invocation followed by
// ';' is exactly one statement, even under an unbraced if/else.
//
// NOTE(review): an identifier starting with '_' followed by an uppercase
// letter is reserved for the implementation; the name is kept only for
// compatibility with existing uses.
#define _PERFORMANCE_TEST(kernel_num, func) \
    do \
    { \
        cur_query_pos = 0; \
        cur_target_pos = 0; \
        for (i = 0; i < block_line_num; ++i) \
        { \
            score[kernel_num] = func( \
                &tmem[kernel_num], \
                info_arr[i][0], \
                (uint8_t *)query_arr + cur_query_pos, \
                info_arr[i][1], \
                (uint8_t *)target_arr + cur_target_pos, \
                5, mat, 6, 1, 6, 1, 100, 5, 100, \
                info_arr[i][2], \
                &qle, &tle, &gtle, &gscore, &max_off[0]); \
            score_total[kernel_num] += score[kernel_num]; \
            cur_query_pos += info_arr[i][0]; \
            cur_target_pos += info_arr[i][1]; \
        } \
    } while (0)

// PERFORMANCE_TEST(kernel_num, func)
//
// Public wrapper around _PERFORMANCE_TEST. With SHOW_PERF defined it also
// adds the elapsed get_mseconds() delta of the run to time_sw[kernel_num]
// (this needs `start_time` in the calling scope); otherwise it only runs the
// kernel loop.
#ifdef SHOW_PERF
#define PERFORMANCE_TEST(kernel_num, func) \
    do \
    { \
        start_time = get_mseconds(); \
        _PERFORMANCE_TEST(kernel_num, func); \
        time_sw[kernel_num] += get_mseconds() - start_time; \
    } while (0)
#else
#define PERFORMANCE_TEST(kernel_num, func) _PERFORMANCE_TEST(kernel_num, func)
#endif
// Reads one line of sequence data.
//
// read_buf : scratch buffer of at least READ_BUF_SIZE bytes; receives the raw
//            line, which is then passed to convert_char_to_2bit() (presumably
//            an in-place conversion to 2-bit base codes -- see its header).
// f_ptr    : input stream to read from.
// out_arr  : destination; receives exactly the returned number of bytes
//            (no terminator is appended).
//
// Returns the number of bytes stored (a trailing '\n' is stripped and not
// counted), or -1 when fgets() yields nothing (end of file or read error).
// A line longer than READ_BUF_SIZE - 1 characters is delivered by fgets() in
// several pieces, i.e. over several calls.
int read_seq_line(char *read_buf, FILE *f_ptr, char *out_arr)
{
    if (fgets(read_buf, READ_BUF_SIZE, f_ptr) == NULL)
        return -1;

    // The length is taken before conversion on purpose: it is what gets copied.
    int line_size = (int)strlen(read_buf);
    assert(line_size < READ_BUF_SIZE);

    // strlen() is 0 when the line starts with a NUL byte; without the guard
    // the newline test would read read_buf[-1].
    if (line_size > 0 && read_buf[line_size - 1] == '\n')
    {
        line_size--;
        read_buf[line_size] = '\0';
    }

    convert_char_to_2bit(read_buf);
    memcpy(out_arr, read_buf, (size_t)line_size);
    return line_size;
}
// Global state.
// Per-kernel output streams used to dump per-alignment information (scores
// etc.) for debugging. They stay NULL unless main() opens them in a
// DEBUG_OUT / DEBUG_RETURN_VALUE build.
FILE *ins_ext_f_arr[KERNEL_NUM] = {NULL};
FILE *del_ext_f_arr[KERNEL_NUM] = {NULL};
FILE *score_f_arr[KERNEL_NUM] = {NULL};
FILE *retval_f_arr[KERNEL_NUM] = {NULL};
// Program entry point.
//
// Usage: <program> <query_file> <target_file> <info_file>
//
// The three files are consumed in lock-step, one record per line:
//   query_file / target_file : one sequence per line (read via read_seq_line)
//   info_file                : three integers per line; the first two are used
//                              as the query and target lengths of the record,
//                              the third is forwarded to the kernels.
//
// Records are processed in blocks. A block is filled with target lines until
// about BLOCK_BUF_SIZE bytes have been gathered (or the per-block record table
// is full); then the same number of query and info lines is read and every
// kernel is run over the block through PERFORMANCE_TEST. The info lengths are
// used to walk the packed buffers, so they are assumed to match the actual
// line lengths.
//
// Output (stderr): number of target lines read, the sum of all query lengths
// and, with SHOW_PERF, per-kernel time and accumulated score.
//
// Returns 0 on success and 1 on usage, allocation or file errors.
int main(int argc, char *argv[])
{
    if (argc < 4)
    {
        fprintf(stderr, "usage: %s <query_file> <target_file> <info_file>\n",
                (argc > 0 && argv[0] != NULL) ? argv[0] : "sw_perf");
        return 1;
    }
    const char *qf_path = argv[1];
    const char *tf_path = argv[2];
    const char *if_path = argv[3];

    // 5x5 scoring matrix passed to every kernel: +1 on the first four diagonal
    // entries, -4 for mismatches among the first four symbols, -1 whenever the
    // fifth symbol is involved.
    int8_t mat[25] = {1, -4, -4, -4, -1,
                      -4, 1, -4, -4, -1,
                      -4, -4, 1, -4, -1,
                      -4, -4, -4, 1, -1,
                      -1, -1, -1, -1, -1};
    // Out-parameters of the kernels; their values are not inspected here.
    int max_off[2];
    int qle, tle, gtle, gscore;
    thread_mem_t tmem[KERNEL_NUM];
    int i;
    int rc = 1;

    // Last score and running score sum of each kernel.
    int score[KERNEL_NUM] = {0};
    int64_t score_total[KERNEL_NUM] = {0};

    // Number of {qlen, tlen, h0} rows available per block. Same row count as
    // the original layout (one row every 3 ints up to BLOCK_BUF_SIZE).
    const int max_block_lines = BLOCK_BUF_SIZE / 3 + 1;

    char *query_arr = NULL;
    char *target_arr = NULL;
    int *info_buf = NULL;
    int **info_arr = NULL;
    FILE *query_f = NULL, *target_f = NULL, *info_f = NULL;

    int64_t total_line_num = 0; // target lines read so far
    int64_t all_qlen = 0;       // sum of the query lengths from the info file
    int block_line_num = 0;     // records in the current block
    int cur_query_pos, cur_target_pos;
#ifdef SHOW_PERF
    int64_t start_time;
#endif
    char read_buf[READ_BUF_SIZE]; // raw line buffer
    char line_buf[READ_BUF_SIZE]; // staging area for one converted query line
    int input_done = 0;           // set once query/info input ran short

    query_arr = malloc(SEQ_BUF_SIZE);
    target_arr = malloc(SEQ_BUF_SIZE);
    info_buf = malloc((size_t)max_block_lines * 3 * sizeof *info_buf);
    info_arr = malloc((size_t)max_block_lines * sizeof *info_arr);
    if (query_arr == NULL || target_arr == NULL || info_buf == NULL || info_arr == NULL)
    {
        fprintf(stderr, "error: out of memory\n");
        goto cleanup;
    }

#ifdef DEBUG_OUT
    for (i = 0; i < KERNEL_NUM; ++i)
    {
        char out_path[64];
        snprintf(out_path, sizeof out_path, "/home/zzh/work/sw_perf/output/ins_%d.txt", i);
        ins_ext_f_arr[i] = fopen(out_path, "w");
        snprintf(out_path, sizeof out_path, "/home/zzh/work/sw_perf/output/del_%d.txt", i);
        del_ext_f_arr[i] = fopen(out_path, "w");
        snprintf(out_path, sizeof out_path, "/home/zzh/work/sw_perf/output/score_%d.txt", i);
        score_f_arr[i] = fopen(out_path, "w");
        if (ins_ext_f_arr[i] == NULL || del_ext_f_arr[i] == NULL || score_f_arr[i] == NULL)
        {
            fprintf(stderr, "error: cannot open debug output files for kernel %d\n", i);
            goto cleanup;
        }
    }
#endif
#ifdef DEBUG_RETURN_VALUE
    for (i = 0; i < KERNEL_NUM; ++i)
    {
        char out_path[64];
        snprintf(out_path, sizeof out_path, "/home/zzh/work/sw_perf/output/retval_%d.txt", i);
        retval_f_arr[i] = fopen(out_path, "w");
        if (retval_f_arr[i] == NULL)
        {
            perror(out_path);
            goto cleanup;
        }
    }
#endif

    query_f = fopen(qf_path, "r");
    if (query_f == NULL)
    {
        perror(qf_path);
        goto cleanup;
    }
    target_f = fopen(tf_path, "r");
    if (target_f == NULL)
    {
        perror(tf_path);
        goto cleanup;
    }
    info_f = fopen(if_path, "r");
    if (info_f == NULL)
    {
        perror(if_path);
        goto cleanup;
    }

    // Row i of info_arr views three consecutive ints of info_buf.
    for (i = 0; i < max_block_lines; ++i)
    {
        info_arr[i] = info_buf + 3 * (size_t)i;
    }

    for (i = 0; i < KERNEL_NUM; ++i)
    {
        thread_mem_init_alloc(tmem + i, INIT_ALLOC_SIZE);
    }

    // Read a block, run all kernels on it, repeat until the input is used up.
    while (!input_done && !feof(target_f))
    {
        block_line_num = 0;

        // Targets usually take the most space, so they decide how many records
        // fit into a block. The record count is also capped by the number of
        // info rows, otherwise many short lines would index past the table.
        int cur_read_size = 0;
        while (!feof(target_f) && cur_read_size < BLOCK_BUF_SIZE && block_line_num < max_block_lines)
        {
            int line_size = read_seq_line(read_buf, target_f, target_arr + cur_read_size);
            if (line_size == -1)
                break;
            cur_read_size += line_size;
            ++block_line_num;
            ++total_line_num;
        }
        // Nothing read means end of file or a read error; leaving here also
        // avoids spinning forever on an error, where feof() never turns true.
        if (block_line_num == 0)
            break;

        // Queries: each line is staged in line_buf first so the copy into
        // query_arr can be bounds-checked (queries may be longer than targets).
        int query_lines = 0;
        cur_read_size = 0;
        while (query_lines < block_line_num)
        {
            int line_size = read_seq_line(read_buf, query_f, line_buf);
            if (line_size == -1)
                break;
            if (line_size > SEQ_BUF_SIZE - cur_read_size)
            {
                fprintf(stderr, "error: query data of one block exceeds %d bytes\n", SEQ_BUF_SIZE);
                goto cleanup;
            }
            memcpy(query_arr + cur_read_size, line_buf, (size_t)line_size);
            cur_read_size += line_size;
            ++query_lines;
        }

        // Info: three integers per record.
        int info_lines = 0;
        while (info_lines < block_line_num)
        {
            int *row = info_arr[info_lines];
            if (fgets(read_buf, READ_BUF_SIZE, info_f) == NULL)
                break;
            if (sscanf(read_buf, "%d %d %d\n", &row[0], &row[1], &row[2]) != 3)
                break;
            all_qlen += row[0];
            ++info_lines;
        }

        // Only records present in all three inputs are processed; running the
        // kernels on rows that were never filled would use stale data.
        const int complete_lines = query_lines < info_lines ? query_lines : info_lines;
        if (complete_lines < block_line_num)
        {
            fprintf(stderr,
                    "warning: query/info input ended or was malformed before the target input; "
                    "only %d of %d records of the last block are processed\n",
                    complete_lines, block_line_num);
            block_line_num = complete_lines;
            input_done = 1;
        }

#ifdef DEBUG_RETURN_VALUE
        {
            // Dump the target of every record, decoded through t_2bit2char.
            int j;
            cur_read_size = 0;
            for (i = 0; i < block_line_num; ++i)
            {
                fprintf(stdout, "%d\n", i);
                for (j = cur_read_size; j < cur_read_size + info_arr[i][1]; ++j)
                {
                    fprintf(stdout, "j: %d\n", j);
                    fprintf(retval_f_arr[0], "%c", t_2bit2char[(uint8_t)target_arr[j]]);
                }
                fprintf(retval_f_arr[0], "\n");
                cur_read_size += info_arr[i][1];
            }
        }
#endif

        // Performance test: every kernel processes the same block.
        // normal sw
        PERFORMANCE_TEST(0, normal);
        // normal pruning
        PERFORMANCE_TEST(1, normal_pruning);
        // avx2
        PERFORMANCE_TEST(2, avx2_u8);
        // avx2 pruning
        PERFORMANCE_TEST(3, avx2_u8_pruning);
    }

    // int64_t is not necessarily `long`; print through long long.
    fprintf(stderr, "%lld\n", (long long)total_line_num);
    fprintf(stderr, "all_qlen: %lld\n", (long long)all_qlen);
#ifdef SHOW_PERF
    {
        const char *const kernel_names[] = {
            "normal",
            "normal_pruning",
            "avx2_u8",
            "avx2_u8_pruning"};
        const int name_count = (int)(sizeof kernel_names / sizeof kernel_names[0]);
        // Bounded by both counts so a larger KERNEL_NUM cannot overrun the names.
        for (i = 0; i < KERNEL_NUM && i < name_count; ++i)
        {
            fprintf(stderr, "[%18s] time: %9.6f s; score: %lld\n", kernel_names[i], time_sw[i] / DIVIDE_BY, (long long)score_total[i]);
        }
    }
#endif
    rc = 0;

cleanup:
    if (query_f != NULL)
        fclose(query_f);
    if (target_f != NULL)
        fclose(target_f);
    if (info_f != NULL)
        fclose(info_f);
    for (i = 0; i < KERNEL_NUM; ++i)
    {
        if (ins_ext_f_arr[i] != NULL)
            fclose(ins_ext_f_arr[i]);
        if (del_ext_f_arr[i] != NULL)
            fclose(del_ext_f_arr[i]);
        if (score_f_arr[i] != NULL)
            fclose(score_f_arr[i]);
        if (retval_f_arr[i] != NULL)
            fclose(retval_f_arr[i]);
    }
    free(info_arr);
    free(info_buf);
    free(target_arr);
    free(query_arr);
    return rc;
}