sw_perf/main.c

294 lines
9.6 KiB
C
Raw Blame History

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <stdint.h>
#include <assert.h>
#include <time.h>
#include "sys/time.h"
#include "thread_mem.h"
#include "ksw_ext.h"
#include "utils.h"
#include "common.h"
// Sequence bytes buffered per block: main() stops adding lines to a block once
// this many bytes have been read.
#define BLOCK_BUF_SIZE 1048576
// Size of the line buffer handed to fgets().
#define READ_BUF_SIZE 2048
// Allocation size of the sequence buffers: one full block plus one more line,
// so the line that crosses the block limit still fits.
#define SEQ_BUF_SIZE (BLOCK_BUF_SIZE + READ_BUF_SIZE)
// Initial size passed to thread_mem_init_alloc() for each kernel.
#define INIT_ALLOC_SIZE 4096
// Divisor that turns clock() ticks into seconds (as a double).
#define DIVIDE_BY (CLOCKS_PER_SEC * 1.0)
#ifdef SHOW_PERF
// Debug-only timing support, used to measure the run time of the code regions
// of interest.

// Return the processor time used by the program so far, in clock() ticks.
// Despite the name, the value is NOT in milliseconds: callers subtract two
// readings and divide by DIVIDE_BY (CLOCKS_PER_SEC) to obtain seconds.
int64_t get_mseconds(void)
{
    return (int64_t)clock();
}

// Accumulated clock() ticks per kernel, indexed by kernel number.
int64_t time_sw[KERNEL_NUM] = {0};
#endif
// OUTPUT_RETVAL(kernel_num): optional per-call result dump.
// With DEBUG_RETURN_VALUE defined it writes one tab-separated line (score, qle,
// tle, gtle, gscore, max_off[0]) to retval_f_arr[kernel_num], taking those
// names from the expanding scope. Otherwise it expands to nothing and its
// argument is never evaluated.
#ifndef DEBUG_RETURN_VALUE
#define OUTPUT_RETVAL(kernel_num)
#else
#define OUTPUT_RETVAL(kernel_num) \
    fprintf(retval_f_arr[kernel_num], \
            "%d\t%d\t%d\t%d\t%d\t%d\n", \
            score[kernel_num], qle, tle, gtle, gscore, max_off[0])
#endif
// _PERFORMANCE_TEST_NORMAL(kernel_num, func): run `func` once for every row of
// the current block and accumulate its score under slot `kernel_num`.
//
// The macro takes these names from the expanding scope: i, block_line_num,
// cur_query_pos, cur_target_pos, tmem, info_arr, query_arr, target_arr, mat,
// qle, tle, gtle, gscore, max_off, score and score_total.
//
// For row i, info_arr[i][0] and info_arr[i][1] are passed next to the query and
// target pointers and are also used to advance the offsets into the packed
// sequence buffers; info_arr[i][2] is forwarded unchanged.
// NOTE(review): the literal arguments (5, mat, 6, 1, 6, 1, 100, 5, 100) are
// presumably alphabet size, scoring matrix, gap penalties, band width, end
// bonus and z-drop - confirm against the prototype in ksw_ext.h.
//
// The per-call dump is written for `kernel_num`, so each kernel reports its own
// score to its own file.
#define _PERFORMANCE_TEST_NORMAL(kernel_num, func) \
    for (i = 0, cur_query_pos = 0, cur_target_pos = 0; i < block_line_num; ++i) \
    { \
        score[kernel_num] = func( \
            &tmem[kernel_num], \
            info_arr[i][0], \
            (uint8_t *)query_arr + cur_query_pos, \
            info_arr[i][1], \
            (uint8_t *)target_arr + cur_target_pos, \
            5, mat, 6, 1, 6, 1, 100, 5, 100, \
            info_arr[i][2], \
            &qle, &tle, &gtle, &gscore, &max_off[0]); \
        score_total[kernel_num] += score[kernel_num]; \
        cur_query_pos += info_arr[i][0]; \
        cur_target_pos += info_arr[i][1]; \
        OUTPUT_RETVAL(kernel_num); \
    }
// _PERFORMANCE_TEST_AVX2(kernel_num, func): AVX2 counterpart of the plain
// benchmark loop. Calls `func` once per row of the current block, stores the
// result in score[kernel_num], adds it to score_total[kernel_num] and emits the
// optional per-call dump for `kernel_num`.
//
// Names taken from the expanding scope: i, block_line_num, cur_query_pos,
// cur_target_pos, tmem, info_arr, query_arr, target_arr, qle, tle, gtle,
// gscore, max_off, score and score_total.
//
// Both offsets restart at 0 and are advanced by info_arr[i][0] (query) and
// info_arr[i][1] (target) after every call.
// NOTE(review): the meaning of the literal arguments (0, 6, 1, 6, 1, 1, 4,
// 100, 5) is defined by the kernel prototypes in ksw_ext.h - verify there.
#define _PERFORMANCE_TEST_AVX2(kernel_num, func) \
    for (i = 0, cur_query_pos = 0, cur_target_pos = 0; i < block_line_num; ++i) \
    { \
        score[kernel_num] = func(&tmem[kernel_num], \
                                 info_arr[i][0], (uint8_t *)query_arr + cur_query_pos, \
                                 info_arr[i][1], (uint8_t *)target_arr + cur_target_pos, \
                                 0, 6, 1, 6, 1, 1, 4, 100, 5, \
                                 info_arr[i][2], \
                                 &qle, &tle, &gtle, &gscore, &max_off[0]); \
        score_total[kernel_num] += score[kernel_num]; \
        cur_query_pos += info_arr[i][0]; \
        cur_target_pos += info_arr[i][1]; \
        OUTPUT_RETVAL(kernel_num); \
    }
// PERFORMANCE_TEST_NORMAL / PERFORMANCE_TEST_AVX2: the entry points main() uses
// to benchmark one kernel on the current block.
// Without SHOW_PERF they simply forward to the underscore-prefixed loops.
// With SHOW_PERF they additionally bracket the loop with get_mseconds() and add
// the elapsed ticks to time_sw[kernel_num]; `start_time` must then exist in the
// expanding scope.
#ifndef SHOW_PERF
#define PERFORMANCE_TEST_NORMAL(kernel_num, func) _PERFORMANCE_TEST_NORMAL(kernel_num, func)
#define PERFORMANCE_TEST_AVX2(kernel_num, func) _PERFORMANCE_TEST_AVX2(kernel_num, func)
#else
#define PERFORMANCE_TEST_NORMAL(kernel_num, func) \
    start_time = get_mseconds(); \
    _PERFORMANCE_TEST_NORMAL(kernel_num, func); \
    time_sw[kernel_num] += get_mseconds() - start_time
#define PERFORMANCE_TEST_AVX2(kernel_num, func) \
    start_time = get_mseconds(); \
    _PERFORMANCE_TEST_AVX2(kernel_num, func); \
    time_sw[kernel_num] += get_mseconds() - start_time
#endif
// Read one line of sequence data.
//
// Reads at most READ_BUF_SIZE - 1 characters of the next line of `f_ptr` into
// `read_buf`, strips one trailing '\n', encodes the text in place with
// convert_char_to_2bit() and copies the encoded bytes (without a terminator)
// to `out_arr`.
//
//   read_buf  scratch buffer of at least READ_BUF_SIZE bytes
//   f_ptr     input stream
//   out_arr   destination with room for at least READ_BUF_SIZE - 1 bytes
//
// Returns the number of bytes stored in `out_arr`; 0 means end of file, a read
// error, or an empty line. A line longer than READ_BUF_SIZE - 1 characters is
// returned in pieces by successive calls.
int read_seq_line(char *read_buf, FILE *f_ptr, char *out_arr)
{
    if (fgets(read_buf, READ_BUF_SIZE, f_ptr) == NULL)
        return 0;

    // The length is measured before the conversion - presumably because the
    // encoded bytes may contain 0 and would confuse strlen (verify against
    // convert_char_to_2bit).
    size_t line_size = strlen(read_buf);

    // fgets() yields an empty string when the line starts with a NUL byte;
    // return before read_buf[line_size - 1] would index in front of the buffer.
    if (line_size == 0)
        return 0;

    if (read_buf[line_size - 1] == '\n')
    {
        read_buf[--line_size] = '\0';
    }
    convert_char_to_2bit(read_buf);
    memcpy(out_arr, read_buf, line_size);
    return (int)line_size;
}
// Global variables.
// Debug output streams, one slot per kernel: the score and related details of
// every alignment are written to these files for debugging. All slots start
// out as null pointers.
FILE *ins_ext_f_arr[KERNEL_NUM] = {0};
FILE *del_ext_f_arr[KERNEL_NUM] = {0};
FILE *score_f_arr[KERNEL_NUM] = {0};
FILE *retval_f_arr[KERNEL_NUM] = {0};
// Program entry point: runs the enabled extension kernels over every alignment
// task described by the three input files.
//
// Usage: <program> <query_file> <target_file> <info_file>
//   query_file, target_file  one sequence per line, loaded with read_seq_line()
//   info_file                three integers per line. The benchmark macros pass
//                            the first two to the kernel next to the query and
//                            target pointers and advance the buffer offsets by
//                            them, so they must equal the lengths of the
//                            matching sequence lines; the third is forwarded to
//                            the kernel unchanged.
//
// Input is processed in blocks: lines are loaded in lockstep from the three
// files until one of the sequence buffers holds BLOCK_BUF_SIZE bytes, then the
// kernels are run on the block.
//
// Returns EXIT_SUCCESS when all input was processed, EXIT_FAILURE on missing
// arguments, file or allocation failures, or inconsistent input.
int main(int argc, char *argv[])
{
    enum
    {
        INFO_FIELDS = 3,                          // integers per info line
        INFO_ROW_CAP = SEQ_BUF_SIZE / INFO_FIELDS // rows that fit in info_buf
    };

    if (argc < 4)
    {
        fprintf(stderr, "usage: %s <query_file> <target_file> <info_file>\n",
                argc > 0 ? argv[0] : "sw_perf");
        return EXIT_FAILURE;
    }
    const char *qf_path = argv[1];
    const char *tf_path = argv[2];
    const char *if_path = argv[3];

    // Scoring matrix handed to the non-AVX2 kernel.
    int8_t mat[25] = {1, -4, -4, -4, -1,
                      -4, 1, -4, -4, -1,
                      -4, -4, 1, -4, -1,
                      -4, -4, -4, 1, -1,
                      -1, -1, -1, -1, -1};
    // Output parameters of the kernels (only written, and optionally dumped by
    // OUTPUT_RETVAL).
    int max_off[2];
    int qle, tle, gtle, gscore;
    thread_mem_t tmem[KERNEL_NUM];
    // Last and accumulated score of each kernel.
    int score[KERNEL_NUM] = {0};
    int score_total[KERNEL_NUM] = {0};
    // Packed sequence data of the current block and its per-row info.
    char *query_arr = NULL;
    char *target_arr = NULL;
    int *info_buf = NULL;
    int **info_arr = NULL;
    FILE *query_f = NULL, *target_f = NULL, *info_f = NULL;
    int total_line_num = 0; // rows accepted so far, used in diagnostics
    int block_line_num = 0; // rows in the current block
    int cur_query_pos, cur_target_pos;
#ifdef SHOW_PERF
    int64_t start_time;
#endif
    char read_buf[READ_BUF_SIZE]; // line buffer for file reads
    int input_error = 0;
    int exit_code = EXIT_FAILURE;
    int i;

    // NOTE(review): these per-kernel scratch areas are never released here;
    // confirm whether thread_mem.h offers a matching release call.
    for (i = 0; i < KERNEL_NUM; ++i)
    {
        thread_mem_init_alloc(tmem + i, INIT_ALLOC_SIZE);
    }

#ifdef DEBUG_OUT
    for (i = 0; i < KERNEL_NUM; ++i)
    {
        char out_path[64];
        snprintf(out_path, sizeof out_path, "/home/zzh/work/sw_perf/output/ins_%d.txt", i);
        ins_ext_f_arr[i] = fopen(out_path, "w");
        if (ins_ext_f_arr[i] == NULL)
            perror(out_path);
        snprintf(out_path, sizeof out_path, "/home/zzh/work/sw_perf/output/del_%d.txt", i);
        del_ext_f_arr[i] = fopen(out_path, "w");
        if (del_ext_f_arr[i] == NULL)
            perror(out_path);
        snprintf(out_path, sizeof out_path, "/home/zzh/work/sw_perf/output/score_%d.txt", i);
        score_f_arr[i] = fopen(out_path, "w");
        if (score_f_arr[i] == NULL)
            perror(out_path);
    }
#endif
#ifdef DEBUG_RETURN_VALUE
    for (i = 0; i < KERNEL_NUM; ++i)
    {
        char out_path[64];
        snprintf(out_path, sizeof out_path, "/home/zzh/work/sw_perf/output/retval_%d.txt", i);
        retval_f_arr[i] = fopen(out_path, "w");
        if (retval_f_arr[i] == NULL)
        {
            // OUTPUT_RETVAL writes to this stream unconditionally.
            perror(out_path);
            goto cleanup;
        }
    }
#endif

    query_f = fopen(qf_path, "r");
    if (query_f == NULL)
    {
        perror(qf_path);
        goto cleanup;
    }
    target_f = fopen(tf_path, "r");
    if (target_f == NULL)
    {
        perror(tf_path);
        goto cleanup;
    }
    info_f = fopen(if_path, "r");
    if (info_f == NULL)
    {
        perror(if_path);
        goto cleanup;
    }

    query_arr = malloc(SEQ_BUF_SIZE);
    target_arr = malloc(SEQ_BUF_SIZE);
    info_buf = malloc((size_t)INFO_ROW_CAP * INFO_FIELDS * sizeof *info_buf);
    info_arr = malloc((size_t)INFO_ROW_CAP * sizeof *info_arr);
    if (query_arr == NULL || target_arr == NULL || info_buf == NULL || info_arr == NULL)
    {
        fprintf(stderr, "error: out of memory\n");
        goto cleanup;
    }

    // Row i of info_arr views three consecutive ints of info_buf.
    for (i = 0; i < INFO_ROW_CAP; ++i)
    {
        info_arr[i] = info_buf + (size_t)i * INFO_FIELDS;
    }

    // Load a block, run the kernels on it, repeat until the input is used up.
    while (!input_error && !feof(target_f) && !ferror(target_f))
    {
        int target_size = 0;
        int query_size = 0;
        block_line_num = 0;

        // A line is shorter than READ_BUF_SIZE, so starting a read below
        // BLOCK_BUF_SIZE can never run past SEQ_BUF_SIZE.
        while (block_line_num < INFO_ROW_CAP &&
               target_size < BLOCK_BUF_SIZE && query_size < BLOCK_BUF_SIZE)
        {
            int *info = info_arr[block_line_num];

            int target_len = read_seq_line(read_buf, target_f, target_arr + target_size);
            if (target_len == 0)
                break; // end of file or an empty line closes the block

            int query_len = read_seq_line(read_buf, query_f, query_arr + query_size);
            if (query_len == 0 || fgets(read_buf, sizeof read_buf, info_f) == NULL)
            {
                fprintf(stderr, "error: line %d: query or info input ended before the target input\n",
                        total_line_num + 1);
                input_error = 1;
                break;
            }
            if (sscanf(read_buf, "%d %d %d\n", &info[0], &info[1], &info[2]) != INFO_FIELDS)
            {
                fprintf(stderr, "error: line %d: malformed info line\n", total_line_num + 1);
                input_error = 1;
                break;
            }
            // The kernels index the packed buffers by the info lengths, so a
            // mismatch would misalign every following row.
            if (info[0] != query_len || info[1] != target_len)
            {
                fprintf(stderr,
                        "error: line %d: info lengths (%d, %d) do not match the sequence lengths (%d, %d)\n",
                        total_line_num + 1, info[0], info[1], query_len, target_len);
                input_error = 1;
                break;
            }

            target_size += target_len;
            query_size += query_len;
            ++block_line_num;
            ++total_line_num;
        }

        // Benchmark the kernels on the rows accepted into this block.
        // normal sw
        PERFORMANCE_TEST_NORMAL(0, ksw_extend_normal);
        // avx2
        // PERFORMANCE_TEST_AVX2(1, ksw_extend_avx2);
        // avx2 heuristics
        // PERFORMANCE_TEST_AVX2(2, ksw_extend_avx2_heuristics);
        // avx2 mem aligned
        // PERFORMANCE_TEST_AVX2(3, ksw_extend_avx2_aligned);
        // avx2 u8
        // PERFORMANCE_TEST_AVX2(4, ksw_extend_avx2_u8);
        // avx2 u8 heuristics
        // PERFORMANCE_TEST_AVX2(5, ksw_extend_avx2_u8_heuristics);
        // avx2 u8 mem aligned
        // PERFORMANCE_TEST_AVX2(6, ksw_extend_avx2_u8_aligned);
    }

    if (ferror(target_f) || ferror(query_f) || ferror(info_f))
    {
        fprintf(stderr, "error: read failure on an input file\n");
        input_error = 1;
    }

#ifdef SHOW_PERF
    {
        static const char *const kernel_names[] = {
            "normal",
            "avx2",
            "avx2_heuristics",
            "avx2_aligned",
            "avx2_u8",
            "avx2_u8_heuristics",
            "avx2_u8_aligned"};
        const int name_count = (int)(sizeof kernel_names / sizeof kernel_names[0]);
        for (i = 0; i < KERNEL_NUM; ++i)
        {
            // Stay inside the name table even if KERNEL_NUM grows beyond it.
            const char *name = i < name_count ? kernel_names[i] : "unknown";
            fprintf(stderr, "[%18s] time: %9.6f s; score: %d\n", name, time_sw[i] / DIVIDE_BY, score_total[i]);
        }
    }
#endif

    if (!input_error)
        exit_code = EXIT_SUCCESS;

cleanup:
    if (query_f != NULL)
        fclose(query_f);
    if (target_f != NULL)
        fclose(target_f);
    if (info_f != NULL)
        fclose(info_f);
    for (i = 0; i < KERNEL_NUM; ++i)
    {
        if (ins_ext_f_arr[i] != NULL)
        {
            fclose(ins_ext_f_arr[i]);
            ins_ext_f_arr[i] = NULL;
        }
        if (del_ext_f_arr[i] != NULL)
        {
            fclose(del_ext_f_arr[i]);
            del_ext_f_arr[i] = NULL;
        }
        if (score_f_arr[i] != NULL)
        {
            fclose(score_f_arr[i]);
            score_f_arr[i] = NULL;
        }
        if (retval_f_arr[i] != NULL)
        {
            fclose(retval_f_arr[i]);
            retval_f_arr[i] = NULL;
        }
    }
    free(info_arr);
    free(info_buf);
    free(target_arr);
    free(query_arr);
    return exit_code;
}