Compare commits

...

2 Commits

Author SHA1 Message Date
zzh ede75a66fa 测试不同数据 2023-08-11 00:36:14 +08:00
zzh 5a41d5e207 测试不同内存开辟策略的影响 2023-08-11 00:35:43 +08:00
5 changed files with 267 additions and 5 deletions

1
.gitignore vendored
View File

@ -1,5 +1,6 @@
*.[oa]
sw_perf
sw_perf_discrete
test
test64
.*.swp

11
.vscode/launch.json vendored
View File

@ -14,6 +14,17 @@
"all"
],
"cwd": "${workspaceFolder}", //
},
{
"name": "sw-perf-discrete",
"preLaunchTask": "Build",
"type": "cppdbg",
"request": "launch",
"program": "${workspaceRoot}/sw_perf_discrete",
"args": [
"all"
],
"cwd": "${workspaceFolder}", //
}
]
}

View File

@ -4,6 +4,7 @@ CFLAGS= -Wall -Wno-unused-function -O2 -mavx2
DFLAGS= -DSHOW_PERF
OBJS= ksw_normal.o ksw_avx2.o ksw_cuda.o ksw_avx2_u8.o
PROG= sw_perf
PROG2= sw_perf_discrete
INCLUDES=
LIBS=
SUBDIRS= .
@ -17,13 +18,16 @@ endif
.c.o:
$(CC) -c $(CFLAGS) $(DFLAGS) $(INCLUDES) $(CPPFLAGS) $< -o $@
all:$(PROG)
all:$(PROG) $(PROG2)
sw_perf:$(OBJS) main.o
$(CC) $(CFLAGS) $(LDFLAGS) $(OBJS) main.o -o $@ -L. $(LIBS)
# sw_perf_discrete: same kernels as sw_perf, but driven by discrete_mem_main.o
# (the recipe must link the object listed as prerequisite, not main.o).
$(PROG2):$(OBJS) discrete_mem_main.o
	$(CC) $(CFLAGS) $(LDFLAGS) $(OBJS) discrete_mem_main.o -o $@ -L. $(LIBS)
clean:
rm -f *.o a.out $(PROG) *~ *.a
rm -f *.o a.out $(PROG) $(PROG2) *~ *.a
depend:
( LC_ALL=C ; export LC_ALL; makedepend -Y -- $(CFLAGS) $(DFLAGS) $(CPPFLAGS) -- *.c )

243
discrete_mem_main.c 100644
View File

@ -0,0 +1,243 @@
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <stdint.h>
#include <assert.h>
#include "sys/time.h"
// Kernel selector values.
// NOTE(review): none of these four are referenced in the code visible in this
// file; presumably kept for parity with main.c -- confirm before removing.
#define SW_NORMAL 0
#define SW_AVX2 1
#define SW_CUDA 2
#define SW_ALL 3
// Number of bytes of target text after which main() stops adding lines to the
// current block and runs the kernels on what has been read so far.
#define BLOCK_BUF_SIZE 1048576
// Capacity of the line buffer handed to fgets(); the longest line that can be
// read in one call is therefore READ_BUF_SIZE - 1 characters.
#define READ_BUF_SIZE 2048
// One block budget plus one line buffer (BLOCK_BUF_SIZE + READ_BUF_SIZE bytes).
#define SEQ_BUF_SIZE (BLOCK_BUF_SIZE + READ_BUF_SIZE)
// Read the input files into discrete (separately allocated) memory to see how
// that affects sw performance.
#ifdef SHOW_PERF
// Debugging aid: used to measure the run time of the code sections of interest.
// Returns the current wall-clock time (gettimeofday) expressed in milliseconds.
int64_t get_mseconds()
{
    struct timeval now;

    gettimeofday(&now, NULL);

    // Fold the microsecond part into fractional seconds, then scale to ms;
    // the fractional milliseconds are dropped by the conversion to int64_t.
    const double seconds = (double)now.tv_sec + 1e-6 * (double)now.tv_usec;
    return (int64_t)(1000.0 * seconds);
}
// Running totals of the time spent inside each kernel, in milliseconds.
// main() adds one get_mseconds() delta per kernel call and prints the totals
// (divided by 1000, i.e. in seconds) when it finishes.
int64_t time_sw_normal = 0,
time_sw_avx2 = 0,
time_sw_avx2_u8 = 0;
#endif
// Kernels under test. They are defined in other translation units (the
// Makefile links ksw_normal.o, ksw_avx2.o and ksw_avx2_u8.o); only the
// prototypes are visible here.
// NOTE(review): judging by the parameter names, qlen/tlen are the query and
// target lengths, mat is an m x m scoring matrix, o_*/e_* are gap open/extend
// penalties, w is the band width and h0 the initial score -- confirm against
// the kernel sources.
extern int ksw_normal(int qlen, const uint8_t *query, int tlen, const uint8_t *target, int m, const int8_t *mat, int o_del, int e_del, int o_ins, int e_ins, int w, int end_bonus, int zdrop, int h0, int *_qle, int *_tle, int *_gtle, int *_gscore, int *_max_off);
// The two AVX2 entry points share one signature; compared with ksw_normal they
// take three extra arguments: is_left, a and b.
extern int ksw_avx2(int qlen, const uint8_t *query, int tlen, const uint8_t *target, int is_left, int m, const int8_t *mat, int o_del, int e_del,
int o_ins, int e_ins, int a, int b, int w, int end_bonus, int zdrop, int h0, int *_qle, int *_tle, int *_gtle, int *_gscore, int *_max_off);
extern int ksw_avx2_u8(int qlen, const uint8_t *query, int tlen, const uint8_t *target, int is_left, int m, const int8_t *mat, int o_del, int e_del,
int o_ins, int e_ins, int a, int b, int w, int end_bonus, int zdrop, int h0, int *_qle, int *_tle, int *_gtle, int *_gscore, int *_max_off);
/*
 * Benchmark driver for the sw kernels (normal / avx2 / avx2_u8).
 *
 * Every query and target line is copied into its own heap allocation, so the
 * sequences handed to the kernels are scattered across the heap instead of
 * being packed into one contiguous buffer.
 */

/* Free the first `count` per-line buffers held in `lines` and clear the slots. */
static void free_seq_lines(char **lines, int count)
{
    for (int k = 0; k < count; ++k) {
        free(lines[k]);
        lines[k] = NULL;
    }
}

/*
 * Copy `len` characters of `line` plus the terminating NUL into a fresh
 * allocation. Returns NULL when the allocation fails; the caller owns the
 * returned buffer.
 */
static char *dup_seq_line(const char *line, size_t len)
{
    char *copy = malloc(len + 1);

    if (copy != NULL)
        memcpy(copy, line, len + 1);
    return copy;
}

/*
 * Program entry point.
 *
 * Reads the target file in blocks of roughly BLOCK_BUF_SIZE bytes, reads the
 * same number of lines from the query and info files, then runs the three
 * kernels over the block. Each info line holds three integers which are
 * passed to the kernels as qlen, tlen and h0.
 *
 * Returns EXIT_SUCCESS when all input was processed, EXIT_FAILURE when a file
 * could not be opened or memory ran out. argc/argv are not used.
 */
int main(int argc, char *argv[])
{
    // 5x5 scoring matrix handed to the kernels together with m = 5.
    int8_t mat[25] = {1, -4, -4, -4, -1,
                      -4, 1, -4, -4, -1,
                      -4, -4, 1, -4, -1,
                      -4, -4, -4, 1, -1,
                      -1, -1, -1, -1, -1};
    int max_off[2];
    int qle, tle, gtle, gscore;
    int exit_code = EXIT_FAILURE;
    int score_normal = 0, score_avx2 = 0, score_avx2_u8 = 0;
    int target_lines = 0; // target lines currently held in target_arr
    int query_lines = 0;  // query lines currently held in query_arr
    int i;
    char read_buf[READ_BUF_SIZE]; // line buffer shared by all three readers

    // A block stops growing once BLOCK_BUF_SIZE bytes have been read and every
    // line is at least one byte long, so a block never holds more lines than
    // this. The arrays are sized in elements, not bytes.
    const int max_lines = BLOCK_BUF_SIZE;
    char **query_arr = calloc((size_t)max_lines, sizeof *query_arr);
    char **target_arr = calloc((size_t)max_lines, sizeof *target_arr);
    int (*info_arr)[3] = malloc((size_t)max_lines * sizeof *info_arr);

    // Test data. Other data sets live next to these files
    // (e.g. q_s.fa / t_s.fa / i_s.txt).
    const char *qf_path = "/public/home/zzh/data/sw/q_m.fa";
    const char *tf_path = "/public/home/zzh/data/sw/t_m.fa";
    const char *if_path = "/public/home/zzh/data/sw/i_m.txt";
    FILE *query_f = fopen(qf_path, "r");
    FILE *target_f = fopen(tf_path, "r");
    FILE *info_f = fopen(if_path, "r");

    if (query_arr == NULL || target_arr == NULL || info_arr == NULL) {
        fprintf(stderr, "out of memory while allocating the line tables\n");
        goto cleanup;
    }
    if (query_f == NULL) {
        perror(qf_path);
        goto cleanup;
    }
    if (target_f == NULL) {
        perror(tf_path);
        goto cleanup;
    }
    if (info_f == NULL) {
        perror(if_path);
        goto cleanup;
    }

    // Read a bounded amount of data, run the kernels on it, and repeat until
    // the target file is exhausted.
    for (;;) {
        int cur_read_size = 0;
        int info_lines = 0;
        int pair_num;
        int input_exhausted = 0;

        // Target sequences usually take the most space, so read them first to
        // find out how many lines fit into one block; query and info are then
        // read for exactly that many lines.
        while (target_lines < max_lines && cur_read_size < BLOCK_BUF_SIZE) {
            if (fgets(read_buf, READ_BUF_SIZE, target_f) == NULL)
                break;
            const size_t line_size = strlen(read_buf);
            assert(line_size < READ_BUF_SIZE);
            target_arr[target_lines] = dup_seq_line(read_buf, line_size);
            if (target_arr[target_lines] == NULL) {
                fprintf(stderr, "out of memory while reading %s\n", tf_path);
                goto cleanup;
            }
            ++target_lines;
            cur_read_size += (int)line_size;
        }
        // Nothing read: end of file, or a read error. Stopping here instead
        // of testing feof() avoids spinning forever on a read error.
        if (target_lines == 0)
            break;

        // Read the matching query lines.
        while (query_lines < target_lines) {
            if (fgets(read_buf, READ_BUF_SIZE, query_f) == NULL)
                break;
            const size_t line_size = strlen(read_buf);
            assert(line_size < READ_BUF_SIZE);
            query_arr[query_lines] = dup_seq_line(read_buf, line_size);
            if (query_arr[query_lines] == NULL) {
                fprintf(stderr, "out of memory while reading %s\n", qf_path);
                goto cleanup;
            }
            ++query_lines;
        }

        // Read the matching info lines: "qlen tlen h0".
        while (info_lines < target_lines) {
            if (fgets(read_buf, READ_BUF_SIZE, info_f) == NULL)
                break;
            if (sscanf(read_buf, "%d %d %d\n", &info_arr[info_lines][0],
                       &info_arr[info_lines][1], &info_arr[info_lines][2]) != 3)
                break; // malformed line: do not run kernels on garbage
            ++info_lines;
        }

        // Only lines for which target, query and info are all present can be
        // aligned; anything less means the inputs are out of step.
        pair_num = target_lines;
        if (query_lines < pair_num)
            pair_num = query_lines;
        if (info_lines < pair_num)
            pair_num = info_lines;
        if (pair_num < target_lines) {
            fprintf(stderr,
                    "warning: query/info input ended early or is malformed; "
                    "processing %d of %d lines in the last block\n",
                    pair_num, target_lines);
            input_exhausted = 1;
        }

        // Performance test: plain sw.
        // Arguments after the sequences: m=5, mat, o_del=6, e_del=1, o_ins=6,
        // e_ins=1, w=100, end_bonus=5, zdrop=100, h0.
        for (i = 0; i < pair_num; ++i) {
#ifdef SHOW_PERF
            int64_t start_time = get_mseconds();
#endif
            score_normal += ksw_normal(
                info_arr[i][0],
                (uint8_t *)query_arr[i],
                info_arr[i][1],
                (uint8_t *)target_arr[i],
                5, mat, 6, 1, 6, 1, 100, 5, 100,
                info_arr[i][2],
                &qle, &tle, &gtle, &gscore, &max_off[0]);
#ifdef SHOW_PERF
            time_sw_normal += get_mseconds() - start_time;
#endif
        }

        // Performance test: avx2 sw.
        // Same settings as above, plus is_left=0, a=1, b=4.
        for (i = 0; i < pair_num; ++i) {
#ifdef SHOW_PERF
            int64_t start_time = get_mseconds();
#endif
            score_avx2 += ksw_avx2(
                info_arr[i][0],
                (uint8_t *)query_arr[i],
                info_arr[i][1],
                (uint8_t *)target_arr[i],
                0, 5, mat, 6, 1, 6, 1,
                1, 4,
                100, 5, 100,
                info_arr[i][2],
                &qle, &tle, &gtle, &gscore, &max_off[0]);
#ifdef SHOW_PERF
            time_sw_avx2 += get_mseconds() - start_time;
#endif
        }

        // Performance test: avx2 u8 sw (same arguments as the avx2 run).
        for (i = 0; i < pair_num; ++i) {
#ifdef SHOW_PERF
            int64_t start_time = get_mseconds();
#endif
            score_avx2_u8 += ksw_avx2_u8(
                info_arr[i][0],
                (uint8_t *)query_arr[i],
                info_arr[i][1],
                (uint8_t *)target_arr[i],
                0, 5, mat, 6, 1, 6, 1,
                1, 4,
                100, 5, 100,
                info_arr[i][2],
                &qle, &tle, &gtle, &gscore, &max_off[0]);
#ifdef SHOW_PERF
            time_sw_avx2_u8 += get_mseconds() - start_time;
#endif
        }

        // The block is done: release its per-line buffers before the next one.
        free_seq_lines(target_arr, target_lines);
        free_seq_lines(query_arr, query_lines);
        target_lines = 0;
        query_lines = 0;

        if (input_exhausted)
            break;
    }

#ifdef SHOW_PERF
    fprintf(stderr, "time_sw_normal: %f s; score: %d\n", time_sw_normal / 1000.0, score_normal);
    fprintf(stderr, "time_sw_avx2: %f s; score: %d\n", time_sw_avx2 / 1000.0, score_avx2);
    fprintf(stderr, "time_sw_avx2_u8: %f s; score: %d\n", time_sw_avx2_u8 / 1000.0, score_avx2_u8);
#endif
    exit_code = EXIT_SUCCESS;

cleanup:
    // Reached on success and on every error path; the counts say how many
    // per-line buffers are still live (0 after a completed block).
    if (target_arr != NULL)
        free_seq_lines(target_arr, target_lines);
    if (query_arr != NULL)
        free_seq_lines(query_arr, query_lines);
    free(info_arr);
    free(target_arr);
    free(query_arr);
    if (query_f != NULL)
        fclose(query_f);
    if (target_f != NULL)
        fclose(target_f);
    if (info_f != NULL)
        fclose(info_f);
    return exit_code;
}

9
main.c
View File

@ -85,12 +85,15 @@ int main(int argc, char *argv[])
// const char *qf_path = "/public/home/zzh/data/sw/q_s.fa";
// const char *tf_path = "/public/home/zzh/data/sw/t_s.fa";
// const char *if_path = "/public/home/zzh/data/sw/i_s.txt";
const char *qf_path = "/public/home/zzh/data/sw/q_m.fa";
const char *tf_path = "/public/home/zzh/data/sw/t_m.fa";
const char *if_path = "/public/home/zzh/data/sw/i_m.txt";
// const char *qf_path = "/public/home/zzh/data/sw/q_m.fa";
// const char *tf_path = "/public/home/zzh/data/sw/t_m.fa";
// const char *if_path = "/public/home/zzh/data/sw/i_m.txt";
// const char *qf_path = "/public/home/zzh/data/sw/q_l.fa";
// const char *tf_path = "/public/home/zzh/data/sw/t_l.fa";
// const char *if_path = "/public/home/zzh/data/sw/i_l.txt";
const char *qf_path = "/public/home/zzh/data/sw/query.fa";
const char *tf_path = "/public/home/zzh/data/sw/target.fa";
const char *if_path = "/public/home/zzh/data/sw/info.txt";
query_f = fopen(qf_path, "r");
target_f = fopen(tf_path, "r");
info_f = fopen(if_path, "r");