399 lines
15 KiB
C
399 lines
15 KiB
C
#include <stdlib.h>
|
||
#include <stdio.h>
|
||
#include <string.h>
|
||
#include <stdint.h>
|
||
#include <assert.h>
|
||
#include "sys/time.h"
|
||
|
||
#define SW_NORMAL 0
|
||
#define SW_AVX2 1
|
||
#define SW_CUDA 2
|
||
#define SW_ALL 3
|
||
|
||
#define BLOCK_BUF_SIZE 1048576
|
||
#define READ_BUF_SIZE 2048
|
||
#define SEQ_BUF_SIZE (BLOCK_BUF_SIZE + READ_BUF_SIZE)
|
||
|
||
#ifdef SHOW_PERF
|
||
// 用来调试,计算感兴趣部分的运行时间
|
||
// 获取当前毫秒数
|
||
int64_t get_mseconds()
|
||
{
|
||
struct timeval tv;
|
||
gettimeofday(&tv, NULL);
|
||
return (int64_t)1000 * (tv.tv_sec + ((1e-6) * tv.tv_usec));
|
||
}
|
||
|
||
int64_t time_sw_normal = 0,
|
||
time_sw_avx2 = 0,
|
||
time_sw_avx2_u8 = 0,
|
||
time_bsw_avx2 = 0,
|
||
time_bsw_init = 0,
|
||
time_bsw_main_loop = 0,
|
||
time_bsw_find_max = 0,
|
||
time_bsw_adjust_bound = 0,
|
||
time_compare = 0;
|
||
|
||
#endif
|
||
|
||
extern int ksw_normal(int qlen, const uint8_t *query, int tlen, const uint8_t *target, int m, const int8_t *mat, int o_del, int e_del, int o_ins, int e_ins, int w, int end_bonus, int zdrop, int h0, int *_qle, int *_tle, int *_gtle, int *_gscore, int *_max_off);
|
||
extern int ksw_avx2(int qlen, const uint8_t *query, int tlen, const uint8_t *target, int is_left, int m, const int8_t *mat, int o_del, int e_del,
|
||
int o_ins, int e_ins, int a, int b, int w, int end_bonus, int zdrop, int h0, int *_qle, int *_tle, int *_gtle, int *_gscore, int *_max_off);
|
||
extern int ksw_avx2_u8(int qlen, const uint8_t *query, int tlen, const uint8_t *target, int is_left, int m, const int8_t *mat, int o_del, int e_del,
|
||
int o_ins, int e_ins, int a, int b, int w, int end_bonus, int zdrop, int h0, int *_qle, int *_tle, int *_gtle, int *_gscore, int *_max_off);
|
||
|
||
unsigned char nst_nt4_table[256] = {
|
||
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
|
||
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
|
||
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5 /*'-'*/, 4, 4,
|
||
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
|
||
4, 0, 4, 1, 4, 4, 4, 2, 4, 4, 4, 4, 4, 4, 4, 4,
|
||
4, 4, 4, 4, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
|
||
4, 0, 4, 1, 4, 4, 4, 2, 4, 4, 4, 4, 4, 4, 4, 4,
|
||
4, 4, 4, 4, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
|
||
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
|
||
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
|
||
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
|
||
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
|
||
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
|
||
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
|
||
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
|
||
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4};
|
||
char t_2bit2char[5] = {'A', 'C', 'G', 'T'};
|
||
// 将碱基字符转成2位编码
|
||
void convert_char_to_2bit(char *str)
|
||
{
|
||
int i;
|
||
for (i = 0; i < strlen(str); ++i)
|
||
str[i] = nst_nt4_table[str[i]];
|
||
}
|
||
|
||
/*
|
||
* 包含一个参数,用来区分调用那个sw算法
|
||
* 参数为 normal/avx2/cuda
|
||
*/
|
||
// 程序执行入口
|
||
int main(int argc, char *argv[])
|
||
{
|
||
/*
|
||
int sw_algo = SW_NORMAL;
|
||
|
||
// 判断执行的sw的实现类型
|
||
if (argc > 1)
|
||
{
|
||
if (strcmp(argv[1], "normal") == 0)
|
||
{
|
||
sw_algo = SW_NORMAL;
|
||
}
|
||
else if (strcmp(argv[1], "avx2") == 0)
|
||
{
|
||
sw_algo = SW_AVX2;
|
||
}
|
||
else if (strcmp(argv[1], "cuda") == 0)
|
||
{
|
||
sw_algo = SW_CUDA;
|
||
}
|
||
else
|
||
{
|
||
sw_algo = SW_ALL;
|
||
}
|
||
} */
|
||
|
||
// 初始化一些全局参数
|
||
int8_t mat[25] = {1, -4, -4, -4, -1,
|
||
-4, 1, -4, -4, -1,
|
||
-4, -4, 1, -4, -1,
|
||
-4, -4, -4, 1, -1,
|
||
-1, -1, -1, -1, -1};
|
||
int max_off[2];
|
||
int qle, tle, gtle, gscore;
|
||
|
||
// 读取测试数据
|
||
char *query_arr = (char *)malloc(SEQ_BUF_SIZE);
|
||
char *target_arr = (char *)malloc(SEQ_BUF_SIZE);
|
||
int *info_buf = (int *)malloc(SEQ_BUF_SIZE);
|
||
int **info_arr = (int **)malloc(SEQ_BUF_SIZE);
|
||
FILE *query_f = 0, *target_f = 0, *info_f = 0;
|
||
// const char *qf_path = "q.fa";
|
||
// const char *tf_path = "t.fa";
|
||
// const char *if_path = "i.txt";
|
||
// const char *qf_path = "bug_q.fa";
|
||
// const char *tf_path = "bug_t.fa";
|
||
// const char *if_path = "bug_i.txt";
|
||
// const char *qf_path = "/public/home/zzh/data/sw/q_s.fa";
|
||
// const char *tf_path = "/public/home/zzh/data/sw/t_s.fa";
|
||
// const char *if_path = "/public/home/zzh/data/sw/i_s.txt";
|
||
// const char *qf_path = "/public/home/zzh/data/sw/q_m.fa";
|
||
// const char *tf_path = "/public/home/zzh/data/sw/t_m.fa";
|
||
// const char *if_path = "/public/home/zzh/data/sw/i_m.txt";
|
||
const char *qf_path = "/public/home/zzh/data/sw/q_l.fa";
|
||
const char *tf_path = "/public/home/zzh/data/sw/t_l.fa";
|
||
const char *if_path = "/public/home/zzh/data/sw/i_l.txt";
|
||
// const char *qf_path = "/public/home/zzh/data/sw/query.fa";
|
||
// const char *tf_path = "/public/home/zzh/data/sw/target.fa";
|
||
// const char *if_path = "/public/home/zzh/data/sw/info.txt";
|
||
query_f = fopen(qf_path, "r");
|
||
target_f = fopen(tf_path, "r");
|
||
info_f = fopen(if_path, "r");
|
||
|
||
// 将每次比对的得分等信息写入文件,进行debug
|
||
FILE *normal_out_f = fopen("normal_out.txt", "w");
|
||
FILE *avx2_out_f = fopen("avx2_out.txt", "w");
|
||
FILE *avx2_u8_out_f = fopen("avx2_u8_out.txt", "w");
|
||
FILE *bsw_avx2_out_f = fopen("bsw_avx2_out.txt", "w");
|
||
|
||
// 每次读取一定量的数据,然后执行,直到处理完所有数据
|
||
int total_line_num = 0; // 目前处理的总的数据行数
|
||
int block_line_num = 0; // 当前循环包含的数据行数
|
||
int i, j;
|
||
// const int max_read = READ_BUF_SIZE; // 每次最多读取的字符
|
||
char read_buf[READ_BUF_SIZE]; // 读文件缓存
|
||
// int ret_code = 0;
|
||
|
||
// 初始化info_arr数组
|
||
i = 0;
|
||
j = 0;
|
||
while (1)
|
||
{
|
||
if (j > BLOCK_BUF_SIZE)
|
||
break;
|
||
info_arr[i] = &info_buf[j];
|
||
i += 1;
|
||
j += 3;
|
||
}
|
||
|
||
int score_normal = 0, score_avx2 = 0, score_avx2_u8 = 0, score_bsw_avx2 = 0;
|
||
int score_normal_total = 0, score_avx2_total = 0, score_avx2_u8_total = 0, score_bsw_avx2_total = 0;
|
||
|
||
while (!feof(target_f))
|
||
{
|
||
block_line_num = 0;
|
||
// target序列一般占用存储最多,先读取target,看一个buf能读多少行,query和info就按照这个行数来读
|
||
int cur_read_size = 0;
|
||
while (!feof(target_f) && cur_read_size < BLOCK_BUF_SIZE)
|
||
{
|
||
if (fgets(read_buf, READ_BUF_SIZE, target_f) == NULL)
|
||
break;
|
||
int line_size = strlen(read_buf);
|
||
assert(line_size < READ_BUF_SIZE);
|
||
if (read_buf[line_size - 1] == '\n')
|
||
{
|
||
read_buf[line_size - 1] = '\0';
|
||
line_size--;
|
||
}
|
||
convert_char_to_2bit(read_buf);
|
||
++block_line_num;
|
||
++total_line_num;
|
||
strncpy(target_arr + cur_read_size, read_buf, line_size);
|
||
cur_read_size += line_size;
|
||
// fprintf(stderr, "%d %d \n", line_size, cur_read_size);
|
||
}
|
||
|
||
// 读query
|
||
cur_read_size = 0;
|
||
for (i = 0; i < block_line_num; ++i)
|
||
{
|
||
if (fgets(read_buf, READ_BUF_SIZE, query_f) == NULL)
|
||
break;
|
||
int line_size = strlen(read_buf);
|
||
assert(line_size < READ_BUF_SIZE);
|
||
if (read_buf[line_size - 1] == '\n')
|
||
{
|
||
read_buf[line_size - 1] = '\0';
|
||
line_size--;
|
||
}
|
||
convert_char_to_2bit(read_buf);
|
||
strncpy(query_arr + cur_read_size, read_buf, line_size);
|
||
cur_read_size += line_size;
|
||
}
|
||
|
||
// 读info
|
||
cur_read_size = 0;
|
||
for (i = 0; i < block_line_num; ++i)
|
||
{
|
||
if (fgets(read_buf, READ_BUF_SIZE, info_f) == NULL)
|
||
break;
|
||
const int line_size = strlen(read_buf);
|
||
assert(line_size < READ_BUF_SIZE);
|
||
sscanf(read_buf, "%d %d %d\n", &info_arr[i][0], &info_arr[i][1], &info_arr[i][2]);
|
||
cur_read_size += line_size;
|
||
// fprintf(stderr, "%-8d%-8d%-8d\n", info_arr[i][0], info_arr[i][1], info_arr[i][2]);
|
||
// fprintf(stderr, "%s\n", read_buf);
|
||
}
|
||
|
||
// 性能测试
|
||
|
||
// 普通 sw
|
||
int cur_query_pos = 0;
|
||
int cur_target_pos = 0;
|
||
for (i = 0; i < block_line_num; ++i)
|
||
{
|
||
#ifdef SHOW_PERF
|
||
int64_t start_time = get_mseconds();
|
||
#endif
|
||
score_normal = ksw_normal(
|
||
info_arr[i][0],
|
||
(uint8_t *)query_arr + cur_query_pos,
|
||
info_arr[i][1],
|
||
(uint8_t *)target_arr + cur_target_pos,
|
||
5, mat, 6, 1, 6, 1, 100, 5, 100,
|
||
info_arr[i][2],
|
||
&qle, &tle, >le, &gscore, &max_off[0]);
|
||
#ifdef SHOW_PERF
|
||
time_sw_normal += get_mseconds() - start_time;
|
||
#endif
|
||
score_normal_total += score_normal;
|
||
// fprintf(normal_out_f, "%d %d\n", info_arr[i][2], score_normal);
|
||
// fprintf(stderr, "%d %d %d %d %d %d %d\n", info_arr[i][2], score_normal, qle, tle, gtle, gscore, max_off[0]);
|
||
|
||
#ifdef SHOW_PERF
|
||
start_time = get_mseconds();
|
||
#endif
|
||
score_bsw_avx2 = bsw_avx2(
|
||
info_arr[i][0],
|
||
(uint8_t *)query_arr + cur_query_pos,
|
||
info_arr[i][1],
|
||
(uint8_t *)target_arr + cur_target_pos,
|
||
0, 5, mat, 6, 1, 6, 1,
|
||
1, 4,
|
||
100, 5, 100,
|
||
info_arr[i][2],
|
||
&qle, &tle, >le, &gscore, &max_off[0]);
|
||
#ifdef SHOW_PERF
|
||
time_bsw_avx2 += get_mseconds() - start_time;
|
||
#endif
|
||
score_bsw_avx2_total += score_bsw_avx2;
|
||
// fprintf(avx2_out_f, "%d %d\n", info_arr[i][2], score_avx2);
|
||
// fprintf(stderr, "%d %d %d %d %d %d %d\n", info_arr[i][2], score_bsw_avx2_total, qle, tle, gtle, gscore, max_off[0]);
|
||
/*
|
||
#ifdef SHOW_PERF
|
||
start_time = get_mseconds();
|
||
#endif
|
||
score_avx2 = ksw_avx2(
|
||
info_arr[i][0],
|
||
(uint8_t *)query_arr + cur_query_pos,
|
||
info_arr[i][1],
|
||
(uint8_t *)target_arr + cur_target_pos,
|
||
0, 5, mat, 6, 1, 6, 1,
|
||
1, 4,
|
||
100, 5, 100,
|
||
info_arr[i][2],
|
||
&qle, &tle, >le, &gscore, &max_off[0]);
|
||
#ifdef SHOW_PERF
|
||
time_sw_avx2 += get_mseconds() - start_time;
|
||
#endif
|
||
score_avx2_total += score_avx2;
|
||
// fprintf(avx2_out_f, "%d %d\n", info_arr[i][2], score_avx2);
|
||
fprintf(stderr, "%d %d %d %d %d %d %d\n", info_arr[i][2], score_avx2, qle, tle, gtle, gscore, max_off[0]);
|
||
*/
|
||
#ifdef SHOW_PERF
|
||
start_time = get_mseconds();
|
||
#endif
|
||
score_avx2_u8 = ksw_avx2_u8(
|
||
info_arr[i][0],
|
||
(uint8_t *)query_arr + cur_query_pos,
|
||
info_arr[i][1],
|
||
(uint8_t *)target_arr + cur_target_pos,
|
||
0, 5, mat, 6, 1, 6, 1,
|
||
1, 4,
|
||
100, 5, 100,
|
||
info_arr[i][2],
|
||
&qle, &tle, >le, &gscore, &max_off[0]);
|
||
#ifdef SHOW_PERF
|
||
time_sw_avx2_u8 += get_mseconds() - start_time;
|
||
#endif
|
||
score_avx2_u8_total += score_avx2_u8;
|
||
// fprintf(avx2_u8_out_f, "%d %d %d %d %d %d\n", score_avx2_u8, qle, tle, gtle, gscore, max_off[0]);
|
||
// 更新query和target位置信息
|
||
cur_query_pos += info_arr[i][0];
|
||
cur_target_pos += info_arr[i][1];
|
||
// fprintf(stderr, "%d %d %d %d %d %d %d\n", score_normal, qle, tle, gtle, gscore, max_off[0], max_off[1]);
|
||
}
|
||
/*
|
||
// avx2 sw
|
||
cur_query_pos = 0;
|
||
cur_target_pos = 0;
|
||
for (i = 0; i < block_line_num; ++i)
|
||
{
|
||
#ifdef SHOW_PERF
|
||
int64_t start_time = get_mseconds();
|
||
#endif
|
||
score_avx2 += ksw_avx2(
|
||
info_arr[i][0],
|
||
(uint8_t *)query_arr + cur_query_pos,
|
||
info_arr[i][1],
|
||
(uint8_t *)target_arr + cur_target_pos,
|
||
0, 5, mat, 6, 1, 6, 1,
|
||
1, 4,
|
||
100, 5, 100,
|
||
info_arr[i][2],
|
||
&qle, &tle, >le, &gscore, &max_off[0]);
|
||
#ifdef SHOW_PERF
|
||
time_sw_avx2 += get_mseconds() - start_time;
|
||
#endif
|
||
// 更新query和target位置信息
|
||
cur_query_pos += info_arr[i][0];
|
||
cur_target_pos += info_arr[i][1];
|
||
// fprintf(stderr, "%d %d %d %d %d %d %d\n", score_avx2, qle, tle, gtle, gscore, max_off[0], max_off[1]);
|
||
}
|
||
|
||
// avx2 u8 sw
|
||
cur_query_pos = 0;
|
||
cur_target_pos = 0;
|
||
for (i = 0; i < block_line_num; ++i)
|
||
{
|
||
#ifdef SHOW_PERF
|
||
int64_t start_time = get_mseconds();
|
||
#endif
|
||
score_avx2_u8 += ksw_avx2_u8(
|
||
info_arr[i][0],
|
||
(uint8_t *)query_arr + cur_query_pos,
|
||
info_arr[i][1],
|
||
(uint8_t *)target_arr + cur_target_pos,
|
||
0, 5, mat, 6, 1, 6, 1,
|
||
1, 4,
|
||
100, 5, 100,
|
||
info_arr[i][2],
|
||
&qle, &tle, >le, &gscore, &max_off[0]);
|
||
#ifdef SHOW_PERF
|
||
time_sw_avx2_u8 += get_mseconds() - start_time;
|
||
#endif
|
||
// 更新query和target位置信息
|
||
cur_query_pos += info_arr[i][0];
|
||
cur_target_pos += info_arr[i][1];
|
||
// fprintf(stderr, "%d %d %d %d %d %d %d\n", score_normal, qle, tle, gtle, gscore, max_off[0], max_off[1]);
|
||
}
|
||
*/
|
||
// fprintf(stderr, "%d %d \n", block_line_num, total_line_num);
|
||
}
|
||
|
||
// fprintf(stderr, "%d \n", score_normal);
|
||
|
||
#ifdef SHOW_PERF
|
||
fprintf(stderr, "time_sw_normal: %f s; score: %d\n", time_sw_normal / 1000.0, score_normal_total);
|
||
fprintf(stderr, "time_bsw_avx2: %f s; score: %d\n", time_bsw_avx2 / 1000.0, score_bsw_avx2_total);
|
||
// fprintf(stderr, "time_sw_avx2: %f s; score: %d\n", time_sw_avx2 / 1000.0, score_avx2_total);
|
||
fprintf(stderr, "time_sw_avx2_u8: %f s; score: %d\n", time_sw_avx2_u8 / 1000.0, score_avx2_u8_total);
|
||
|
||
fprintf(stderr, "time_bsw_init: %f s\n", time_bsw_init / 1000.0);
|
||
fprintf(stderr, "time_bsw_main_loop: %f s\n", (time_bsw_main_loop - time_compare) / 1000.0);
|
||
fprintf(stderr, "time_bsw_find_max: %f s\n", (time_bsw_find_max - time_compare) / 1000.0);
|
||
fprintf(stderr, "time_bsw_adjust_bound: %f s\n", (time_bsw_adjust_bound - time_compare) / 1000.0);
|
||
fprintf(stderr, "time_compare: %f s\n", time_compare / 1000.0);
|
||
#endif
|
||
|
||
if (query_f != 0)
|
||
fclose(query_f);
|
||
if (target_f != 0)
|
||
fclose(target_f);
|
||
if (info_f != 0)
|
||
fclose(info_f);
|
||
if (avx2_out_f != 0)
|
||
fclose(avx2_out_f);
|
||
if (avx2_u8_out_f != 0)
|
||
fclose(avx2_u8_out_f);
|
||
if (normal_out_f != 0)
|
||
fclose(normal_out_f);
|
||
if (bsw_avx2_out_f != 0)
|
||
fclose(bsw_avx2_out_f);
|
||
} |