添加了debug打印信息

This commit is contained in:
zzh 2023-08-26 03:00:15 +08:00
parent b53569db63
commit 2aeb566bf7
14 changed files with 189 additions and 96 deletions

2
.gitignore vendored
View File

@ -1,3 +1,5 @@
output/
input/
*.[oa]
*.txt
*.fa

4
.vscode/launch.json vendored
View File

@ -11,7 +11,9 @@
"request": "launch",
"program": "${workspaceRoot}/sw_perf",
"args": [
"all"
"/home/zzh/data/sw/q_s.fa",
"/home/zzh/data/sw/t_s.fa",
"/home/zzh/data/sw/i_s.txt"
],
"cwd": "${workspaceFolder}", //
},

View File

@ -1,7 +1,8 @@
CC= gcc
#CFLAGS= -g -Wall -Wno-unused-function -mavx2
CFLAGS= -Wall -Wno-unused-function -O2 -mavx2
DFLAGS= -DSHOW_PERF
DFLAGS= -DSHOW_PERF -DDEBUG_OUT
#DFLAGS= -DSHOW_PERF -DDEBUG_OUT -DDEBUG_RETURN_VALUE
PROG= sw_perf
INCLUDES=
LIBS=

25
common.h 100644
View File

@ -0,0 +1,25 @@
/*********************************************************************************************
Description: common defines and declarations
Copyright : All right reserved by NCIC.ICT
Author : Zhang Zhonghai
Date : 2023/08/26
***********************************************************************************************/
#ifndef __COMMON_H
#define __COMMON_H
#include <stdio.h>
#undef MAX
#undef MIN
#define MAX(x, y) ((x) > (y) ? (x) : (y))
#define MIN(x, y) ((x) < (y) ? (x) : (y))
#define KERNEL_NUM 7
extern FILE *ins_ext_f_arr[KERNEL_NUM],
*del_ext_f_arr[KERNEL_NUM],
*score_f_arr[KERNEL_NUM],
*retval_f_arr[KERNEL_NUM];
#endif

View File

@ -6,6 +6,7 @@
#include <immintrin.h>
#include <emmintrin.h>
#include "thread_mem.h"
#include "common.h"
#ifdef __GNUC__
#define LIKELY(x) __builtin_expect((x), 1)
@ -15,10 +16,6 @@
#define UNLIKELY(x) (x)
#endif
#undef MAX
#undef MIN
#define MAX(x, y) ((x) > (y) ? (x) : (y))
#define MIN(x, y) ((x) < (y) ? (x) : (y))
#define SIMD_WIDTH 16
static const uint16_t h_vec_int_mask[SIMD_WIDTH][SIMD_WIDTH] = {
@ -207,7 +204,6 @@ int ksw_extend_avx2(thread_mem_t *tmem,
assert(init_score > 0);
// allocate memory
// mem = malloc(mem_size);
mem = thread_mem_request(tmem, mem_size);
qtmem = (int16_t *)&mem[0];
seq = &qtmem[0];
@ -248,7 +244,6 @@ int ksw_extend_avx2(thread_mem_t *tmem,
fA2 = &fA[col_size];
// adjust $window_size if it is too large
// get the max score
max = base_match_score;
max_ins = (int)((double)(qlen * max + end_bonus - o_ins) / e_ins + 1.);
@ -280,6 +275,13 @@ int ksw_extend_avx2(thread_mem_t *tmem,
int iend;
#ifdef DEBUG_OUT
int16_t ins[tlen + 1][qlen + 1];
int16_t del[tlen + 1][qlen + 1];
int16_t score[tlen + 1][qlen + 1];
ins[0][0] = del[0][0] = score[0][0] = init_score;
#endif
for (D = 1; LIKELY(D < Dloop); ++D)
{
// 边界条件一定要注意! tlen 大于,等于,小于 qlen时的情况
@ -295,15 +297,16 @@ int ksw_extend_avx2(thread_mem_t *tmem,
}
end1 = MIN(qlen, beg1 + span);
if (beg < beg1)
beg = beg1;
if (end > end1)
end = end1;
if (beg > end)
break; // 不用计算了直接跳出否则hA2没有被赋值里边是上一轮hA0的值会出bug
beg = 1;
end = qlen;
// if (beg < beg1)
// beg = beg1;
// if (end > end1)
// end = end1;
// if (beg > end)
// break; // 不用计算了直接跳出否则hA2没有被赋值里边是上一轮hA0的值会出bug
iend = D - (beg - 1); // ref开始计算的位置倒序
span = end - beg;
iStart = iend - span - 1; // 0开始的ref索引位置
@ -409,4 +412,4 @@ int ksw_extend_avx2(thread_mem_t *tmem,
if (_max_off)
*_max_off = max_off;
return max;
}
}

View File

@ -5,6 +5,7 @@
#include <immintrin.h>
#include <emmintrin.h>
#include "thread_mem.h"
#include "common.h"
#ifdef __GNUC__
#define LIKELY(x) __builtin_expect((x), 1)
@ -324,12 +325,12 @@ int ksw_extend_avx2_aligned(thread_mem_t *tmem,
}
end1 = MIN(qlen, beg1 + span);
if (read_start_pos < beg1)
read_start_pos = beg1;
if (read_end_pos > end1)
read_end_pos = end1;
if (read_start_pos > read_end_pos)
break; // 不用计算了直接跳出否则hA2没有被赋值里边是上一轮hA0的值会出bug
// if (read_start_pos < beg1)
// read_start_pos = beg1;
// if (read_end_pos > end1)
// read_end_pos = end1;
// if (read_start_pos > read_end_pos)
// break; // 不用计算了直接跳出否则hA2没有被赋值里边是上一轮hA0的值会出bug
read_start_pos = 1;
read_end_pos = qlen;

View File

@ -6,6 +6,7 @@
#include <immintrin.h>
#include <emmintrin.h>
#include "thread_mem.h"
#include "common.h"
#ifdef __GNUC__
#define LIKELY(x) __builtin_expect((x), 1)
@ -190,8 +191,6 @@ static const uint16_t h_vec_int_mask[SIMD_WIDTH][SIMD_WIDTH] = {
cur_match_arr = next_match_arr; \
next_match_arr = tmp;
// uint8_t mem[102400];
int ksw_extend_avx2_heuristics(thread_mem_t *tmem,
int qlen, // query length 待匹配段碱基的query长度
const uint8_t *query, // read碱基序列
@ -235,7 +234,6 @@ int ksw_extend_avx2_heuristics(thread_mem_t *tmem,
assert(init_score > 0);
// allocate memory
// mem = malloc(mem_size);
mem = thread_mem_request(tmem, mem_size);
qtmem = (int16_t *)&mem[0];
seq = &qtmem[0];
@ -325,12 +323,12 @@ int ksw_extend_avx2_heuristics(thread_mem_t *tmem,
beg = 1;
end = qlen;
if (beg < beg1)
beg = beg1;
if (end > end1)
end = end1;
if (beg > end)
break; // 不用计算了直接跳出否则hA2没有被赋值里边是上一轮hA0的值会出bug
// if (beg < beg1)
// beg = beg1;
// if (end > end1)
// end = end1;
// if (beg > end)
// break; // 不用计算了直接跳出否则hA2没有被赋值里边是上一轮hA0的值会出bug
iend = D - (beg - 1); // ref开始计算的位置倒序
span = end - beg;
@ -436,4 +434,4 @@ int ksw_extend_avx2_heuristics(thread_mem_t *tmem,
if (_max_off)
*_max_off = max_off;
return max;
}
}

View File

@ -6,6 +6,7 @@
#include <immintrin.h>
#include <emmintrin.h>
#include "thread_mem.h"
#include "common.h"
#ifdef __GNUC__
#define LIKELY(x) __builtin_expect((x), 1)
@ -320,12 +321,12 @@ int ksw_extend_avx2_u8(thread_mem_t *tmem,
}
end1 = MIN(qlen, beg1 + span);
if (beg < beg1)
beg = beg1;
if (end > end1)
end = end1;
if (beg > end)
break; // 不用计算了直接跳出否则hA2没有被赋值里边是上一轮hA0的值会出bug
// if (beg < beg1)
// beg = beg1;
// if (end > end1)
// end = end1;
// if (beg > end)
// break; // 不用计算了直接跳出否则hA2没有被赋值里边是上一轮hA0的值会出bug
beg = 1;
end = qlen;

View File

@ -5,6 +5,7 @@
#include <immintrin.h>
#include <emmintrin.h>
#include "thread_mem.h"
#include "common.h"
#ifdef __GNUC__
#define LIKELY(x) __builtin_expect((x), 1)
@ -349,12 +350,12 @@ int ksw_extend_avx2_u8_aligned(thread_mem_t *tmem,
}
end1 = MIN(qlen, beg1 + span);
if (beg < beg1)
beg = beg1;
if (end > end1)
end = end1;
if (beg > end)
break; // 不用计算了直接跳出否则hA2没有被赋值里边是上一轮hA0的值会出bug
// if (beg < beg1)
// beg = beg1;
// if (end > end1)
// end = end1;
// if (beg > end)
// break; // 不用计算了直接跳出否则hA2没有被赋值里边是上一轮hA0的值会出bug
beg = 1;
end = qlen;

View File

@ -6,6 +6,7 @@
#include <immintrin.h>
#include <emmintrin.h>
#include "thread_mem.h"
#include "common.h"
#ifdef __GNUC__
#define LIKELY(x) __builtin_expect((x), 1)
@ -348,12 +349,12 @@ int ksw_extend_avx2_u8_heuristics(thread_mem_t *tmem,
}
end1 = MIN(qlen, beg1 + span);
if (beg < beg1)
beg = beg1;
if (end > end1)
end = end1;
if (beg > end)
break; // 不用计算了直接跳出否则hA2没有被赋值里边是上一轮hA0的值会出bug
// if (beg < beg1)
// beg = beg1;
// if (end > end1)
// end = end1;
// if (beg > end)
// break; // 不用计算了直接跳出否则hA2没有被赋值里边是上一轮hA0的值会出bug
beg = 1;
end = qlen;

View File

@ -0,0 +1 @@
#include "common.h"

View File

@ -3,6 +3,7 @@
#include <assert.h>
#include <stdio.h>
#include "thread_mem.h"
#include "common.h"
#ifdef __GNUC__
#define LIKELY(x) __builtin_expect((x), 1)
@ -56,6 +57,20 @@ int ksw_extend_normal(thread_mem_t *tmem, int qlen, const uint8_t *query, int tl
max_ie = -1, gscore = -1;
max_off = 0;
beg = 0, end = qlen;
#ifdef DEBUG_OUT
fprintf(score_f_arr[0], "%-3d", h0);
fprintf(ins_ext_f_arr[0], "%-3d", h0);
fprintf(del_ext_f_arr[0], "%-3d", h0);
for (j = 0; LIKELY(j < end); ++j) // 遍历query字符序列
{
fprintf(score_f_arr[0], "%-3d", MAX(h0 - o_ins - (j + 1) * e_ins, 0));
fprintf(ins_ext_f_arr[0], "%-3d", MAX(h0 - o_ins - (j + 1) * e_ins, 0));
fprintf(del_ext_f_arr[0], "%-3d", 0);
}
fprintf(score_f_arr[0], "\n");
fprintf(ins_ext_f_arr[0], "\n");
fprintf(del_ext_f_arr[0], "\n");
#endif
for (i = 0; LIKELY(i < tlen); ++i) // 对target逐个字符进行遍历
{
int t, f = 0, h1, m = 0, mj = -1;
@ -78,14 +93,22 @@ int ksw_extend_normal(thread_mem_t *tmem, int qlen, const uint8_t *query, int tl
}
else
h1 = 0;
#ifdef DEBUG_OUT
fprintf(ins_ext_f_arr[0], "%-3d", 0);
fprintf(del_ext_f_arr[0], "%-3d", MAX(h0 - o_del - (i + 1) * e_del, 0));
#endif
for (j = beg; LIKELY(j < end); ++j) // 遍历query字符序列
{
// fprintf(stderr, "%-3d", h1);
// At the beginning of the loop: eh[j] = { H(i-1,j-1), E(i,j) }, f = F(i,j) and h1 = H(i,j-1)
// Similar to SSE2-SW, cells are computed in the following order:
// H(i,j) = max{H(i-1,j-1)+S(i,j), E(i,j), F(i,j)}
// E(i+1,j) = max{H(i,j)-gapo, E(i,j)} - gape // E表示delete只消耗target
// F(i,j+1) = max{H(i,j)-gapo, F(i,j)} - gape // F表示insert只消耗querytarget的row id固定query的col index一直增加
#ifdef DEBUG_OUT
fprintf(score_f_arr[0], "%-3d", h1);
fprintf(ins_ext_f_arr[0], "%-3d", f);
fprintf(del_ext_f_arr[0], "%-3d", eh[j].e);
#endif
// At the beginning of the loop: eh[j] = { H(i-1,j-1), E(i,j) }, f = F(i,j) and h1 = H(i,j-1)
// Similar to SSE2-SW, cells are computed in the following order:
// H(i,j) = max{H(i-1,j-1)+S(i,j), E(i,j), F(i,j)}
// E(i+1,j) = max{H(i,j)-gapo, E(i,j)} - gape // E表示delete只消耗target
// F(i,j+1) = max{H(i,j)-gapo, F(i,j)} - gape // F表示insert只消耗querytarget的row id固定query的col index一直增加
eh_t *p = &eh[j];
int h, M = p->h, e = p->e; // get H(i-1,j-1) and E(i-1,j) // 获取上一轮h值和e值
p->h = h1; // set H(i,j-1) for the next row // h1是上一轮计算的结果
@ -105,6 +128,12 @@ int ksw_extend_normal(thread_mem_t *tmem, int qlen, const uint8_t *query, int tl
f -= e_ins;
f = f > t ? f : t; // computed F(i,j+1)
}
#ifdef DEBUG_OUT
fprintf(score_f_arr[0], "%-3d", h1);
fprintf(score_f_arr[0], "\n");
fprintf(ins_ext_f_arr[0], "\n");
fprintf(del_ext_f_arr[0], "\n");
#endif
eh[end].h = h1; // end是query序列之外的位置
eh[end].e = 0;
if (j == qlen) // 此轮遍历到了query的最后一个字符

106
main.c
View File

@ -8,11 +8,7 @@
#include "thread_mem.h"
#include "ksw_ext.h"
#include "utils.h"
#define SW_NORMAL 0
#define SW_AVX2 1
#define SW_CUDA 2
#define SW_ALL 3
#include "common.h"
#define BLOCK_BUF_SIZE 1048576
#define READ_BUF_SIZE 2048
@ -21,8 +17,6 @@
#define DIVIDE_BY (CLOCKS_PER_SEC * 1.0)
#define KERNEL_NUM 7
#ifdef SHOW_PERF
// 用来调试,计算感兴趣部分的运行时间
// 获取当前毫秒数
@ -33,9 +27,14 @@ int64_t get_mseconds()
// return (int64_t)1000 * (tv.tv_sec + ((1e-6) * tv.tv_usec));
return clock();
}
int64_t time_sw[KERNEL_NUM] = {0};
#endif
#ifdef DEBUG_RETURN_VALUE
#define OUTPUT_RETVAL(kernel_num) \
fprintf(retval_f_arr[kernel_num], "%d\t%d\t%d\t%d\t%d\t%d\n", score[kernel_num], qle, tle, gtle, gscore, max_off[0])
#else
#define OUTPUT_RETVAL(out_f)
#endif
#define _PERFORMANCE_TEST_NORMAL(kernel_num, func) \
@ -55,6 +54,7 @@ int64_t time_sw[KERNEL_NUM] = {0};
score_total[kernel_num] += score[kernel_num]; \
cur_query_pos += info_arr[i][0]; \
cur_target_pos += info_arr[i][1]; \
OUTPUT_RETVAL(0); \
}
#define _PERFORMANCE_TEST_AVX2(kernel_num, func) \
@ -76,6 +76,7 @@ int64_t time_sw[KERNEL_NUM] = {0};
score_total[kernel_num] += score[kernel_num]; \
cur_query_pos += info_arr[i][0]; \
cur_target_pos += info_arr[i][1]; \
OUTPUT_RETVAL(kernel_num); \
}
#ifdef SHOW_PERF
@ -110,6 +111,13 @@ int read_seq_line(char *read_buf, FILE *f_ptr, char *out_arr)
return line_size;
}
// 全局变量
// 将每次比对的得分等信息写入文件进行debug
FILE *ins_ext_f_arr[KERNEL_NUM] = {0},
*del_ext_f_arr[KERNEL_NUM] = {0},
*score_f_arr[KERNEL_NUM] = {0},
*retval_f_arr[KERNEL_NUM] = {0};
// 程序执行入口
int main(int argc, char *argv[])
{
@ -142,16 +150,6 @@ int main(int argc, char *argv[])
int **info_arr = (int **)malloc(SEQ_BUF_SIZE * sizeof(int *));
FILE *query_f = 0, *target_f = 0, *info_f = 0;
FILE *normal_out_f = 0, *avx2_out_f = 0, *avx2_u8_out_f = 0;
query_f = fopen(qf_path, "r");
target_f = fopen(tf_path, "r");
info_f = fopen(if_path, "r");
// 将每次比对的得分等信息写入文件进行debug
// normal_out_f = fopen("normal_out.txt", "w");
// avx2_out_f = fopen("avx2_out.txt", "w");
// avx2_u8_out_f = fopen("avx2_u8_out.txt", "w");
// 每次读取一定量的数据,然后执行,直到处理完所有数据
int total_line_num = 0; // 目前处理的总的数据行数
@ -160,6 +158,32 @@ int main(int argc, char *argv[])
int64_t start_time;
char read_buf[READ_BUF_SIZE]; // 读文件缓存
#ifdef DEBUG_OUT
for (i = 0; i < KERNEL_NUM; ++i)
{
char out_path[64];
sprintf(out_path, "/home/zzh/work/sw_perf/output/ins_%d.txt", i);
ins_ext_f_arr[i] = fopen(out_path, "w");
sprintf(out_path, "/home/zzh/work/sw_perf/output/del_%d.txt", i);
del_ext_f_arr[i] = fopen(out_path, "w");
sprintf(out_path, "/home/zzh/work/sw_perf/output/score_%d.txt", i);
score_f_arr[i] = fopen(out_path, "w");
}
#endif
#ifdef DEBUG_RETURN_VALUE
for (i = 0; i < KERNEL_NUM; ++i)
{
char out_path[64];
sprintf(out_path, "/home/zzh/work/sw_perf/output/retval_%d.txt", i);
retval_f_arr[i] = fopen(out_path, "w");
}
#endif
query_f = fopen(qf_path, "r");
target_f = fopen(tf_path, "r");
info_f = fopen(if_path, "r");
// 初始化info_arr数组
i = 0;
j = 0;
@ -218,31 +242,27 @@ int main(int argc, char *argv[])
// avx2
PERFORMANCE_TEST_AVX2(1, ksw_extend_avx2);
// avx2 u8
PERFORMANCE_TEST_AVX2(2, ksw_extend_avx2_u8);
// avx2 heuristics
PERFORMANCE_TEST_AVX2(3, ksw_extend_avx2_heuristics);
// avx2 u8 heuristics
PERFORMANCE_TEST_AVX2(4, ksw_extend_avx2_u8_heuristics);
// avx2 mem aligned
PERFORMANCE_TEST_AVX2(5, ksw_extend_avx2_aligned);
// avx2 u8 mem aligned
PERFORMANCE_TEST_AVX2(6, ksw_extend_avx2_u8_aligned);
// PERFORMANCE_TEST_AVX2(2, ksw_extend_avx2_heuristics);
// // avx2 mem aligned
// PERFORMANCE_TEST_AVX2(3, ksw_extend_avx2_aligned);
//
// // avx2 u8
// PERFORMANCE_TEST_AVX2(4, ksw_extend_avx2_u8);
// // avx2 u8 heuristics
// PERFORMANCE_TEST_AVX2(5, ksw_extend_avx2_u8_heuristics);
// // avx2 u8 mem aligned
// PERFORMANCE_TEST_AVX2(6, ksw_extend_avx2_u8_aligned);
}
#ifdef SHOW_PERF
char *kernel_names[7] = {
"normal",
"avx2",
"avx2_u8",
"avx2_heuristics",
"avx2_u8_heuristics",
"avx2_aligned",
"avx2_u8",
"avx2_u8_heuristics",
"avx2_u8_aligned"};
for (i = 0; i < KERNEL_NUM; ++i)
@ -257,10 +277,16 @@ int main(int argc, char *argv[])
fclose(target_f);
if (info_f != 0)
fclose(info_f);
if (avx2_out_f != 0)
fclose(avx2_out_f);
if (avx2_u8_out_f != 0)
fclose(avx2_u8_out_f);
if (normal_out_f != 0)
fclose(normal_out_f);
for (i = 0; i < KERNEL_NUM; ++i)
{
if (ins_ext_f_arr[i] != 0)
fclose(ins_ext_f_arr[i]);
if (del_ext_f_arr[i] != 0)
fclose(del_ext_f_arr[i]);
if (score_f_arr[i] != 0)
fclose(score_f_arr[i]);
if (retval_f_arr[i] != 0)
fclose(retval_f_arr[i]);
}
}

2
run_debug.sh 100755
View File

@ -0,0 +1,2 @@
#!/bin/bash
/home/zzh/work/sw_perf/sw_perf /home/zzh/work/sw_perf/input/q.fa /home/zzh/work/sw_perf/input/t.fa /home/zzh/work/sw_perf/input/i.txt