对avx2实现的sw的各个部分运行时间进行了测试
This commit is contained in:
parent
2a14c17c68
commit
53332e34e1
6
Makefile
6
Makefile
|
|
@ -1,8 +1,8 @@
|
|||
CC= gcc
|
||||
CFLAGS= -g -Wall -Wno-unused-function -mavx2
|
||||
#CFLAGS= -Wall -Wno-unused-function -O2 -mavx2
|
||||
#CFLAGS= -g -Wall -Wno-unused-function -mavx2
|
||||
CFLAGS= -Wall -Wno-unused-function -O2 -mavx2
|
||||
DFLAGS= -DSHOW_PERF
|
||||
OBJS= ksw_normal.o ksw_avx2.o ksw_cuda.o ksw_avx2_u8.o
|
||||
OBJS= ksw_normal.o ksw_avx2.o ksw_cuda.o ksw_avx2_u8.o bsw_avx2.o
|
||||
PROG= sw_perf
|
||||
PROG2= sw_perf_discrete
|
||||
INCLUDES=
|
||||
|
|
|
|||
43
bsw_avx2.c
43
bsw_avx2.c
|
|
@ -197,7 +197,7 @@ int bsw_avx2(int qlen, // query length 待匹配段碱基的query
|
|||
int16_t *qtmem, *vmem;
|
||||
int seq_size = qlen + SIMD_WIDTH, ref_size = tlen + SIMD_WIDTH;
|
||||
int i, iStart, D, j, k, beg, end, max, max_i, max_j, max_ins, max_del, max_ie, gscore, max_off;
|
||||
int Dloop = tlen + qlen; // 循环跳出条件
|
||||
int Dloop = tlen + qlen; // 循环跳出条件 D从1开始遍历
|
||||
int span, beg1, end1; // 边界条件计算
|
||||
int col_size = qlen + 2 + SIMD_WIDTH;
|
||||
int val_mem_size = (col_size * 9 * 2 + 31) >> 5 << 5; // 32字节的整数倍
|
||||
|
|
@ -287,7 +287,7 @@ int bsw_avx2(int qlen, // query length 待匹配段碱基的query
|
|||
// 边界条件一定要注意! tlen 大于,等于,小于 qlen时的情况
|
||||
if (D > tlen)
|
||||
{
|
||||
span = MIN(Dloop - D, w);
|
||||
span = MIN(Dloop - D, w); // 计算的窗口,或者说范围
|
||||
beg1 = MAX(D - tlen + 1, ((D - w) / 2) + 1);
|
||||
}
|
||||
else
|
||||
|
|
@ -372,7 +372,7 @@ int bsw_avx2(int qlen, // query length 待匹配段碱基的query
|
|||
max = m, max_i = mi, max_j = mj;
|
||||
max_off = max_off > abs(mj - mi) ? max_off : abs(mj - mi);
|
||||
}
|
||||
else if (zdrop > 0)
|
||||
else if (0) //(zdrop > 0)
|
||||
{
|
||||
if (mi - max_i > mj - max_j)
|
||||
{
|
||||
|
|
@ -387,24 +387,25 @@ int bsw_avx2(int qlen, // query length 待匹配段碱基的query
|
|||
}
|
||||
|
||||
// 调整计算的边界
|
||||
for (j = beg; LIKELY(j <= end); ++j)
|
||||
{
|
||||
int has_val = hA1[j - 1] | hA2[j];
|
||||
if (has_val)
|
||||
break;
|
||||
}
|
||||
beg = j;
|
||||
for (j = end + 1; LIKELY(j >= beg); --j)
|
||||
{
|
||||
int has_val = hA1[j - 1] | hA2[j];
|
||||
if (has_val)
|
||||
break;
|
||||
else
|
||||
hA0[j - 1] = 0;
|
||||
}
|
||||
end = j + 1 <= qlen ? j + 1 : qlen;
|
||||
// beg = 0;
|
||||
// end = qlen; // uncomment this line for debugging
|
||||
/* for (j = beg; LIKELY(j <= end); ++j)
|
||||
{
|
||||
int has_val = hA1[j - 1] | hA2[j];
|
||||
if (has_val)
|
||||
break;
|
||||
}
|
||||
beg = j;
|
||||
for (j = end + 1; LIKELY(j >= beg); --j)
|
||||
{
|
||||
int has_val = hA1[j - 1] | hA2[j];
|
||||
if (has_val)
|
||||
break;
|
||||
else
|
||||
hA0[j - 1] = 0;
|
||||
}
|
||||
end = j + 1 <= qlen ? j + 1 : qlen;
|
||||
*/
|
||||
beg = 0;
|
||||
end = qlen; // uncomment this line for debugging
|
||||
m_last = m;
|
||||
// swap m, h, e, f
|
||||
SWAP_DATA_POINTER;
|
||||
|
|
|
|||
|
|
@ -217,6 +217,14 @@ int ksw_avx2_u8(int qlen, // query length 待匹配段碱基的que
|
|||
int *_gscore, // query的端到端匹配得分
|
||||
int *_max_off) // 取得最大得分时在query和reference上位置差的 最大值
|
||||
{
|
||||
#ifdef SHOW_PERF
|
||||
extern int64_t time_bsw_init;
|
||||
extern int64_t time_bsw_main_loop;
|
||||
extern int64_t time_bsw_find_max;
|
||||
extern int64_t time_bsw_adjust_bound;
|
||||
extern int64_t time_compare;
|
||||
int64_t start_time = get_mseconds();
|
||||
#endif
|
||||
uint8_t *mA, *hA, *eA, *fA, *mA1, *mA2, *hA0, *hA1, *eA1, *fA1, *hA2, *eA2, *fA2; // hA0保存上上个col的H,其他的保存上个H E F M
|
||||
uint8_t *seq, *ref;
|
||||
uint8_t *mem, *qtmem, *vmem;
|
||||
|
|
@ -307,9 +315,15 @@ int ksw_avx2_u8(int qlen, // query length 待匹配段碱基的que
|
|||
|
||||
int m_last = 0;
|
||||
int iend;
|
||||
#ifdef SHOW_PERF
|
||||
time_bsw_init += get_mseconds() - start_time;
|
||||
#endif
|
||||
|
||||
for (D = 1; LIKELY(D < Dloop); ++D)
|
||||
{
|
||||
#ifdef SHOW_PERF
|
||||
start_time = get_mseconds();
|
||||
#endif
|
||||
// 边界条件一定要注意! tlen 大于,等于,小于 qlen时的情况
|
||||
if (D > tlen)
|
||||
{
|
||||
|
|
@ -380,9 +394,19 @@ int ksw_avx2_u8(int qlen, // query length 待匹配段碱基的que
|
|||
// 存储结果
|
||||
SIMD_STORE;
|
||||
}
|
||||
|
||||
#ifdef SHOW_PERF
|
||||
time_bsw_main_loop += get_mseconds() - start_time;
|
||||
#endif
|
||||
#ifdef SHOW_PERF
|
||||
start_time = get_mseconds();
|
||||
#endif
|
||||
SIMD_FIND_MAX;
|
||||
|
||||
#ifdef SHOW_PERF
|
||||
time_bsw_find_max += get_mseconds() - start_time;
|
||||
#endif
|
||||
#ifdef SHOW_PERF
|
||||
start_time = get_mseconds();
|
||||
#endif
|
||||
// 注意最后跳出循环j的值
|
||||
j = end + 1;
|
||||
|
||||
|
|
@ -391,14 +415,14 @@ int ksw_avx2_u8(int qlen, // query length 待匹配段碱基的que
|
|||
max_ie = gscore > hA2[qlen] ? max_ie : iStart;
|
||||
gscore = gscore > hA2[qlen] ? gscore : hA2[qlen];
|
||||
}
|
||||
if (m == 0 && m_last == 0)
|
||||
break; // 一定要注意,斜对角遍历和按列遍历的不同点
|
||||
// if (m == 0 && m_last == 0)
|
||||
// break; // 一定要注意,斜对角遍历和按列遍历的不同点
|
||||
if (m > max)
|
||||
{
|
||||
max = m, max_i = mi, max_j = mj;
|
||||
max_off = max_off > abs(mj - mi) ? max_off : abs(mj - mi);
|
||||
}
|
||||
else if (zdrop > 0)
|
||||
else if (0) // (zdrop > 0)
|
||||
{
|
||||
if (mi - max_i > mj - max_j)
|
||||
{
|
||||
|
|
@ -430,9 +454,17 @@ int ksw_avx2_u8(int qlen, // query length 待匹配段碱基的que
|
|||
}
|
||||
end = j + 1 <= qlen ? j + 1 : qlen;
|
||||
|
||||
// beg = 0;
|
||||
// end = qlen;
|
||||
|
||||
m_last = m;
|
||||
// swap m, h, e, f
|
||||
SWAP_DATA_POINTER;
|
||||
#ifdef SHOW_PERF
|
||||
time_bsw_adjust_bound += get_mseconds() - start_time;
|
||||
start_time = get_mseconds();
|
||||
time_compare += get_mseconds() - start_time;
|
||||
#endif
|
||||
}
|
||||
|
||||
free(mem);
|
||||
|
|
|
|||
37
ksw_normal.c
37
ksw_normal.c
|
|
@ -1,6 +1,7 @@
|
|||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
#include <assert.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#ifdef __GNUC__
|
||||
#define LIKELY(x) __builtin_expect((x), 1)
|
||||
|
|
@ -55,13 +56,13 @@ int ksw_normal(int qlen, const uint8_t *query, int tlen, const uint8_t *target,
|
|||
{
|
||||
int t, f = 0, h1, m = 0, mj = -1;
|
||||
int8_t *q = &qp[target[i] * qlen]; // 对于target第i个字符,query中每个字符的分值,只有匹配和不匹配
|
||||
// apply the band and the constraint (if provided)
|
||||
// if (beg < i - w) // 检查开始点是否可以缩小一些
|
||||
// beg = i - w;
|
||||
// if (end > i + w + 1) // 检查终点是否可以缩小,使得整体的遍历范围缩小
|
||||
// end = i + w + 1;
|
||||
// if (end > qlen) // 终点不超过query长度
|
||||
// end = qlen;
|
||||
// apply the band and the constraint (if provided)
|
||||
if (beg < i - w) // 检查开始点是否可以缩小一些
|
||||
beg = i - w;
|
||||
if (end > i + w + 1) // 检查终点是否可以缩小,使得整体的遍历范围缩小
|
||||
end = i + w + 1;
|
||||
if (end > qlen) // 终点不超过query长度
|
||||
end = qlen;
|
||||
// compute the first column
|
||||
if (beg == 0)
|
||||
{
|
||||
|
|
@ -73,11 +74,12 @@ int ksw_normal(int qlen, const uint8_t *query, int tlen, const uint8_t *target,
|
|||
h1 = 0;
|
||||
for (j = beg; LIKELY(j < end); ++j) // 遍历query字符序列
|
||||
{
|
||||
// At the beginning of the loop: eh[j] = { H(i-1,j-1), E(i,j) }, f = F(i,j) and h1 = H(i,j-1)
|
||||
// Similar to SSE2-SW, cells are computed in the following order:
|
||||
// H(i,j) = max{H(i-1,j-1)+S(i,j), E(i,j), F(i,j)}
|
||||
// E(i+1,j) = max{H(i,j)-gapo, E(i,j)} - gape // E表示delete,只消耗target
|
||||
// F(i,j+1) = max{H(i,j)-gapo, F(i,j)} - gape // F表示insert,只消耗query
|
||||
// fprintf(stderr, "%-3d", h1);
|
||||
// At the beginning of the loop: eh[j] = { H(i-1,j-1), E(i,j) }, f = F(i,j) and h1 = H(i,j-1)
|
||||
// Similar to SSE2-SW, cells are computed in the following order:
|
||||
// H(i,j) = max{H(i-1,j-1)+S(i,j), E(i,j), F(i,j)}
|
||||
// E(i+1,j) = max{H(i,j)-gapo, E(i,j)} - gape // E表示delete,只消耗target
|
||||
// F(i,j+1) = max{H(i,j)-gapo, F(i,j)} - gape // F表示insert,只消耗query,target的row id固定,query的col index一直增加
|
||||
eh_t *p = &eh[j];
|
||||
int h, M = p->h, e = p->e; // get H(i-1,j-1) and E(i-1,j) // 获取上一轮h值和e值
|
||||
p->h = h1; // set H(i,j-1) for the next row // h1是上一轮计算的结果
|
||||
|
|
@ -97,7 +99,7 @@ int ksw_normal(int qlen, const uint8_t *query, int tlen, const uint8_t *target,
|
|||
f -= e_ins;
|
||||
f = f > t ? f : t; // computed F(i,j+1)
|
||||
}
|
||||
eh[end].h = h1;
|
||||
eh[end].h = h1; // end是query序列之外的位置
|
||||
eh[end].e = 0;
|
||||
if (j == qlen) // 此轮遍历到了query的最后一个字符
|
||||
{
|
||||
|
|
@ -111,7 +113,7 @@ int ksw_normal(int qlen, const uint8_t *query, int tlen, const uint8_t *target,
|
|||
max = m, max_i = i, max_j = mj; // 更新取得最大值的target和query的位置
|
||||
max_off = max_off > abs(mj - i) ? max_off : abs(mj - i); // 取得最大分值时,query和target对应字符串坐标的差值
|
||||
}
|
||||
else if (0) //(zdrop > 0) // 当前轮匹配之后取得的最大分值没有大于之前的最大值,而且zdrop值大于0
|
||||
else if (0) // zdrop > 0) // 当前轮匹配之后取得的最大分值没有大于之前的最大值,而且zdrop值大于0
|
||||
{
|
||||
if (i - max_i > mj - max_j)
|
||||
{
|
||||
|
|
@ -130,9 +132,10 @@ int ksw_normal(int qlen, const uint8_t *query, int tlen, const uint8_t *target,
|
|||
beg = j;
|
||||
for (j = end; LIKELY(j >= beg) && eh[j].h == 0 && eh[j].e == 0; --j)
|
||||
;
|
||||
end = j + 2 < qlen ? j + 2 : qlen;
|
||||
beg = 0;
|
||||
end = qlen; // uncomment this line for debugging
|
||||
end = j + 2 < qlen ? j + 2 : qlen; // 剪枝没考虑f,即insert
|
||||
beg = 0, end = qlen; // uncomment this line for debugging
|
||||
// fprintf(stderr, "\n");
|
||||
// fprintf(stderr, "%d\n", end);
|
||||
}
|
||||
free(eh);
|
||||
free(qp);
|
||||
|
|
|
|||
65
main.c
65
main.c
|
|
@ -26,7 +26,13 @@ int64_t get_mseconds()
|
|||
|
||||
int64_t time_sw_normal = 0,
|
||||
time_sw_avx2 = 0,
|
||||
time_sw_avx2_u8 = 0;
|
||||
time_sw_avx2_u8 = 0,
|
||||
time_bsw_avx2 = 0,
|
||||
time_bsw_init = 0,
|
||||
time_bsw_main_loop = 0,
|
||||
time_bsw_find_max = 0,
|
||||
time_bsw_adjust_bound = 0,
|
||||
time_compare = 0;
|
||||
|
||||
#endif
|
||||
|
||||
|
|
@ -111,18 +117,18 @@ int main(int argc, char *argv[])
|
|||
// const char *qf_path = "q.fa";
|
||||
// const char *tf_path = "t.fa";
|
||||
// const char *if_path = "i.txt";
|
||||
const char *qf_path = "bug_q.fa";
|
||||
const char *tf_path = "bug_t.fa";
|
||||
const char *if_path = "bug_i.txt";
|
||||
// const char *qf_path = "bug_q.fa";
|
||||
// const char *tf_path = "bug_t.fa";
|
||||
// const char *if_path = "bug_i.txt";
|
||||
// const char *qf_path = "/public/home/zzh/data/sw/q_s.fa";
|
||||
// const char *tf_path = "/public/home/zzh/data/sw/t_s.fa";
|
||||
// const char *if_path = "/public/home/zzh/data/sw/i_s.txt";
|
||||
// const char *qf_path = "/public/home/zzh/data/sw/q_m.fa";
|
||||
// const char *tf_path = "/public/home/zzh/data/sw/t_m.fa";
|
||||
// const char *if_path = "/public/home/zzh/data/sw/i_m.txt";
|
||||
// const char *qf_path = "/public/home/zzh/data/sw/q_l.fa";
|
||||
// const char *tf_path = "/public/home/zzh/data/sw/t_l.fa";
|
||||
// const char *if_path = "/public/home/zzh/data/sw/i_l.txt";
|
||||
const char *qf_path = "/public/home/zzh/data/sw/q_l.fa";
|
||||
const char *tf_path = "/public/home/zzh/data/sw/t_l.fa";
|
||||
const char *if_path = "/public/home/zzh/data/sw/i_l.txt";
|
||||
// const char *qf_path = "/public/home/zzh/data/sw/query.fa";
|
||||
// const char *tf_path = "/public/home/zzh/data/sw/target.fa";
|
||||
// const char *if_path = "/public/home/zzh/data/sw/info.txt";
|
||||
|
|
@ -134,6 +140,7 @@ int main(int argc, char *argv[])
|
|||
FILE *normal_out_f = fopen("normal_out.txt", "w");
|
||||
FILE *avx2_out_f = fopen("avx2_out.txt", "w");
|
||||
FILE *avx2_u8_out_f = fopen("avx2_u8_out.txt", "w");
|
||||
FILE *bsw_avx2_out_f = fopen("bsw_avx2_out.txt", "w");
|
||||
|
||||
// 每次读取一定量的数据,然后执行,直到处理完所有数据
|
||||
int total_line_num = 0; // 目前处理的总的数据行数
|
||||
|
|
@ -155,8 +162,8 @@ int main(int argc, char *argv[])
|
|||
j += 3;
|
||||
}
|
||||
|
||||
int score_normal = 0, score_avx2 = 0, score_avx2_u8 = 0;
|
||||
int score_normal_total = 0, score_avx2_total = 0, score_avx2_u8_total = 0;
|
||||
int score_normal = 0, score_avx2 = 0, score_avx2_u8 = 0, score_bsw_avx2 = 0;
|
||||
int score_normal_total = 0, score_avx2_total = 0, score_avx2_u8_total = 0, score_bsw_avx2_total = 0;
|
||||
|
||||
while (!feof(target_f))
|
||||
{
|
||||
|
|
@ -237,8 +244,28 @@ int main(int argc, char *argv[])
|
|||
#endif
|
||||
score_normal_total += score_normal;
|
||||
// fprintf(normal_out_f, "%d %d\n", info_arr[i][2], score_normal);
|
||||
fprintf(normal_out_f, "%d %d %d %d %d %d %d\n", info_arr[i][2], score_normal, qle, tle, gtle, gscore, max_off[0]);
|
||||
// fprintf(stderr, "%d %d %d %d %d %d %d\n", info_arr[i][2], score_normal, qle, tle, gtle, gscore, max_off[0]);
|
||||
|
||||
#ifdef SHOW_PERF
|
||||
start_time = get_mseconds();
|
||||
#endif
|
||||
score_bsw_avx2 = bsw_avx2(
|
||||
info_arr[i][0],
|
||||
(uint8_t *)query_arr + cur_query_pos,
|
||||
info_arr[i][1],
|
||||
(uint8_t *)target_arr + cur_target_pos,
|
||||
0, 5, mat, 6, 1, 6, 1,
|
||||
1, 4,
|
||||
100, 5, 100,
|
||||
info_arr[i][2],
|
||||
&qle, &tle, >le, &gscore, &max_off[0]);
|
||||
#ifdef SHOW_PERF
|
||||
time_bsw_avx2 += get_mseconds() - start_time;
|
||||
#endif
|
||||
score_bsw_avx2_total += score_bsw_avx2;
|
||||
// fprintf(avx2_out_f, "%d %d\n", info_arr[i][2], score_avx2);
|
||||
// fprintf(stderr, "%d %d %d %d %d %d %d\n", info_arr[i][2], score_bsw_avx2_total, qle, tle, gtle, gscore, max_off[0]);
|
||||
/*
|
||||
#ifdef SHOW_PERF
|
||||
start_time = get_mseconds();
|
||||
#endif
|
||||
|
|
@ -257,7 +284,8 @@ int main(int argc, char *argv[])
|
|||
#endif
|
||||
score_avx2_total += score_avx2;
|
||||
// fprintf(avx2_out_f, "%d %d\n", info_arr[i][2], score_avx2);
|
||||
fprintf(avx2_out_f, "%d %d %d %d %d %d %d\n", info_arr[i][2], score_avx2, qle, tle, gtle, gscore, max_off[0]);
|
||||
fprintf(stderr, "%d %d %d %d %d %d %d\n", info_arr[i][2], score_avx2, qle, tle, gtle, gscore, max_off[0]);
|
||||
*/
|
||||
#ifdef SHOW_PERF
|
||||
start_time = get_mseconds();
|
||||
#endif
|
||||
|
|
@ -275,8 +303,8 @@ int main(int argc, char *argv[])
|
|||
time_sw_avx2_u8 += get_mseconds() - start_time;
|
||||
#endif
|
||||
score_avx2_u8_total += score_avx2_u8;
|
||||
fprintf(avx2_u8_out_f, "%d %d %d %d %d %d\n", score_avx2_u8, qle, tle, gtle, gscore, max_off[0]);
|
||||
// 更新query和target位置信息
|
||||
// fprintf(avx2_u8_out_f, "%d %d %d %d %d %d\n", score_avx2_u8, qle, tle, gtle, gscore, max_off[0]);
|
||||
// 更新query和target位置信息
|
||||
cur_query_pos += info_arr[i][0];
|
||||
cur_target_pos += info_arr[i][1];
|
||||
// fprintf(stderr, "%d %d %d %d %d %d %d\n", score_normal, qle, tle, gtle, gscore, max_off[0], max_off[1]);
|
||||
|
|
@ -343,8 +371,15 @@ int main(int argc, char *argv[])
|
|||
|
||||
#ifdef SHOW_PERF
|
||||
fprintf(stderr, "time_sw_normal: %f s; score: %d\n", time_sw_normal / 1000.0, score_normal_total);
|
||||
fprintf(stderr, "time_sw_avx2: %f s; score: %d\n", time_sw_avx2 / 1000.0, score_avx2_total);
|
||||
fprintf(stderr, "time_bsw_avx2: %f s; score: %d\n", time_bsw_avx2 / 1000.0, score_bsw_avx2_total);
|
||||
// fprintf(stderr, "time_sw_avx2: %f s; score: %d\n", time_sw_avx2 / 1000.0, score_avx2_total);
|
||||
fprintf(stderr, "time_sw_avx2_u8: %f s; score: %d\n", time_sw_avx2_u8 / 1000.0, score_avx2_u8_total);
|
||||
|
||||
fprintf(stderr, "time_bsw_init: %f s\n", time_bsw_init / 1000.0);
|
||||
fprintf(stderr, "time_bsw_main_loop: %f s\n", (time_bsw_main_loop - time_compare) / 1000.0);
|
||||
fprintf(stderr, "time_bsw_find_max: %f s\n", (time_bsw_find_max - time_compare) / 1000.0);
|
||||
fprintf(stderr, "time_bsw_adjust_bound: %f s\n", (time_bsw_adjust_bound - time_compare) / 1000.0);
|
||||
fprintf(stderr, "time_compare: %f s\n", time_compare / 1000.0);
|
||||
#endif
|
||||
|
||||
if (query_f != 0)
|
||||
|
|
@ -359,4 +394,6 @@ int main(int argc, char *argv[])
|
|||
fclose(avx2_u8_out_f);
|
||||
if (normal_out_f != 0)
|
||||
fclose(normal_out_f);
|
||||
if (bsw_avx2_out_f != 0)
|
||||
fclose(bsw_avx2_out_f);
|
||||
}
|
||||
Loading…
Reference in New Issue