解决了读数据的 bug 和 AVX2 的 bug,保留了一些调试代码
This commit is contained in:
parent
2aeb566bf7
commit
78f791f3f2
|
|
@@ -8,6 +8,14 @@
|
|||
"__split_buffer": "c",
|
||||
"string": "c",
|
||||
"cstdint": "c",
|
||||
"algorithm": "c"
|
||||
"algorithm": "c",
|
||||
"array": "c",
|
||||
"deque": "c",
|
||||
"unordered_map": "c",
|
||||
"string_view": "c",
|
||||
"initializer_list": "c",
|
||||
"__hash_table": "c",
|
||||
"ios": "c",
|
||||
"iterator": "c"
|
||||
}
|
||||
}
|
||||
2
Makefile
2
Makefile
|
|
@@ -1,7 +1,7 @@
|
|||
CC= gcc
|
||||
#CFLAGS= -g -Wall -Wno-unused-function -mavx2
|
||||
CFLAGS= -Wall -Wno-unused-function -O2 -mavx2
|
||||
DFLAGS= -DSHOW_PERF -DDEBUG_OUT
|
||||
DFLAGS= -DSHOW_PERF -DDEBUG_RETURN_VALUE
|
||||
#DFLAGS= -DSHOW_PERF -DDEBUG_OUT -DDEBUG_RETURN_VALUE
|
||||
PROG= sw_perf
|
||||
INCLUDES=
|
||||
|
|
|
|||
144
ksw_ext_avx2.c
144
ksw_ext_avx2.c
|
|
@@ -109,6 +109,13 @@ static const uint16_t h_vec_int_mask[SIMD_WIDTH][SIMD_WIDTH] = {
|
|||
fn_vec = _mm256_max_epi16(fn_vec, zero_vec); \
|
||||
mn_vec = _mm256_max_epi16(mn_vec, zero_vec); \
|
||||
hn_vec = _mm256_max_epi16(hn_vec, zero_vec);
|
||||
//int16_t *t_ptr = (int16_t *)&ts_vec; \
|
||||
//fprintf(stderr, "D: %d, ibeg: %d, iend: %d, jbeg: %d, jend: %d, %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d \n", \
|
||||
// D, ibeg, iend, beg, end, \
|
||||
// t_ptr[0], t_ptr[1], t_ptr[2], t_ptr[3], \
|
||||
// t_ptr[4], t_ptr[5], t_ptr[6], t_ptr[7], \
|
||||
// t_ptr[8], t_ptr[9], t_ptr[10], t_ptr[11], \
|
||||
// t_ptr[12], t_ptr[13], t_ptr[14], t_ptr[15]);
|
||||
|
||||
// 存储向量化结果
|
||||
#define SIMD_STORE \
|
||||
|
|
@@ -192,7 +199,7 @@ int ksw_extend_avx2(thread_mem_t *tmem,
|
|||
uint8_t *mem;
|
||||
int16_t *qtmem, *vmem;
|
||||
int seq_size = qlen + SIMD_WIDTH, ref_size = tlen + SIMD_WIDTH;
|
||||
int i, iStart, D, j, beg, end, max, max_i, max_j, max_ins, max_del, max_ie, gscore, max_off;
|
||||
int i, ibeg, iend, D, j, beg, end, max, max_i, max_j, max_ins, max_del, max_ie, gscore, max_off;
|
||||
int Dloop = tlen + qlen; // 循环跳出条件
|
||||
int span, beg1, end1; // 边界条件计算
|
||||
int col_size = qlen + 2 + SIMD_WIDTH;
|
||||
|
|
@@ -220,7 +227,7 @@ int ksw_extend_avx2(thread_mem_t *tmem,
|
|||
for (i = 0; i < qlen; ++i)
|
||||
seq[i] = query[i];
|
||||
for (i = 0; i < tlen; ++i)
|
||||
ref[i + SIMD_WIDTH] = target[i];
|
||||
ref[i + SIMD_WIDTH - 1] = target[i];
|
||||
}
|
||||
|
||||
vmem = &ref[ref_size];
|
||||
|
|
@@ -258,13 +265,15 @@ int ksw_extend_avx2(thread_mem_t *tmem,
|
|||
// DP loop
|
||||
max = init_score, max_i = max_j = -1;
|
||||
max_ie = -1, gscore = -1;
|
||||
;
|
||||
|
||||
max_off = 0;
|
||||
beg = 1;
|
||||
end = qlen;
|
||||
// init init_score
|
||||
hA0[0] = init_score; // 左上角
|
||||
|
||||
fA1[1] = MAX(0, init_score - (o_ins + e_ins));
|
||||
eA2[0] = init_score;
|
||||
hA1[1] = fA1[1];
|
||||
if (qlen == 0 || tlen == 0)
|
||||
Dloop = 0; // 防止意外情况
|
||||
if (window_size >= qlen)
|
||||
|
|
@@ -272,66 +281,70 @@ int ksw_extend_avx2(thread_mem_t *tmem,
|
|||
max_ie = 0;
|
||||
gscore = 0;
|
||||
}
|
||||
|
||||
int iend;
|
||||
|
||||
// fprintf(stderr, "qlen:%d, tlen:%d\n", qlen, tlen);
|
||||
#ifdef DEBUG_OUT
|
||||
int16_t ins[tlen + 1][qlen + 1];
|
||||
int16_t del[tlen + 1][qlen + 1];
|
||||
int16_t score[tlen + 1][qlen + 1];
|
||||
int dii, djj;
|
||||
int16_t ins[tlen + 1][qlen + 2];
|
||||
int16_t del[tlen + 1][qlen + 2];
|
||||
int16_t score[tlen + 1][qlen + 2];
|
||||
ins[0][0] = del[0][0] = score[0][0] = init_score;
|
||||
ins[0][1] = MAX(0, init_score - (o_ins + e_ins));
|
||||
del[1][0] = MAX(0, init_score - (o_del + e_del));
|
||||
score[0][1] = ins[0][1];
|
||||
score[1][0] = del[1][0];
|
||||
// fprintf(stderr, "%d %d\n", del[1][0], score[1][0]);
|
||||
#endif
|
||||
|
||||
for (D = 1; LIKELY(D < Dloop); ++D)
|
||||
{
|
||||
// 边界条件一定要注意! tlen 大于,等于,小于 qlen时的情况
|
||||
if (D > tlen)
|
||||
{
|
||||
span = MIN(Dloop - D, window_size);
|
||||
beg1 = MAX(D - tlen + 1, ((D - window_size) / 2) + 1);
|
||||
}
|
||||
if (D < tlen)
|
||||
beg1 = 1;
|
||||
else
|
||||
{
|
||||
span = MIN(D - 1, window_size);
|
||||
beg1 = MAX(1, ((D - window_size) / 2) + 1);
|
||||
}
|
||||
end1 = MIN(qlen, beg1 + span);
|
||||
beg1 = D - tlen + 1;
|
||||
if (D < qlen)
|
||||
end1 = D; // 闭区间
|
||||
else
|
||||
end1 = qlen;
|
||||
// beg1 = MAX(D - window_size, beg1);
|
||||
// end1 = MIN(D + window_size, end1);
|
||||
|
||||
beg = 1;
|
||||
end = qlen;
|
||||
|
||||
// if (beg < beg1)
|
||||
// beg = beg1;
|
||||
// if (end > end1)
|
||||
// end = end1;
|
||||
beg = MAX(beg1, beg);
|
||||
end = MIN(end1, end);
|
||||
// if (beg > end)
|
||||
// break; // 不用计算了,直接跳出,否则hA2没有被赋值,里边是上一轮hA0的值,会出bug
|
||||
// break;
|
||||
|
||||
iend = D - (beg - 1); // ref开始计算的位置,倒序
|
||||
beg = beg1;
|
||||
end = end1;
|
||||
|
||||
iend = D - beg; // ref开始计算的位置,倒序
|
||||
span = end - beg;
|
||||
iStart = iend - span - 1; // 0开始的ref索引位置
|
||||
ibeg = iend - span; // 0开始的ref索引位置
|
||||
|
||||
// fprintf(stderr, "D:%d, jbeg:%d, jend:%d, ibeg:%d, iend:%d\n", D, beg, end, ibeg, iend);
|
||||
|
||||
// 每一轮需要记录的数据
|
||||
int m = 0, mj = -1, mi = -1;
|
||||
max_vec = zero_vec;
|
||||
|
||||
// 要处理边界
|
||||
// 左边界 处理f (insert)
|
||||
if (iStart == 0)
|
||||
{
|
||||
hA1[end] = MAX(0, init_score - (o_ins + e_ins * end));
|
||||
}
|
||||
// 上边界
|
||||
// 处理左边界
|
||||
if (beg == 1)
|
||||
{
|
||||
hA1[0] = MAX(0, init_score - (o_del + e_del * iend));
|
||||
hA0[0] = eA2[0];
|
||||
mA1[0] = 0;
|
||||
eA1[0] = MAX(0, init_score - (o_del + e_del * (iend + 1)));
|
||||
#ifdef DEBUG_OUT
|
||||
del[iend + 1][0] = eA1[0];
|
||||
score[iend + 1][0] = eA1[0];
|
||||
#endif
|
||||
}
|
||||
else
|
||||
{
|
||||
hA1[beg - 1] = 0;
|
||||
eA1[beg - 1] = 0;
|
||||
}
|
||||
|
||||
#ifdef DEBUG_OUT
|
||||
// fprintf(stderr, "eA1: %d\n", eA1[0]);
|
||||
// for (djj = beg - 1; djj < end; ++djj)
|
||||
//{
|
||||
// fprintf(stderr, "%d ", hA0[djj]);
|
||||
//}
|
||||
// fprintf(stderr, "\n");
|
||||
#endif
|
||||
for (j = beg, i = iend; j <= end + 1 - SIMD_WIDTH; j += SIMD_WIDTH, i -= SIMD_WIDTH)
|
||||
{
|
||||
// 取数据
|
||||
|
|
@@ -357,15 +370,39 @@ int ksw_extend_avx2(thread_mem_t *tmem,
|
|||
// 存储结果
|
||||
SIMD_STORE;
|
||||
}
|
||||
// 处理上边界
|
||||
if (ibeg == 0)
|
||||
{
|
||||
fA2[end + 1] = MAX(0, init_score - (o_ins + e_ins * (end + 1)));
|
||||
hA2[end + 1] = fA2[end + 1];
|
||||
mA2[end + 1] = 0;
|
||||
#ifdef DEBUG_OUT
|
||||
ins[0][end + 1] = fA2[end + 1];
|
||||
score[0][end + 1] = fA2[end + 1];
|
||||
#endif
|
||||
}
|
||||
|
||||
SIMD_FIND_MAX;
|
||||
|
||||
#ifdef DEBUG_OUT
|
||||
for (djj = beg; djj <= end; ++djj)
|
||||
{
|
||||
dii = D - djj + 1;
|
||||
// fprintf(stderr, "dii:%d, djj:%d, ", dii, djj);
|
||||
ins[dii][djj] = fA2[djj];
|
||||
del[dii][djj] = eA2[djj];
|
||||
score[dii][djj] = hA2[djj];
|
||||
}
|
||||
// fprintf(stderr, "\n");
|
||||
// fprintf(stderr, "%d, %d\n", hA2[0], hA2[1]);
|
||||
#endif
|
||||
|
||||
// 注意最后跳出循环j的值
|
||||
j = end + 1;
|
||||
|
||||
if (j == qlen + 1)
|
||||
{
|
||||
max_ie = gscore > hA2[qlen] ? max_ie : iStart;
|
||||
max_ie = gscore > hA2[qlen] ? max_ie : ibeg;
|
||||
gscore = gscore > hA2[qlen] ? gscore : hA2[qlen];
|
||||
}
|
||||
|
||||
|
|
@@ -399,6 +436,21 @@ int ksw_extend_avx2(thread_mem_t *tmem,
|
|||
SWAP_DATA_POINTER;
|
||||
}
|
||||
|
||||
#ifdef DEBUG_OUT
|
||||
for (dii = 0; dii <= tlen; ++dii)
|
||||
{
|
||||
for (djj = 0; djj <= qlen; ++djj)
|
||||
{
|
||||
fprintf(score_f_arr[1], "%-3d", score[dii][djj]);
|
||||
fprintf(ins_ext_f_arr[1], "%-3d", ins[dii][djj]);
|
||||
fprintf(del_ext_f_arr[1], "%-3d", del[dii][djj]);
|
||||
}
|
||||
fprintf(score_f_arr[1], "\n");
|
||||
fprintf(ins_ext_f_arr[1], "\n");
|
||||
fprintf(del_ext_f_arr[1], "\n");
|
||||
}
|
||||
#endif
|
||||
|
||||
// free(mem);
|
||||
thread_mem_release(tmem, mem_size);
|
||||
if (_qle)
|
||||
|
|
|
|||
|
|
@@ -221,7 +221,7 @@ int ksw_extend_avx2_u8(thread_mem_t *tmem,
|
|||
uint8_t *seq, *ref;
|
||||
uint8_t *mem, *qtmem, *vmem;
|
||||
int seq_size = qlen + SIMD_WIDTH, ref_size = tlen + SIMD_WIDTH;
|
||||
int i, iStart, D, j, beg, end, max, max_i, max_j, max_ins, max_del, max_ie, gscore, max_off;
|
||||
int i, ibeg, iend, D, j, beg, end, max, max_i, max_j, max_ins, max_del, max_ie, gscore, max_off;
|
||||
int Dloop = tlen + qlen; // 循环跳出条件
|
||||
int span, beg1, end1; // 边界条件计算
|
||||
int col_size = qlen + 2 + SIMD_WIDTH;
|
||||
|
|
@@ -250,7 +250,7 @@ int ksw_extend_avx2_u8(thread_mem_t *tmem,
|
|||
for (i = 0; i < qlen; ++i)
|
||||
seq[i] = query[i];
|
||||
for (i = 0; i < tlen; ++i)
|
||||
ref[i + SIMD_WIDTH] = target[i];
|
||||
ref[i + SIMD_WIDTH - 1] = target[i];
|
||||
}
|
||||
|
||||
vmem = &ref[ref_size];
|
||||
|
|
@@ -289,13 +289,15 @@ int ksw_extend_avx2_u8(thread_mem_t *tmem,
|
|||
// DP loop
|
||||
max = init_score, max_i = max_j = -1;
|
||||
max_ie = -1, gscore = -1;
|
||||
;
|
||||
|
||||
max_off = 0;
|
||||
beg = 1;
|
||||
end = qlen;
|
||||
// init init_score
|
||||
hA0[0] = init_score; // 左上角
|
||||
|
||||
fA1[1] = MAX(0, init_score - (o_ins + e_ins));
|
||||
eA2[0] = init_score;
|
||||
hA1[1] = fA1[1];
|
||||
if (qlen == 0 || tlen == 0)
|
||||
Dloop = 0; // 防止意外情况
|
||||
if (window_size >= qlen)
|
||||
|
|
@@ -304,55 +306,42 @@ int ksw_extend_avx2_u8(thread_mem_t *tmem,
|
|||
gscore = 0;
|
||||
}
|
||||
|
||||
int iend;
|
||||
|
||||
for (D = 1; LIKELY(D < Dloop); ++D)
|
||||
{
|
||||
// 边界条件一定要注意! tlen 大于,等于,小于 qlen时的情况
|
||||
if (D > tlen)
|
||||
{
|
||||
span = MIN(Dloop - D, window_size);
|
||||
beg1 = MAX(D - tlen + 1, ((D - window_size) / 2) + 1);
|
||||
}
|
||||
if (D < tlen)
|
||||
beg1 = 1;
|
||||
else
|
||||
{
|
||||
span = MIN(D - 1, window_size);
|
||||
beg1 = MAX(1, ((D - window_size) / 2) + 1);
|
||||
}
|
||||
end1 = MIN(qlen, beg1 + span);
|
||||
beg1 = D - tlen + 1;
|
||||
if (D < qlen)
|
||||
end1 = D; // 闭区间
|
||||
else
|
||||
end1 = qlen;
|
||||
// beg1 = MAX(D - window_size, beg1);
|
||||
// end1 = MIN(D + window_size, end1);
|
||||
|
||||
// if (beg < beg1)
|
||||
// beg = beg1;
|
||||
// if (end > end1)
|
||||
// end = end1;
|
||||
beg = MAX(beg1, beg);
|
||||
end = MIN(end1, end);
|
||||
// if (beg > end)
|
||||
// break; // 不用计算了,直接跳出,否则hA2没有被赋值,里边是上一轮hA0的值,会出bug
|
||||
// break;
|
||||
|
||||
beg = 1;
|
||||
end = qlen;
|
||||
iend = D - (beg - 1); // ref开始计算的位置,倒序
|
||||
beg = beg1;
|
||||
end = end1;
|
||||
|
||||
iend = D - beg; // ref开始计算的位置,倒序
|
||||
span = end - beg;
|
||||
iStart = iend - span - 1; // 0开始的ref索引位置
|
||||
ibeg = iend - span; // 0开始的ref索引位置
|
||||
|
||||
// 每一轮需要记录的数据
|
||||
int m = 0, mj = -1, mi = -1;
|
||||
max_vec = zero_vec;
|
||||
|
||||
// 要处理边界
|
||||
// 左边界 处理f (insert)
|
||||
if (iStart == 0)
|
||||
{
|
||||
hA1[end] = MAX(0, init_score - (o_ins + e_ins * end));
|
||||
}
|
||||
// 上边界
|
||||
// 处理左边界
|
||||
if (beg == 1)
|
||||
{
|
||||
hA1[0] = MAX(0, init_score - (o_del + e_del * iend));
|
||||
}
|
||||
else
|
||||
{
|
||||
hA1[beg - 1] = 0;
|
||||
eA1[beg - 1] = 0;
|
||||
hA0[0] = eA2[0];
|
||||
mA1[0] = 0;
|
||||
eA1[0] = MAX(0, init_score - (o_del + e_del * (iend + 1)));
|
||||
}
|
||||
|
||||
for (j = beg, i = iend; j <= end + 1 - SIMD_WIDTH; j += SIMD_WIDTH, i -= SIMD_WIDTH)
|
||||
|
|
@@ -380,6 +369,13 @@ int ksw_extend_avx2_u8(thread_mem_t *tmem,
|
|||
// 存储结果
|
||||
SIMD_STORE;
|
||||
}
|
||||
// 处理上边界
|
||||
if (ibeg == 0)
|
||||
{
|
||||
fA2[end + 1] = MAX(0, init_score - (o_ins + e_ins * (end + 1)));
|
||||
hA2[end + 1] = fA2[end + 1];
|
||||
mA2[end + 1] = 0;
|
||||
}
|
||||
|
||||
SIMD_FIND_MAX;
|
||||
|
||||
|
|
@@ -388,7 +384,7 @@ int ksw_extend_avx2_u8(thread_mem_t *tmem,
|
|||
|
||||
if (j == qlen + 1)
|
||||
{
|
||||
max_ie = gscore > hA2[qlen] ? max_ie : iStart;
|
||||
max_ie = gscore > hA2[qlen] ? max_ie : ibeg;
|
||||
gscore = gscore > hA2[qlen] ? gscore : hA2[qlen];
|
||||
}
|
||||
if (m > max)
|
||||
|
|
|
|||
|
|
@@ -74,14 +74,15 @@ int ksw_extend_normal(thread_mem_t *tmem, int qlen, const uint8_t *query, int tl
|
|||
for (i = 0; LIKELY(i < tlen); ++i) // 对target逐个字符进行遍历
|
||||
{
|
||||
int t, f = 0, h1, m = 0, mj = -1;
|
||||
int8_t *q = &qp[target[i] * qlen]; // 对于target第i个字符,query中每个字符的分值,只有匹配和不匹配
|
||||
// apply the band and the constraint (if provided)
|
||||
// if (beg < i - w) // 检查开始点是否可以缩小一些
|
||||
// beg = i - w;
|
||||
// if (end > i + w + 1) // 检查终点是否可以缩小,使得整体的遍历范围缩小
|
||||
// end = i + w + 1;
|
||||
// if (end > qlen) // 终点不超过query长度
|
||||
// end = qlen;
|
||||
// 对于target第i个字符,query中每个字符的分值,只有匹配和不匹配
|
||||
int8_t *q = &qp[target[i] * qlen];
|
||||
// apply the band and the constraint (if provided)
|
||||
// if (beg < i - w) // 检查开始点是否可以缩小一些
|
||||
// beg = i - w;
|
||||
// if (end > i + w + 1) // 检查终点是否可以缩小,使得整体的遍历范围缩小
|
||||
// end = i + w + 1;
|
||||
// if (end > qlen) // 终点不超过query长度
|
||||
// end = qlen;
|
||||
beg = 0;
|
||||
end = qlen;
|
||||
// compute the first column
|
||||
|
|
|
|||
5
main.c
5
main.c
|
|
@@ -107,7 +107,7 @@ int read_seq_line(char *read_buf, FILE *f_ptr, char *out_arr)
|
|||
line_size--;
|
||||
}
|
||||
convert_char_to_2bit(read_buf);
|
||||
strncpy(out_arr, read_buf, line_size);
|
||||
memcpy(out_arr, read_buf, line_size);
|
||||
return line_size;
|
||||
}
|
||||
|
||||
|
|
@@ -204,6 +204,7 @@ int main(int argc, char *argv[])
|
|||
while (!feof(target_f) && cur_read_size < BLOCK_BUF_SIZE)
|
||||
{
|
||||
int line_size = read_seq_line(read_buf, target_f, target_arr + cur_read_size);
|
||||
// fprintf(stderr, "line: %d\n", line_size);
|
||||
if (line_size == 0)
|
||||
break;
|
||||
cur_read_size += line_size;
|
||||
|
|
@@ -248,7 +249,7 @@ int main(int argc, char *argv[])
|
|||
// PERFORMANCE_TEST_AVX2(3, ksw_extend_avx2_aligned);
|
||||
//
|
||||
// // avx2 u8
|
||||
// PERFORMANCE_TEST_AVX2(4, ksw_extend_avx2_u8);
|
||||
PERFORMANCE_TEST_AVX2(4, ksw_extend_avx2_u8);
|
||||
// // avx2 u8 heuristics
|
||||
// PERFORMANCE_TEST_AVX2(5, ksw_extend_avx2_u8_heuristics);
|
||||
// // avx2 u8 mem aligned
|
||||
|
|
|
|||
6
utils.c
6
utils.c
|
|
@@ -9,6 +9,7 @@
|
|||
#include "utils.h"
|
||||
#include <string.h>
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
|
||||
unsigned char nst_nt4_table[256] = {
|
||||
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
|
||||
|
|
@@ -34,6 +35,9 @@ char t_2bit2char[5] = {'A', 'C', 'G', 'T', 'N'};
|
|||
void convert_char_to_2bit(char *str)
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < strlen(str); ++i)
|
||||
const int slen = strlen(str);
|
||||
for (i = 0; i < slen; ++i)
|
||||
{
|
||||
str[i] = nst_nt4_table[(uint8_t)str[i]];
|
||||
}
|
||||
}
|
||||
Loading…
Reference in New Issue