diff --git a/Makefile b/Makefile index 5d685ea..a41c8fa 100644 --- a/Makefile +++ b/Makefile @@ -1,9 +1,10 @@ CC= gcc #CFLAGS= -g -Wall -Wno-unused-function -mavx2 CFLAGS= -Wall -Wno-unused-function -O2 -mavx2 -DFLAGS= -DSHOW_PERF +DFLAGS= -DSHOW_PERF -DDEBUG_RETURN_VALUE #DFLAGS= -DSHOW_PERF -DDEBUG_OUT -DDEBUG_RETURN_VALUE PROG= sw_perf +PROG2= get_line INCLUDES= LIBS= SUBDIRS= . @@ -30,9 +31,12 @@ endif all:$(PROG) -sw_perf:$(OBJS) main.o +$(PROG):$(OBJS) main.o $(CC) $(CFLAGS) $(LDFLAGS) $(OBJS) main.o -o $@ -L. $(LIBS) +$(PROG2): get_line.o + $(CC) $(CFLAGS) $(LDFLAGS) get_line.o -o $@ -L. $(LIBS) + clean: rm -f *.o a.out $(PROG) *~ *.a diff --git a/get_line b/get_line new file mode 100755 index 0000000..b583d57 Binary files /dev/null and b/get_line differ diff --git a/get_line.c b/get_line.c new file mode 100644 index 0000000..6712d5d --- /dev/null +++ b/get_line.c @@ -0,0 +1,61 @@ +/********************************************************************************************* + Description: Get specific line of a txt file + + Copyright : All right reserved by NCIC.ICT + + Author : Zhang Zhonghai + Date : 2023/08/28 +***********************************************************************************************/ +#include +#include +#include + +#define READ_BUF_SIZE 102400000 + +int main(int argc, char *argv[]) +{ + if (argc < 3) + { + fprintf(stderr, "Please input the file path and the line number you want to get!\n"); + return 1; + } + const char *in_path = argv[1]; + size_t expect_line_num = atoi(argv[2]); + FILE *in_f = fopen(in_path, "r"); + char *read_buf = (char *)malloc(READ_BUF_SIZE); + + int actual_size = 0; + int expect_size = READ_BUF_SIZE; + size_t cur_line_num = 0; + size_t last_line_pos = 0; + size_t cur_line_pos = 0; + int i, j; + int flag_found = 0; + // 读取数据 + actual_size = fread(read_buf, 1, expect_size, in_f); + while (actual_size > 0) + { + for (i = 0; i < actual_size; ++i) + if (read_buf[i] == '\n') + { + cur_line_num += 1; + last_line_pos = cur_line_pos; + cur_line_pos = i; + if (cur_line_num == expect_line_num) + { + for (j = last_line_pos + 1; j <= cur_line_pos; ++j) + { + fprintf(stdout, "%c", read_buf[j]); + } + flag_found = 1; + break; + } + } + if (flag_found) + break; + actual_size = fread(read_buf, 1, expect_size, in_f); + } + if (!flag_found) + fprintf(stderr, "Invalid line number!\n"); + return 0; +} \ No newline at end of file diff --git a/ksw_ext_avx2.c b/ksw_ext_avx2.c index 498b098..a35cff8 100644 --- a/ksw_ext_avx2.c +++ b/ksw_ext_avx2.c @@ -289,6 +289,13 @@ int ksw_extend_avx2(thread_mem_t *tmem, int16_t ins[tlen + 1][qlen + 2]; int16_t del[tlen + 1][qlen + 2]; int16_t score[tlen + 1][qlen + 2]; + for (dii = 0; dii <= tlen; ++dii) + { + for (djj = 0; djj <= qlen; ++djj) + { + ins[dii][djj] = del[dii][djj] = score[dii][djj] = 0; + } + } ins[0][0] = del[0][0] = score[0][0] = init_score; ins[0][1] = MAX(0, init_score - (o_ins + e_ins)); del[1][0] = MAX(0, init_score - (o_del + e_del)); @@ -307,16 +314,15 @@ int ksw_extend_avx2(thread_mem_t *tmem, end1 = D; // 闭区间 else end1 = qlen; - beg1 = MAX(D - window_size, beg1); - end1 = MIN(D + window_size, end1); + // beg1 = MAX(D - window_size, beg1); + // end1 = MIN(D + window_size, end1); + // beg = MAX(beg1, beg); + // end = MIN(end1, end); + // if (beg > end) + // break; - beg = MAX(beg1, beg); - end = MIN(end1, end); - if (beg > end) - break; - - // beg = beg1; - // end = end1; + beg = beg1; + end = end1; iend = D - beg; // ref开始计算的位置,倒序 span = end - beg; @@ -415,20 +421,20 @@ int ksw_extend_avx2(thread_mem_t *tmem, } // 调整计算的边界 - for (j = beg; LIKELY(j <= end); ++j) - { - int has_val = hA1[j - 1] | hA2[j]; - if (has_val) - break; - } - beg = j; - for (j = end + 1; LIKELY(j >= beg); --j) - { - int has_val = hA1[j - 1] | hA2[j]; - if (has_val) - break; - } - end = j + 1 <= qlen ? j + 1 : qlen; + // for (j = beg; LIKELY(j <= end); ++j) + //{ + // int has_val = hA1[j - 1] | hA2[j]; + // if (has_val) + // break; + //} + // beg = j; + // for (j = end + 1; LIKELY(j >= beg); --j) + //{ + // int has_val = hA1[j - 1] | hA2[j]; + // if (has_val) + // break; + //} + // end = j + 1 <= qlen ? j + 1 : qlen; // beg = 0; // end = qlen; // uncomment this line for debugging @@ -441,9 +447,9 @@ int ksw_extend_avx2(thread_mem_t *tmem, { for (djj = 0; djj <= qlen; ++djj) { - fprintf(score_f_arr[1], "%-3d", score[dii][djj]); - fprintf(ins_ext_f_arr[1], "%-3d", ins[dii][djj]); - fprintf(del_ext_f_arr[1], "%-3d", del[dii][djj]); + fprintf(score_f_arr[1], "%-4d", score[dii][djj]); + fprintf(ins_ext_f_arr[1], "%-4d", ins[dii][djj]); + fprintf(del_ext_f_arr[1], "%-4d", del[dii][djj]); } fprintf(score_f_arr[1], "\n"); fprintf(ins_ext_f_arr[1], "\n"); diff --git a/ksw_ext_avx2_u8.c b/ksw_ext_avx2_u8.c index 5053051..2a3ba43 100644 --- a/ksw_ext_avx2_u8.c +++ b/ksw_ext_avx2_u8.c @@ -317,16 +317,15 @@ int ksw_extend_avx2_u8(thread_mem_t *tmem, end1 = D; // 闭区间 else end1 = qlen; - beg1 = MAX(D - window_size, beg1); - end1 = MIN(D + window_size, end1); + // beg1 = MAX(D - window_size, beg1); + // end1 = MIN(D + window_size, end1); + // beg = MAX(beg1, beg); + // end = MIN(end1, end); + // if (beg > end) + // break; - beg = MAX(beg1, beg); - end = MIN(end1, end); - if (beg > end) - break; - - // beg = beg1; - // end = end1; + beg = beg1; + end = end1; iend = D - beg; // ref开始计算的位置,倒序 span = end - beg; @@ -394,22 +393,22 @@ int ksw_extend_avx2_u8(thread_mem_t *tmem, } // 调整计算的边界 - for (j = beg; LIKELY(j <= end); ++j) - { - int has_val = hA1[j - 1] | hA2[j]; - if (has_val) - break; - } - beg = j; - for (j = end + 1; LIKELY(j >= beg); --j) - { - int has_val = hA1[j - 1] | hA2[j]; - if (has_val) - break; - else - hA0[j - 1] = 0; - } - end = j + 1 <= qlen ? j + 1 : qlen; + // for (j = beg; LIKELY(j <= end); ++j) + //{ + // int has_val = hA1[j - 1] | hA2[j]; + // if (has_val) + // break; + //} + // beg = j; + // for (j = end + 1; LIKELY(j >= beg); --j) + //{ + // int has_val = hA1[j - 1] | hA2[j]; + // if (has_val) + // break; + // else + // hA0[j - 1] = 0; + //} + // end = j + 1 <= qlen ? j + 1 : qlen; // swap m, h, e, f SWAP_DATA_POINTER; diff --git a/ksw_ext_normal.c b/ksw_ext_normal.c index d8f3469..c0facc2 100644 --- a/ksw_ext_normal.c +++ b/ksw_ext_normal.c @@ -58,14 +58,14 @@ int ksw_extend_normal(thread_mem_t *tmem, int qlen, const uint8_t *query, int tl max_off = 0; beg = 0, end = qlen; #ifdef DEBUG_OUT - fprintf(score_f_arr[0], "%-3d", h0); - fprintf(ins_ext_f_arr[0], "%-3d", h0); - fprintf(del_ext_f_arr[0], "%-3d", h0); + fprintf(score_f_arr[0], "%-4d", h0); + fprintf(ins_ext_f_arr[0], "%-4d", h0); + fprintf(del_ext_f_arr[0], "%-4d", h0); for (j = 0; LIKELY(j < end); ++j) // 遍历query字符序列 { - fprintf(score_f_arr[0], "%-3d", MAX(h0 - o_ins - (j + 1) * e_ins, 0)); - fprintf(ins_ext_f_arr[0], "%-3d", MAX(h0 - o_ins - (j + 1) * e_ins, 0)); - fprintf(del_ext_f_arr[0], "%-3d", 0); + fprintf(score_f_arr[0], "%-4d", MAX(h0 - o_ins - (j + 1) * e_ins, 0)); + fprintf(ins_ext_f_arr[0], "%-4d", MAX(h0 - o_ins - (j + 1) * e_ins, 0)); + fprintf(del_ext_f_arr[0], "%-4d", 0); } fprintf(score_f_arr[0], "\n"); fprintf(ins_ext_f_arr[0], "\n"); @@ -83,8 +83,8 @@ int ksw_extend_normal(thread_mem_t *tmem, int qlen, const uint8_t *query, int tl end = i + w + 1; if (end > qlen) // 终点不超过query长度 end = qlen; - // beg = 0; - // end = qlen; + beg = 0; + end = qlen; // compute the first column if (beg == 0) { @@ -95,15 +95,15 @@ int ksw_extend_normal(thread_mem_t *tmem, int qlen, const uint8_t *query, int tl else h1 = 0; #ifdef DEBUG_OUT - fprintf(ins_ext_f_arr[0], "%-3d", 0); - fprintf(del_ext_f_arr[0], "%-3d", MAX(h0 - o_del - (i + 1) * e_del, 0)); + fprintf(ins_ext_f_arr[0], "%-4d", 0); + fprintf(del_ext_f_arr[0], "%-4d", MAX(h0 - o_del - (i + 1) * e_del, 0)); #endif for (j = beg; LIKELY(j < end); ++j) // 遍历query字符序列 { #ifdef DEBUG_OUT - fprintf(score_f_arr[0], "%-3d", h1); - fprintf(ins_ext_f_arr[0], "%-3d", f); - fprintf(del_ext_f_arr[0], "%-3d", eh[j].e); + fprintf(score_f_arr[0], "%-4d", h1); + fprintf(ins_ext_f_arr[0], "%-4d", f); + fprintf(del_ext_f_arr[0], "%-4d", eh[j].e); #endif // At the beginning of the loop: eh[j] = { H(i-1,j-1), E(i,j) }, f = F(i,j) and h1 = H(i,j-1) // Similar to SSE2-SW, cells are computed in the following order: @@ -130,7 +130,7 @@ int ksw_extend_normal(thread_mem_t *tmem, int qlen, const uint8_t *query, int tl f = f > t ? f : t; // computed F(i,j+1) } #ifdef DEBUG_OUT - fprintf(score_f_arr[0], "%-3d", h1); + fprintf(score_f_arr[0], "%-4d", h1); fprintf(score_f_arr[0], "\n"); fprintf(ins_ext_f_arr[0], "\n"); fprintf(del_ext_f_arr[0], "\n"); @@ -142,8 +142,8 @@ int ksw_extend_normal(thread_mem_t *tmem, int qlen, const uint8_t *query, int tl max_ie = gscore > h1 ? max_ie : i; // max_ie表示取得全局最大分值时,target字符串的位置 gscore = gscore > h1 ? gscore : h1; } - if (m == 0) // 遍历完query之后,当前轮次的最大分值为0,则跳出循环 - break; + // if (m == 0) // 遍历完query之后,当前轮次的最大分值为0,则跳出循环 + // break; if (m > max) // 当前轮最大分值大于之前的最大分值 { max = m, max_i = i, max_j = mj; // 更新取得最大值的target和query的位置 @@ -163,15 +163,15 @@ int ksw_extend_normal(thread_mem_t *tmem, int qlen, const uint8_t *query, int tl } } // update beg and end for the next round - for (j = beg; LIKELY(j < end) && eh[j].h == 0 && eh[j].e == 0; ++j) - ; - beg = j; - for (j = end; LIKELY(j >= beg) && eh[j].h == 0 && eh[j].e == 0; --j) - ; - end = j + 2 < qlen ? j + 2 : qlen; // 剪枝没考虑f,即insert - // beg = 0, end = qlen; // uncomment this line for debugging - // fprintf(stderr, "\n"); - // fprintf(stderr, "%d\n", end); + // for (j = beg; LIKELY(j < end) && eh[j].h == 0 && eh[j].e == 0; ++j) + // ; + // beg = j; + // for (j = end; LIKELY(j >= beg) && eh[j].h == 0 && eh[j].e == 0; --j) + // ; + // end = j + 2 < qlen ? j + 2 : qlen; // 剪枝没考虑f,即insert + // beg = 0, end = qlen; // uncomment this line for debugging + // fprintf(stderr, "\n"); + // fprintf(stderr, "%d\n", end); } // free(eh); // free(qp); diff --git a/main.c b/main.c index ccb4a69..866ae44 100644 --- a/main.c +++ b/main.c @@ -1,3 +1,11 @@ +/********************************************************************************************* + Description: The entry for sw performance tests + + Copyright : All right reserved by NCIC.ICT + + Author : Zhang Zhonghai + Date : 2023/08/20 +***********************************************************************************************/ #include #include #include @@ -31,10 +39,43 @@ int64_t time_sw[KERNEL_NUM] = {0}; #endif #ifdef DEBUG_RETURN_VALUE -#define OUTPUT_RETVAL(kernel_num) \ +#define OUTPUT_RETVAL_1(kernel_num) \ + fprintf(retval_f_arr[kernel_num], "%d\t%d\t%d\t%d\t", score[kernel_num], info_arr[i][0], info_arr[i][1], info_arr[i][2]); \ + for (j = cur_query_pos - info_arr[i][0]; j < cur_query_pos; ++j) \ + { \ + fprintf(retval_f_arr[kernel_num], "%c", t_2bit2char[(uint8_t)query_arr[j]]); \ + } \ + fprintf(retval_f_arr[kernel_num], "\t"); \ + for (j = cur_target_pos - info_arr[i][1]; j < cur_target_pos; ++j) \ + { \ + fprintf(retval_f_arr[kernel_num], "%c", t_2bit2char[(uint8_t)target_arr[j]]); \ + } \ + fprintf(retval_f_arr[kernel_num], "\n"); + +#define OUTPUT_RETVAL_SCORE \ fprintf(retval_f_arr[kernel_num], "%d\t%d\t%d\t%d\t%d\t%d\n", score[kernel_num], qle, tle, gtle, gscore, max_off[0]) +// fprintf(retval_f_arr[kernel_num], " %d %d\n", cur_query_pos, info_arr[i][0]); +// fprintf(retval_f_arr[kernel_num], "%d\t%d\t%d\t%d\n", score[kernel_num], info_arr[i][0], info_arr[i][1], info_arr[i][2]) + +#define OUTPUT_RETVAL_target(kernel_num) \ + for (j = cur_target_pos - info_arr[i][1]; j < cur_target_pos; ++j) \ + { \ + fprintf(retval_f_arr[kernel_num], "%c", t_2bit2char[(uint8_t)target_arr[j]]); \ + } \ + fprintf(retval_f_arr[kernel_num], "\n"); + +#define OUTPUT_RETVAL_query(kernel_num) \ + for (j = cur_query_pos - info_arr[i][0]; j < cur_query_pos; ++j) \ + { \ + fprintf(retval_f_arr[kernel_num], "%c", t_2bit2char[(uint8_t)query_arr[j]]); \ + } \ + fprintf(retval_f_arr[kernel_num], "\n"); + +#define OUTPUT_RETVAL_INFO(kernel_num) \ + fprintf(retval_f_arr[kernel_num], "%-8d%-8d%-8d\n", info_arr[i][0], info_arr[i][1], info_arr[i][2]); +#define OUTPUT_RETVAL(kernel_num) OUTPUT_RETVAL_target(kernel_num) #else -#define OUTPUT_RETVAL(out_f) +#define OUTPUT_RETVAL(kernel_num) #endif #define _PERFORMANCE_TEST_NORMAL(kernel_num, func) \ @@ -78,16 +119,24 @@ int64_t time_sw[KERNEL_NUM] = {0}; cur_target_pos += info_arr[i][1]; \ OUTPUT_RETVAL(kernel_num); \ } - +#define _PERFORMANCE_TEST_AVX2_T1(kernel_num, func) \ + cur_query_pos = 0; \ + cur_target_pos = 0; \ + for (i = 0; i < block_line_num; ++i) \ + { \ + cur_query_pos += info_arr[i][0]; \ + cur_target_pos += info_arr[i][1]; \ + OUTPUT_RETVAL(kernel_num); \ + } #ifdef SHOW_PERF #define PERFORMANCE_TEST_NORMAL(kernel_num, func) \ start_time = get_mseconds(); \ _PERFORMANCE_TEST_NORMAL(kernel_num, func); \ time_sw[kernel_num] += get_mseconds() - start_time -#define PERFORMANCE_TEST_AVX2(kernel_num, func) \ - start_time = get_mseconds(); \ - _PERFORMANCE_TEST_AVX2(kernel_num, func); \ +#define PERFORMANCE_TEST_AVX2(kernel_num, func) \ + start_time = get_mseconds(); \ + _PERFORMANCE_TEST_AVX2_T1(kernel_num, func); \ time_sw[kernel_num] += get_mseconds() - start_time #else #define PERFORMANCE_TEST_NORMAL(kernel_num, func) _PERFORMANCE_TEST_NORMAL(kernel_num, func) @@ -98,7 +147,7 @@ int64_t time_sw[KERNEL_NUM] = {0}; int read_seq_line(char *read_buf, FILE *f_ptr, char *out_arr) { if (fgets(read_buf, READ_BUF_SIZE, f_ptr) == NULL) - return 0; + return -1; int line_size = strlen(read_buf); assert(line_size < READ_BUF_SIZE); if (read_buf[line_size - 1] == '\n') @@ -152,8 +201,8 @@ int main(int argc, char *argv[]) FILE *query_f = 0, *target_f = 0, *info_f = 0; // 每次读取一定量的数据,然后执行,直到处理完所有数据 - int total_line_num = 0; // 目前处理的总的数据行数 - int block_line_num = 0; // 当前循环包含的数据行数 + int64_t total_line_num = 0; // 目前处理的总的数据行数 + int block_line_num = 0; // 当前循环包含的数据行数 int cur_query_pos, cur_target_pos; int64_t start_time; char read_buf[READ_BUF_SIZE]; // 读文件缓存 @@ -204,8 +253,14 @@ int main(int argc, char *argv[]) while (!feof(target_f) && cur_read_size < BLOCK_BUF_SIZE) { int line_size = read_seq_line(read_buf, target_f, target_arr + cur_read_size); - // fprintf(stderr, "line: %d\n", line_size); - if (line_size == 0) + // for (j = 0; j < line_size; ++j) + //{ + // // fprintf(stderr, "%c", t_2bit2char[(uint8_t)read_buf[j]]); + // fprintf(stderr, "%c", t_2bit2char[(uint8_t)target_arr[j + cur_read_size]]); + // } + // fprintf(stderr, "\n"); + // fprintf(retval_f_arr[1], "%d\n", line_size); + if (line_size == -1) break; cur_read_size += line_size; ++block_line_num; @@ -217,7 +272,13 @@ int main(int argc, char *argv[]) for (i = 0; i < block_line_num; ++i) { int line_size = read_seq_line(read_buf, query_f, query_arr + cur_read_size); - if (line_size == 0) + // int j; + // for (j = cur_read_size; j < cur_read_size + line_size; ++j) + //{ + // fprintf(retval_f_arr[0], "%c", t_2bit2char[(uint8_t)query_arr[j]]); + // } + // fprintf(retval_f_arr[0], "\n"); + if (line_size == -1) break; cur_read_size += line_size; } @@ -236,26 +297,43 @@ int main(int argc, char *argv[]) // fprintf(stderr, "%s\n", read_buf); } + cur_read_size = 0; + for (i = 0; i < block_line_num; ++i) + { + for (j = cur_read_size; j < cur_read_size + info_arr[i][1]; ++j) + { + fprintf(retval_f_arr[0], "%c", t_2bit2char[(uint8_t)target_arr[j]]); + } + fprintf(retval_f_arr[0], "\n"); + cur_read_size += info_arr[i][1]; + } + // for (i = 0; i < block_line_num; ++i) + //{ + // fprintf(retval_f_arr[0], "%d\n", info_arr[i][1]); + //} + // 性能测试 // normal sw - PERFORMANCE_TEST_NORMAL(0, ksw_extend_normal); + // PERFORMANCE_TEST_NORMAL(0, ksw_extend_normal); // avx2 - PERFORMANCE_TEST_AVX2(1, ksw_extend_avx2); + // PERFORMANCE_TEST_AVX2(1, ksw_extend_avx2); // avx2 heuristics - PERFORMANCE_TEST_AVX2(2, ksw_extend_avx2_heuristics); + // PERFORMANCE_TEST_AVX2(2, ksw_extend_avx2_heuristics); // avx2 mem aligned - PERFORMANCE_TEST_AVX2(3, ksw_extend_avx2_aligned); + // PERFORMANCE_TEST_AVX2(3, ksw_extend_avx2_aligned); // avx2 u8 - PERFORMANCE_TEST_AVX2(4, ksw_extend_avx2_u8); + // PERFORMANCE_TEST_AVX2(4, ksw_extend_avx2_u8); // avx2 u8 heuristics - PERFORMANCE_TEST_AVX2(5, ksw_extend_avx2_u8_heuristics); + // PERFORMANCE_TEST_AVX2(5, ksw_extend_avx2_u8_heuristics); // avx2 u8 mem aligned - PERFORMANCE_TEST_AVX2(6, ksw_extend_avx2_u8_aligned); + // PERFORMANCE_TEST_AVX2(6, ksw_extend_avx2_u8_aligned); } + fprintf(stderr, "%ld\n", total_line_num); + #ifdef SHOW_PERF char *kernel_names[7] = { "normal", diff --git a/run_debug.sh b/run_debug.sh index 17cc634..7e78b2f 100755 --- a/run_debug.sh +++ b/run_debug.sh @@ -1,2 +1,4 @@ #!/bin/bash -/home/zzh/work/sw_perf/sw_perf /home/zzh/work/sw_perf/input/q.fa /home/zzh/work/sw_perf/input/t.fa /home/zzh/work/sw_perf/input/i.txt \ No newline at end of file +#/home/zzh/work/sw_perf/sw_perf /home/zzh/work/sw_perf/input/q.fa /home/zzh/work/sw_perf/input/t.fa /home/zzh/work/sw_perf/input/i.txt +/home/zzh/work/sw_perf/sw_perf /home/zzh/work/sw_perf/input/q_d.fa /home/zzh/work/sw_perf/input/t_d.fa /home/zzh/work/sw_perf/input/i_d.txt + diff --git a/utils.h b/utils.h index 79ca7f7..6edb9ec 100644 --- a/utils.h +++ b/utils.h @@ -9,6 +9,10 @@ #ifndef __UTILS_H #define __UTILS_H +extern char t_2bit2char[5]; + +extern unsigned char nst_nt4_table[256]; + // 将碱基字符转成2位编码 void convert_char_to_2bit(char *str);