/********************************************************************************************* Description: sw extend functions in bwa-mem Copyright : All right reserved by NCIC.ICT Author : Zhang Zhonghai Date : 2024/04/08 ***********************************************************************************************/ #include #include #include "byte_alloc.h" #include "utils.h" #include "profiling.h" #include "extend.h" #include "debug.h" #define EXTEND_PERFORMANCE_TEST(kernel_id, func, sp, ep) \ do \ { \ PROF_START(extend); \ int i, score; \ for (i = sp; i < ep; ++i) \ { \ score = func( \ &bmem, \ kv_A(kv_A(i_arr, i), 0), kv_A(q_arr, i).a, \ kv_A(kv_A(i_arr, i), 1), kv_A(t_arr, i).a, \ 5, mat, 6, 1, 6, 1, 100, 5, 100, \ /*kv_A(kv_A(i_arr, i), 2)*/10, \ &qle, &tle, >le, &gscore, &max_off[0]); \ score_total[kernel_id] += score; \ } \ PROF_END(gprof[kernel_prof_idx[kernel_id]], extend); \ } while (0) /****** * 输入说明:三个文件,query.fa, target.fa, info.txt * query.fa: 每一行代表一个query序列,由ACGTN组成 * target.fa: 每一行代表一个reference(target)序列,由ACGTN组成 * info.txt: 每一行由三个数字组成,分别代表query序列长度,target序列长度,以及初始分数h0 */ int main_extend(int argc, char *argv[]) { if (argc < 3) { fprintf(stderr, "Need 3 files: query, target, info.\n"); return -1; } const char *qf_path = argv[0]; const char *tf_path = argv[1]; const char *if_path = argv[2]; FILE *qfp = fopen(qf_path, "r"); FILE *tfp = fopen(tf_path, "r"); FILE *ifp = fopen(if_path, "r"); buf_t read_buf = {0}; seq_v q_arr = {0}; seq_v t_arr = {0}; qti_v i_arr = {0}; uint64_t score_total[EXTEND_FUNC_NUM] = {0}; const int kmax_row = 3000000; int query_read_row = read_seq(&q_arr, &read_buf, kmax_row, qfp); int target_read_row = read_seq(&t_arr, &read_buf, kmax_row, tfp); int info_read_row = read_qt_info(&i_arr, &read_buf, kmax_row, 3, ifp); // fprintf(stderr, "read row: %d\t%d\t%d\n", query_read_row, target_read_row, info_read_row); int8_t mat[25] = {1, -4, -4, -4, -1, -4, 1, -4, -4, -1, -4, -4, 1, -4, -1, -4, -4, -4, 1, -1, -1, -1, -1, -1, -1}; int kernel_prof_idx[] = {G_EXT_SCALAR, G_EXT_AVX2_I16, G_EXT_AVX2_U8, G_EXT_AVX2_I16_SP}; byte_mem_t bmem = {0}; byte_mem_init_alloc(&bmem, 1024 * 1024); int max_off[2], qle, tle, gtle, gscore; int excute_lines = MIN(MIN(query_read_row, target_read_row), info_read_row); //open_qti_files(); //open_debug_files(); fprintf(stderr, "excute nums: %d\n", excute_lines); EXTEND_PERFORMANCE_TEST(0, extend_scalar, 0, excute_lines); EXTEND_PERFORMANCE_TEST(1, extend_avx2_i16, 0, excute_lines); EXTEND_PERFORMANCE_TEST(2, extend_avx2_u8, 0, excute_lines); EXTEND_PERFORMANCE_TEST(3, extend_avx2_i16_sp, 0, excute_lines); int i = 0; for(; i