diff --git a/ksw_ext_avx2_aligned.c b/ksw_ext_avx2_aligned.c index e708ea7..9951ee9 100644 --- a/ksw_ext_avx2_aligned.c +++ b/ksw_ext_avx2_aligned.c @@ -389,6 +389,7 @@ int ksw_extend_avx2_aligned(thread_mem_t *tmem, hA2[end + 1] = fA2[end + 1]; mA2[end + 1] = 0; } + SIMD_FIND_MAX; // 注意最后跳出循环j的值 diff --git a/ksw_ext_normal.c b/ksw_ext_normal.c index 3492ce5..d8f3469 100644 --- a/ksw_ext_normal.c +++ b/ksw_ext_normal.c @@ -25,11 +25,11 @@ int ksw_extend_normal(thread_mem_t *tmem, int qlen, const uint8_t *query, int tl int8_t *qp; // query profile int i, j, k, oe_del = o_del + e_del, oe_ins = o_ins + e_ins, beg, end, max, max_i, max_j, max_ins, max_del, max_ie, gscore, max_off; assert(h0 > 0); - qp = malloc(qlen * m); - eh = calloc(qlen + 1, 8); - // qp = thread_mem_request(tmem, qlen * m); - // eh = thread_mem_request_and_clean(tmem, (qlen + 1) * 9); - // generate the query profile + // qp = malloc(qlen * m); + // eh = calloc(qlen + 1, 8); + qp = thread_mem_request(tmem, qlen * m); + eh = thread_mem_request_and_clean(tmem, (qlen + 1) * 8); + // generate the query profile for (k = i = 0; k < m; ++k) { const int8_t *p = &mat[k * m]; @@ -173,9 +173,9 @@ int ksw_extend_normal(thread_mem_t *tmem, int qlen, const uint8_t *query, int tl // fprintf(stderr, "\n"); // fprintf(stderr, "%d\n", end); } - free(eh); - free(qp); - // thread_mem_release(tmem, qlen * m + (qlen + 1) * 9); + // free(eh); + // free(qp); + thread_mem_release(tmem, qlen * m + (qlen + 1) * 8); if (_qle) *_qle = max_j + 1; if (_tle) diff --git a/main.c b/main.c index f5adbcc..ccb4a69 100644 --- a/main.c +++ b/main.c @@ -141,7 +141,7 @@ int main(int argc, char *argv[]) } // 记录计算出的分数 int score[KERNEL_NUM] = {0}; - int score_total[KERNEL_NUM] = {0}; + int64_t score_total[KERNEL_NUM] = {0}; // 读取测试数据 char *query_arr = (char *)malloc(SEQ_BUF_SIZE); @@ -242,18 +242,18 @@ int main(int argc, char *argv[]) PERFORMANCE_TEST_NORMAL(0, ksw_extend_normal); // avx2 - // PERFORMANCE_TEST_AVX2(1, ksw_extend_avx2); + PERFORMANCE_TEST_AVX2(1, ksw_extend_avx2); // avx2 heuristics - // PERFORMANCE_TEST_AVX2(2, ksw_extend_avx2_heuristics); + PERFORMANCE_TEST_AVX2(2, ksw_extend_avx2_heuristics); // avx2 mem aligned - // PERFORMANCE_TEST_AVX2(3, ksw_extend_avx2_aligned); + PERFORMANCE_TEST_AVX2(3, ksw_extend_avx2_aligned); // avx2 u8 - // PERFORMANCE_TEST_AVX2(4, ksw_extend_avx2_u8); + PERFORMANCE_TEST_AVX2(4, ksw_extend_avx2_u8); // avx2 u8 heuristics - // PERFORMANCE_TEST_AVX2(5, ksw_extend_avx2_u8_heuristics); + PERFORMANCE_TEST_AVX2(5, ksw_extend_avx2_u8_heuristics); // avx2 u8 mem aligned - // PERFORMANCE_TEST_AVX2(6, ksw_extend_avx2_u8_aligned); + PERFORMANCE_TEST_AVX2(6, ksw_extend_avx2_u8_aligned); } #ifdef SHOW_PERF @@ -268,7 +268,7 @@ int main(int argc, char *argv[]) for (i = 0; i < KERNEL_NUM; ++i) { - fprintf(stderr, "[%18s] time: %9.6f s; score: %d\n", kernel_names[i], time_sw[i] / DIVIDE_BY, score_total[i]); + fprintf(stderr, "[%18s] time: %9.6f s; score: %ld\n", kernel_names[i], time_sw[i] / DIVIDE_BY, score_total[i]); } #endif