sw_perf/extend.c

100 lines
4.3 KiB
C
Raw Permalink Blame History

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

/*********************************************************************************************
Description: sw extend functions in bwa-mem
Copyright : All rights reserved by NCIC.ICT
Author : Zhang Zhonghai
Date : 2024/04/08
***********************************************************************************************/
#include <stdio.h>
#include <stdlib.h>
#include "byte_alloc.h"
#include "utils.h"
#include "profiling.h"
#include "extend.h"
#include "debug.h"
/*
 * EXTEND_PERFORMANCE_TEST - run one extension kernel over rows [sp, ep) and
 * accumulate its scores, bracketed by PROF_START/PROF_END.
 *
 *   kernel_id  index into score_total[] and kernel_prof_idx[] for this kernel
 *   func       extension kernel to call (function or function-like macro)
 *   sp, ep     first row (inclusive) and end row (exclusive); both are
 *              parenthesized in the expansion so any expression is safe
 *
 * The macro intentionally reads these names from the expansion site, so they
 * must all be in scope where it is used: bmem, q_arr, t_arr, i_arr, mat, qle,
 * tle, gtle, gscore, max_off, score_total, kernel_prof_idx and gprof.
 *
 * Per row, element 0 of the info record is passed together with the query
 * sequence and element 1 together with the target sequence. Element 2 of the
 * info record is NOT used: the corresponding argument is hard-coded to 10.
 * NOTE(review): the remaining numeric literals are kernel parameters whose
 * meaning is defined by the kernels in extend.h - confirm there before
 * changing them.
 *
 * Locals use an ext_..._ suffix so they cannot shadow an identifier that
 * appears inside a macro argument.
 */
#define EXTEND_PERFORMANCE_TEST(kernel_id, func, sp, ep) \
    do { \
        PROF_START(extend); \
        int ext_row_, ext_score_; \
        for (ext_row_ = (sp); ext_row_ < (ep); ++ext_row_) { \
            ext_score_ = func( \
                &bmem, \
                kv_A(kv_A(i_arr, ext_row_), 0), kv_A(q_arr, ext_row_).a, \
                kv_A(kv_A(i_arr, ext_row_), 1), kv_A(t_arr, ext_row_).a, \
                5, mat, 6, 1, 6, 1, 100, 5, 100, \
                /* fixed value; info element 2 is not read */ 10, \
                &qle, &tle, &gtle, &gscore, &max_off[0]); \
            score_total[kernel_id] += ext_score_; \
        } \
        PROF_END(gprof[kernel_prof_idx[kernel_id]], extend); \
    } while (0)
/******
 * main_extend - run every extension kernel over the same input rows and
 * record each kernel's accumulated score (and, with SHOW_PERF, print timing).
 *
 * Input is three text files:
 *   query file:  one query sequence per line, made of A/C/G/T/N
 *   target file: one reference (target) sequence per line, made of A/C/G/T/N
 *   info file:   three numbers per line: query sequence length, target
 *                sequence length, and the initial score h0
 *
 * argv[0], argv[1] and argv[2] are taken as the query, target and info paths.
 * NOTE(review): this implies the caller passes argv already shifted past the
 * program/sub-command name - confirm against the call site.
 *
 * Returns 0 on success, -1 if fewer than three arguments are given or if any
 * of the three files cannot be opened.
 */
int main_extend(int argc, char *argv[])
{
    if (argc < 3) {
        fprintf(stderr, "Need 3 files: query, target, info.\n");
        return -1;
    }

    const char *qf_path = argv[0];
    const char *tf_path = argv[1];
    const char *if_path = argv[2];

    FILE *qfp = fopen(qf_path, "r");
    FILE *tfp = fopen(tf_path, "r");
    FILE *ifp = fopen(if_path, "r");

    /* fopen returns NULL on failure; never hand a NULL stream to the readers. */
    if (qfp == NULL || tfp == NULL || ifp == NULL) {
        if (qfp == NULL) {
            fprintf(stderr, "Failed to open query file: %s\n", qf_path);
        } else {
            fclose(qfp);
        }
        if (tfp == NULL) {
            fprintf(stderr, "Failed to open target file: %s\n", tf_path);
        } else {
            fclose(tfp);
        }
        if (ifp == NULL) {
            fprintf(stderr, "Failed to open info file: %s\n", if_path);
        } else {
            fclose(ifp);
        }
        return -1;
    }

    /*
     * The names below (q_arr, t_arr, i_arr, score_total, mat, kernel_prof_idx,
     * bmem, max_off, qle, tle, gtle, gscore) are read directly by
     * EXTEND_PERFORMANCE_TEST and must keep these exact names.
     */
    buf_t read_buf = {0};
    seq_v q_arr = {0};
    seq_v t_arr = {0};
    qti_v i_arr = {0};
    /* NOTE(review): indexed 0..3 below, so EXTEND_FUNC_NUM must be >= 4. */
    uint64_t score_total[EXTEND_FUNC_NUM] = {0};

    /* Upper bound on the number of rows requested from each input file. */
    const int kmax_row = 3000000;
    int query_read_row = read_seq(&q_arr, &read_buf, kmax_row, qfp);
    int target_read_row = read_seq(&t_arr, &read_buf, kmax_row, tfp);
    int info_read_row = read_qt_info(&i_arr, &read_buf, kmax_row, 3, ifp);

    /* 5x5 scoring matrix: 1 on the first four diagonal cells, -4 for the
     * other cells among the first four symbols, -1 for the fifth row/column. */
    int8_t mat[25] = {1, -4, -4, -4, -1,
                      -4, 1, -4, -4, -1,
                      -4, -4, 1, -4, -1,
                      -4, -4, -4, 1, -1,
                      -1, -1, -1, -1, -1};

    /* Profiling slot for each kernel, in the order the kernels run below. */
    int kernel_prof_idx[] = {G_EXT_SCALAR, G_EXT_AVX2_I16, G_EXT_AVX2_U8, G_EXT_AVX2_I16_SP};

    byte_mem_t bmem = {0};
    byte_mem_init_alloc(&bmem, 1024 * 1024);

    /* Output slots written by the kernels; their values are not read here. */
    int max_off[2], qle, tle, gtle, gscore;

    /* Only process rows that are present in all three inputs. */
    int excute_lines = MIN(MIN(query_read_row, target_read_row), info_read_row);
    fprintf(stderr, "excute nums: %d\n", excute_lines);

    EXTEND_PERFORMANCE_TEST(0, extend_scalar, 0, excute_lines);
    EXTEND_PERFORMANCE_TEST(1, extend_avx2_i16, 0, excute_lines);
    EXTEND_PERFORMANCE_TEST(2, extend_avx2_u8, 0, excute_lines);
    EXTEND_PERFORMANCE_TEST(3, extend_avx2_i16_sp, 0, excute_lines);

    /* Publish each kernel's score sum into its profiling data slot. */
    for (size_t k = 0; k < ARRAY_SIZE(kernel_prof_idx); ++k) {
        gdata[kernel_prof_idx[k]] = score_total[k];
    }

#ifdef SHOW_PERF
    fprintf(stderr, "[extend scalar ] time: %9.6lf s; score: %ld\n", gprof[G_EXT_SCALAR] / TIME_DIVIDE_BY, gdata[G_EXT_SCALAR]);
    fprintf(stderr, "[extend avx i16] time: %9.6lf s; score: %ld\n", gprof[G_EXT_AVX2_I16] / TIME_DIVIDE_BY, gdata[G_EXT_AVX2_I16]);
    fprintf(stderr, "[extend avx u8 ] time: %9.6lf s; score: %ld\n", gprof[G_EXT_AVX2_U8] / TIME_DIVIDE_BY, gdata[G_EXT_AVX2_U8]);
    fprintf(stderr, "[extend avx sp ] time: %9.6lf s; score: %ld\n", gprof[G_EXT_AVX2_I16_SP] / TIME_DIVIDE_BY, gdata[G_EXT_AVX2_I16_SP]);
#endif

    fclose(qfp);
    fclose(tfp);
    fclose(ifp);
    return 0;
}