hyb-align/profiling.c

225 lines
7.9 KiB
C
Raw Normal View History

/*
Description: profiling related data
Copyright : All rights reserved by ICT
Author : Zhang Zhonghai
Date : 2024/04/06
*/
#include <stdio.h>
#include "utils.h"
#include "profiling.h"
#include "debug.h"
/* Divisor applied to raw profiling counters before they are printed with an "s" suffix. */
uint64_t proc_freq = 1000;
#ifdef SHOW_PERF
/* Per-thread timing counters: first index is a T_* profile type, second index is the thread slot. */
uint64_t tprof[LIM_THREAD_PROF_TYPE][LIM_THREAD] = {0};
/* Process-wide timing counters, indexed by a G_* profile type. */
uint64_t gprof[LIM_GLOBAL_PROF_TYPE] = {0};
#endif
#ifdef SHOW_DATA_PERF
/*
tdat[0]: number of reads
tdat[1]: number of seed-1 full matches
*/
/* Data counters sized LIM_THREAD_DATA_TYPE x LIM_THREAD (presumably [data type][thread] -- confirm against the writers). */
int64_t tdat[LIM_THREAD_DATA_TYPE][LIM_THREAD] = {0};
int64_t t_sd[LIM_THREAD_DATA_TYPE][LIM_THREAD] = {0};
/* Data counters sized LIM_GLOBAL_DATA_TYPE; display_stats() reports gdat[0] as "seq num" and gdat[1] as "full num". */
int64_t gdat[LIM_GLOBAL_DATA_TYPE] = {0};
int64_t gd1[LIM_GLOBAL_DATA_TYPE] = {0};
#endif
/*
 * Add up the first `len` signed 64-bit values stored at `a`.
 * A non-positive `len` touches no element and yields 0.
 */
int64_t sum(int64_t *a, int len) {
    const int64_t *cursor = a;
    int64_t total = 0;
    int remaining;

    /* Walk forward through the array, counting down the elements still to add. */
    for (remaining = len; remaining > 0; --remaining) {
        total += *cursor++;
    }
    return total;
}
/*
 * Summarise a[0..len-1]: store the largest value, the smallest value and the
 * arithmetic mean, each divided by the global proc_freq, in *max, *min and
 * *avg respectively.
 *
 * Returns 1 when a summary was computed. When there is nothing to summarise
 * (a is NULL or len <= 0) the three outputs are set to 0 and 0 is returned;
 * without this guard the mean would be 0/0 (NaN) and the minimum would be
 * derived from the UINT64_MAX sentinel.
 */
int find_opt(uint64_t *a, int len, double *max, double *min, double *avg)
{
    int i;
    uint64_t umax, umin, total;

    if (a == NULL || len <= 0) {
        *max = 0.0;
        *min = 0.0;
        *avg = 0.0;
        return 0;
    }

    /* Seed every accumulator from the first element so no sentinel is needed. */
    umax = a[0];
    umin = a[0];
    total = a[0];
    for (i = 1; i < len; i++) {
        if (a[i] > umax) {
            umax = a[i];
        }
        if (a[i] < umin) {
            umin = a[i];
        }
        total += a[i];
    }

    /* Multiplying by 1.0 first forces floating-point division. */
    *avg = total * 1.0 / len / proc_freq;
    *max = umax * 1.0 / proc_freq;
    *min = umin * 1.0 / proc_freq;
    return 1;
}
/*
 * Total of the first `len` unsigned 64-bit values stored at `a`.
 * A non-positive `len` reads nothing and yields 0.
 */
uint64_t get_sum(uint64_t *a, int len) {
    uint64_t total = 0;
    int idx = 0;

    while (idx < len) {
        total += a[idx];
        ++idx;
    }
    return total;
}
/*
 * Print the collected profiling report to stderr.
 *
 * nthreads: number of per-thread slots of each tprof[] row to summarise.
 *
 * The report is compiled in only when SHOW_PERF is defined; otherwise nothing
 * is printed. Raw counters are divided by proc_freq before being printed with
 * an "s" suffix. Always returns 1.
 */
int display_stats(int nthreads)
{
#ifdef SHOW_PERF
    double avg, max, min;

    fprintf(stderr, "[steps in main_mem]\n");
    fprintf(stderr, "time_parse_arg: %0.2lf s\n", gprof[G_PREPARE] * 1.0 / proc_freq);
    fprintf(stderr, "time_load_idx: %0.2lf s\n", gprof[G_LOAD_IDX] * 1.0 / proc_freq);
    fprintf(stderr, "time_pipeline: %0.2lf s\n", gprof[G_PIPELINE] * 1.0 / proc_freq);
    fprintf(stderr, "time_all: %0.2lf s\n", gprof[G_ALL] * 1.0 / proc_freq);

    fprintf(stderr, "\n[steps in pipeline]\n");
    fprintf(stderr, "time_read: %0.2lf s\n", gprof[G_READ] * 1.0 / proc_freq);
    fprintf(stderr, "time_compute: %0.2lf s\n", gprof[G_COMPUTE] * 1.0 / proc_freq);
    fprintf(stderr, "time_write: %0.2lf s\n", gprof[G_WRITE] * 1.0 / proc_freq);

    fprintf(stderr, "\n[steps in mem_process_seqs]\n");
    fprintf(stderr, "time_mem_prepare: %0.2lf s\n", gprof[G_MEM_PREPARE] * 1.0 / proc_freq);
    fprintf(stderr, "time_mem_kernel: %0.2lf s\n", gprof[G_MEM_KERNEL] * 1.0 / proc_freq);
    fprintf(stderr, "time_mem_pestat: %0.2lf s\n", gprof[G_MEM_PESTAT] * 1.0 / proc_freq);
    fprintf(stderr, "time_mem_sam: %0.2lf s\n", gprof[G_MEM_SAM] * 1.0 / proc_freq);

    /* Per-thread stages: printed as "avg (max, min)" across the nthreads slots. */
    fprintf(stderr, "\n[steps in kernel]\n");
    find_opt(tprof[T_SEED_ALL], nthreads, &max, &min, &avg);
    fprintf(stderr, "time_seed_all: %0.2lf (%0.2lf, %0.2lf) s\n", avg, max, min);
    find_opt(tprof[T_CHAIN_ALL], nthreads, &max, &min, &avg);
    fprintf(stderr, "time_chain_all: %0.2lf (%0.2lf, %0.2lf) s\n", avg, max, min);
    find_opt(tprof[T_ALN_ALL], nthreads, &max, &min, &avg);
    fprintf(stderr, "time_aln_all: %0.2lf (%0.2lf, %0.2lf) s\n", avg, max, min);
    find_opt(tprof[T_INS_SIZE], nthreads, &max, &min, &avg);
    fprintf(stderr, "time_ins_size_all: %0.2lf (%0.2lf, %0.2lf) s\n", avg, max, min);

    fprintf(stderr, "\n[steps in seeding]\n");
    /* Note the different column order here: max, min, avg. */
    find_opt(tprof[T_SEED_1], nthreads, &max, &min, &avg);
    fprintf(stderr, "time_seed_1: %0.2lf s %0.2lf s %0.2lf s\n", max, min, avg);
    find_opt(tprof[T_SEED_2], nthreads, &max, &min, &avg);
    fprintf(stderr, "time_seed_2: %0.2lf s\n", avg);
    find_opt(tprof[T_SEED_3], nthreads, &max, &min, &avg);
    fprintf(stderr, "time_seed_3: %0.2lf s\n", avg);

    fprintf(stderr, "\n[steps in chain]\n");
    find_opt(tprof[T_GEN_CHAIN], nthreads, &max, &min, &avg);
    fprintf(stderr, "time_gen_chain: %0.2lf s\n", avg);
    find_opt(tprof[T_FLT_CHAIN], nthreads, &max, &min, &avg);
    fprintf(stderr, "time_flt_chain: %0.2lf s\n", avg);
    find_opt(tprof[T_FLT_CHANNED_SEEDS], nthreads, &max, &min, &avg);
    fprintf(stderr, "time_flt_chained_seeds: %0.2lf s\n", avg);
    find_opt(tprof[T_SAL], nthreads, &max, &min, &avg);
    fprintf(stderr, "time_sal: %0.2lf s\n", avg);
    find_opt(tprof[T_BSW], nthreads, &max, &min, &avg);
    fprintf(stderr, "time_bsw: %0.2lf s\n", avg);

    fprintf(stderr, "\n[steps in gen sam]\n");
    find_opt(tprof[T_SAM_MATESW], nthreads, &max, &min, &avg);
    fprintf(stderr, "time_mate_sw: %0.2lf s\n", avg);
    find_opt(tprof[T_KSW_ALIGN2], nthreads, &max, &min, &avg);
    fprintf(stderr, "time_ksw_align2: %0.2lf s\n", avg);
    find_opt(tprof[T_KSW_LOOP], nthreads, &max, &min, &avg);
    fprintf(stderr, "time_ksw_loop: %0.2lf s\n", avg);
    find_opt(tprof[T_KSW_REVERSE], nthreads, &max, &min, &avg);
    fprintf(stderr, "time_ksw_reverse: %0.2lf s\n", avg);
    find_opt(tprof[T_SAM_REG2ALN], nthreads, &max, &min, &avg);
    fprintf(stderr, "time_reg2aln: %0.2lf s\n", avg);
    /* NOTE(review): "time_ksw_loop" is printed a second time, now from the
     * global counter; the label is kept unchanged for output compatibility. */
    fprintf(stderr, "time_ksw_loop: %0.2lf s\n", gprof[G_KSW_LOOP] * 1.0 / proc_freq);
    fprintf(stderr, "time_ksw_end_loop: %0.2lf s\n", gprof[G_KSW_END_LOOP] * 1.0 / proc_freq);

    /* #ifdef (not #if) so the guard matches the one around the gdat definition. */
#ifdef SHOW_DATA_PERF
    /* int64_t is not necessarily long: cast explicitly to match the conversion. */
    fprintf(stderr, "seq num: %lld\n", (long long)gdat[0]);
    fprintf(stderr, "full num: %lld\n", (long long)gdat[1]);
    /* Report 0% rather than dividing by zero when no sequence was counted. */
    fprintf(stderr, "percent: %0.2lf%c\n",
            gdat[0] != 0 ? (double)gdat[1] / gdat[0] * 100 : 0.0, '%');
#endif

    /* all_match_len, all_seq_num, all_type_hits and seed_time are not declared
     * in this file; their conversions are left as they were.
     * NOTE(review): confirm they really are long / double. */
    fprintf(stderr, "all_match_len: %ld\n", all_match_len);
    fprintf(stderr, "all_seq_num: %ld\n", all_seq_num);
    fprintf(stderr, "all_type_hits: %ld\n", all_type_hits);
    fprintf(stderr, "seed_time: %f\n", seed_time);
    /* NOTE(review): second "all_match_len" line, this one summed from
     * tprof[T_SEED_LEN]; label kept unchanged. get_sum() returns uint64_t. */
    fprintf(stderr, "all_match_len: %llu\n",
            (unsigned long long)get_sum(tprof[T_SEED_LEN], nthreads));

    /* Print max / min / avg for the seeding sub-step tprof[T_SEED_<mark>]. */
#define PRINT_SEED_TIME(mark) \
    do { \
        find_opt(tprof[T_SEED_##mark], nthreads, &max, &min, &avg); \
        fprintf(stderr, "time_seed_%s: %0.2lf s %0.2lf s %0.2lf s\n", #mark, max, min, avg); \
    } while (0)

    /*
     * Only one sub-step per seeding pass is reported. Marks that were
     * previously toggled here by hand (presumably each has a matching
     * T_SEED_* enumerator -- verify in profiling.h):
     *   1_ALL 1_0 1_1 1_2 1_3 1_3_2 .. 1_3_7
     *   2_ALL 2_0 2_1 2_2 2_2_1 .. 2_2_3
     *   3_ALL 3_0 3_1 3_2 3_3 3_3_1 3_3_2
     */
    PRINT_SEED_TIME(1_3_1);
    PRINT_SEED_TIME(2_2_0);
    PRINT_SEED_TIME(3_3_0);

#undef PRINT_SEED_TIME

    fprintf(stderr, "\n");
#else
    (void)nthreads; /* unused when profiling output is compiled out */
#endif
    return 1;
}