/* Description: profiling related data Copyright : All right reserved by ICT Author : Zhang Zhonghai Date : 2024/04/06 */ #include #include "utils.h" #include "profiling.h" #include "debug.h" #ifdef SHOW_PERF uint64_t tprof[LIM_THREAD_PROF_TYPE][LIM_THREAD] = {0}; uint64_t proc_freq = 1000; uint64_t gprof[LIM_GLOBAL_PROF_TYPE] = {0}; #endif #ifdef SHOW_DATA_PERF /* tdat[0]: read nums tdat[1]: seed-1 full match nums */ int64_t tdat[LIM_THREAD_DATA_TYPE][LIM_THREAD] = {0}; int64_t t_sd[LIM_THREAD_DATA_TYPE][LIM_THREAD] = {0}; int64_t gdat[LIM_GLOBAL_DATA_TYPE] = {0}; int64_t gd1[LIM_GLOBAL_DATA_TYPE] = {0}; #endif int64_t sum(int64_t *a, int len) { int64_t res = 0; int i = 0; for (i=0; i umax) umax = a[i]; if (a[i] < umin) umin = a[i]; uavg += a[i]; } *avg = uavg * 1.0 / len / proc_freq; *max = umax * 1.0 / proc_freq; *min = umin * 1.0 / proc_freq; return 1; } uint64_t get_sum(uint64_t *a, int len) { int i = 0; uint64_t all = 0; for (i = 0; i < len; i++) { all += a[i]; } return all; } int display_stats(int nthreads) { #ifdef SHOW_PERF double avg, max, min; int i; fprintf(stderr, "[steps in main_mem]\n"); fprintf(stderr, "time_parse_arg: %0.2lf s\n", gprof[G_PREPARE] * 1.0 / proc_freq); fprintf(stderr, "time_load_idx: %0.2lf s\n", gprof[G_LOAD_IDX] * 1.0 / proc_freq); fprintf(stderr, "time_pipeline: %0.2lf s\n", gprof[G_PIPELINE] * 1.0 / proc_freq); fprintf(stderr, "time_all: %0.2lf s\n", gprof[G_ALL] * 1.0 / proc_freq); fprintf(stderr, "\n[steps in pipeline]\n"); fprintf(stderr, "time_read: %0.2lf s\n", gprof[G_READ] * 1.0 / proc_freq); fprintf(stderr, "time_compute: %0.2lf s\n", gprof[G_COMPUTE] * 1.0 / proc_freq); fprintf(stderr, "time_write: %0.2lf s\n", gprof[G_WRITE] * 1.0 / proc_freq); fprintf(stderr, "\n[steps in mem_process_seqs]\n"); fprintf(stderr, "time_mem_prepare: %0.2lf s\n", gprof[G_MEM_PREPARE] * 1.0 / proc_freq); fprintf(stderr, "time_mem_kernel: %0.2lf s\n", gprof[G_MEM_KERNEL] * 1.0 / proc_freq); fprintf(stderr, "time_mem_pestat: %0.2lf s\n", gprof[G_MEM_PESTAT] * 1.0 / proc_freq); fprintf(stderr, "time_mem_sam: %0.2lf s\n", gprof[G_MEM_SAM] * 1.0 / proc_freq); fprintf(stderr, "\n[steps in kernel]\n"); find_opt(tprof[T_SEED_ALL], nthreads, &max, &min, &avg); fprintf(stderr, "time_seed_all: %0.2lf (%0.2lf, %0.2lf) s\n", avg, max, min); find_opt(tprof[T_CHAIN_ALL], nthreads, &max, &min, &avg); fprintf(stderr, "time_chain_all: %0.2lf (%0.2lf, %0.2lf) s\n", avg, max, min); find_opt(tprof[T_ALN_ALL], nthreads, &max, &min, &avg); fprintf(stderr, "time_aln_all: %0.2lf (%0.2lf, %0.2lf) s\n", avg, max, min); find_opt(tprof[T_INS_SIZE], nthreads, &max, &min, &avg); fprintf(stderr, "time_ins_size_all: %0.2lf (%0.2lf, %0.2lf) s\n", avg, max, min); fprintf(stderr, "\n[steps in seeding]\n"); find_opt(tprof[T_SEED_1], nthreads, &max, &min, &avg); fprintf(stderr, "time_seed_1: %0.2lf s %0.2lf s %0.2lf s\n", max, min, avg); find_opt(tprof[T_SEED_2], nthreads, &max, &min, &avg); fprintf(stderr, "time_seed_2: %0.2lf s\n", avg); find_opt(tprof[T_SEED_3], nthreads, &max, &min, &avg); fprintf(stderr, "time_seed_3: %0.2lf s\n", avg); fprintf(stderr, "\n[steps in chain]\n"); find_opt(tprof[T_GEN_CHAIN], nthreads, &max, &min, &avg); fprintf(stderr, "time_gen_chain: %0.2lf s\n", avg); find_opt(tprof[T_FLT_CHAIN], nthreads, &max, &min, &avg); fprintf(stderr, "time_flt_chain: %0.2lf s\n", avg); find_opt(tprof[T_FLT_CHANNED_SEEDS], nthreads, &max, &min, &avg); fprintf(stderr, "time_flt_chained_seeds: %0.2lf s\n", avg); find_opt(tprof[T_SAL], nthreads, &max, &min, &avg); fprintf(stderr, "time_sal: %0.2lf s\n", avg); find_opt(tprof[T_BSW], nthreads, &max, &min, &avg); fprintf(stderr, "time_bsw: %0.2lf s\n", avg); fprintf(stderr, "\n[steps in gen sam]\n"); find_opt(tprof[T_SAM_MATESW], nthreads, &max, &min, &avg); fprintf(stderr, "time_mate_sw: %0.2lf s\n", avg); find_opt(tprof[T_KSW_ALIGN2], nthreads, &max, &min, &avg); fprintf(stderr, "time_ksw_align2: %0.2lf s\n", avg); find_opt(tprof[T_KSW_LOOP], nthreads, &max, &min, &avg); fprintf(stderr, "time_ksw_loop: %0.2lf s\n", avg); find_opt(tprof[T_KSW_REVERSE], nthreads, &max, &min, &avg); fprintf(stderr, "time_ksw_reverse: %0.2lf s\n", avg); find_opt(tprof[T_SAM_REG2ALN], nthreads, &max, &min, &avg); fprintf(stderr, "time_reg2aln: %0.2lf s\n", avg); fprintf(stderr, "time_ksw_loop: %0.2lf s\n", gprof[G_KSW_LOOP] * 1.0 / proc_freq); fprintf(stderr, "time_ksw_end_loop: %0.2lf s\n", gprof[G_KSW_END_LOOP] * 1.0 / proc_freq); fprintf(stderr, "seq num: %ld\n", gdat[0]); fprintf(stderr, "full num: %ld\n", gdat[1]); fprintf(stderr, "percent: %0.2lf%c\n", (double)gdat[1] / gdat[0] * 100, '%'); fprintf(stderr, "all_match_len: %ld\n", all_match_len); fprintf(stderr, "all_seq_num: %ld\n", all_seq_num); fprintf(stderr, "all_type_hits: %ld\n", all_type_hits); fprintf(stderr, "seed_time: %f\n", seed_time); fprintf(stderr, "all_match_len: %ld\n", get_sum(tprof[T_SEED_LEN], nthreads)); #define PRINT_SEED_TIME(mark) \ find_opt(tprof[T_SEED_##mark], nthreads, &max, &min, &avg); \ fprintf(stderr, "time_seed_%s: %0.2lf s %0.2lf s %0.2lf s\n", #mark, max, min, avg); #if 1 // PRINT_SEED_TIME(1_ALL); // PRINT_SEED_TIME(1_0); // PRINT_SEED_TIME(1_1); // PRINT_SEED_TIME(1_2); // PRINT_SEED_TIME(1_3); PRINT_SEED_TIME(1_3_1); // PRINT_SEED_TIME(1_3_2); // PRINT_SEED_TIME(1_3_3); // PRINT_SEED_TIME(1_3_4); // PRINT_SEED_TIME(1_3_5); // PRINT_SEED_TIME(1_3_6); // PRINT_SEED_TIME(1_3_7); #endif #if 1 // PRINT_SEED_TIME(2_ALL); // PRINT_SEED_TIME(2_0); // PRINT_SEED_TIME(2_1); // PRINT_SEED_TIME(2_2); PRINT_SEED_TIME(2_2_0); // PRINT_SEED_TIME(2_2_1); // PRINT_SEED_TIME(2_2_2); // PRINT_SEED_TIME(2_2_3); #endif #if 1 // PRINT_SEED_TIME(3_ALL); // PRINT_SEED_TIME(3_0); // PRINT_SEED_TIME(3_1); // PRINT_SEED_TIME(3_2); // PRINT_SEED_TIME(3_3); PRINT_SEED_TIME(3_3_0); // PRINT_SEED_TIME(3_3_1); // PRINT_SEED_TIME(3_3_2); #endif double all = 0; for (i = 0; i < 50; ++i) { //all += sum(tdat[i], nthreads); // fprintf(stderr, "sum %d: %ld\n", i, sum(tdat[i], nthreads)); } for (i = 0; i < 50; ++i) { //all += sum(tdat[i], nthreads); // fprintf(stderr, "%d: %f\n", i, sum(tdat[i], nthreads) * 100 / all); } #if 0 uint64_t b64 = 0, u64 = 0; for (i = 0; i < 256; ++i) { uint64_t s = sum(t_sd[i], nthreads); if (i < 64) b64 += s; else u64 += s; fprintf(stderr, "addr %d: %ld\n", i, s); } fprintf(stderr, "b64 %ld; u64 %ld\n", b64, u64); #endif // fprintf(stderr, "sum 0: %ld\n", sum(tdat[TD_SEED_1_0], nthreads)); // fprintf(stderr, "sum 1: %ld\n", sum(tdat[TD_SEED_1_1], nthreads)); // fprintf(stderr, "sum 2: %ld\n", sum(tdat[TD_SEED_1_2], nthreads)); // fprintf(stderr, "sum 3: %ld\n", sum(tdat[TD_SEED_1_3], nthreads)); // fprintf(stderr, "sum 4: %ld\n", sum(tdat[TD_SEED_1_4], nthreads)); // fprintf(stderr, "sum 5: %ld\n", sum(tdat[TD_SEED_1_5], nthreads)); // int i; // for (i=0; i