/*
 * Description: profiling related data
 * Copyright  : All rights reserved by ICT
 * Author     : Zhang Zhonghai
 * Date       : 2024/04/06
 */
/* NOTE(review): the first #include below has no header name in this copy of
 * the file; restore it from version control. */
#include
#include "utils.h"
#include "profiling.h"
#include "debug.h"

/* Divisor applied to raw counter values before they are reported (every
 * reported figure in this file is computed as counter * 1.0 / proc_freq).
 * Initialised to 1000 here.
 * NOTE(review): presumably overwritten elsewhere with the real timer
 * frequency - confirm against the code that fills the counters. */
uint64_t proc_freq = 1000;

#ifdef SHOW_PERF
/* Per-thread profiling counters, indexed [profile type][thread]. */
uint64_t tprof[LIM_THREAD_PROF_TYPE][LIM_THREAD] = {0};
/* Process-wide profiling counters, indexed by profile type. */
uint64_t gprof[LIM_GLOBAL_PROF_TYPE] = {0};
#endif

#ifdef SHOW_DATA_PERF
/* tdat[0]: read nums; tdat[1]: seed-1 full match nums */
int64_t tdat[LIM_THREAD_DATA_TYPE][LIM_THREAD] = {0};
/* Same shape as tdat; its purpose is not evident from this part of the file. */
int64_t t_sd[LIM_THREAD_DATA_TYPE][LIM_THREAD] = {0};
/* Process-wide data counters, indexed by data type. */
int64_t gdat[LIM_GLOBAL_DATA_TYPE] = {0};
/* Same shape as gdat; its purpose is not evident from this part of the file. */
int64_t gd1[LIM_GLOBAL_DATA_TYPE] = {0};
#endif

/* NOTE(review): the text on the next line is damaged. It jumps from the start
 * of the loop header in sum() straight into the middle of another function
 * (the one that fills the avg, max and min out-parameters and returns 1).
 * The remainder of sum() and the header and local declarations of that second
 * function are missing. The text is kept verbatim here; restore both
 * functions from version control rather than reconstructing them by hand. */
int64_t sum(int64_t *a, int len) { int64_t res = 0; int i = 0; for (i=0; i umax) umax = a[i]; if (a[i] < umin) umin = a[i]; uavg += a[i]; } *avg = uavg * 1.0 / len / proc_freq; *max = umax * 1.0 / proc_freq; *min = umin * 1.0 / proc_freq; return 1; }

/*
 * Return the sum of the first len elements of a.
 *
 * a   : array of counters to add up; it is only read, never written.
 * len : number of elements to add; when len <= 0 the loop body never runs
 *       and the result is 0.
 *
 * The accumulator is a uint64_t, so a total that exceeds its range wraps
 * around instead of being reported as an error.
 */
uint64_t get_sum(uint64_t *a, int len)
{
    int i = 0;
    uint64_t all = 0;
    for (i = 0; i < len; i++) {
        all += a[i];
    }
    return all;
}

int display_stats(int nthreads) { #ifdef SHOW_PERF #define FORMAT_PERF_OUT(name, sec, layer) \ do { \ int i = 0; \ for (i = 0; i < layer; i++) fprintf(stderr, " "); \ fprintf(stderr, "%s: %0.2lf s\n", name, sec); \ } while (0) #define FORMAT_PERF_OUT_3(name, sec_arr, layer) \ do { \ int i = 0; \ double avg, max, min; \ for (i = 0; i < layer; i++) fprintf(stderr, " "); \ find_opt(sec_arr, nthreads, &max, &min, &avg); \ fprintf(stderr, "%s: %0.2lf (%0.2lf, %0.2lf) s\n", name, avg, max, min); \ } while (0) #define FORMAT_PERF_OUT_SUM_3(name, sec_arr, sec_arr1, layer) \ do { \ int i = 0; \ double avg, max, min, avg1, max1, min1; \ for (i = 0; i < layer; i++) fprintf(stderr, " "); \ find_opt(sec_arr, nthreads, &max, &min, &avg); \ find_opt(sec_arr1, nthreads, &max1, &min1, &avg1); \ fprintf(stderr, "%s: %0.2lf (%0.2lf, %0.2lf) s\n", name, avg + avg1, max + max1, min + min1); \ } while (0) #define FORMAT_PERF_OUT_SUB_3(name, sec_arr, sec_arr1, layer) \ do { \ int i = 0; \ double avg, max, 
min, avg1, max1, min1; \ for (i = 0; i < layer; i++) fprintf(stderr, " "); \ find_opt(sec_arr, nthreads, &max, &min, &avg); \ find_opt(sec_arr1, nthreads, &max1, &min1, &avg1); \ fprintf(stderr, "%s: %0.2lf (%0.2lf, %0.2lf) s\n", name, avg - avg1, max - max1, min - min1); \ } while (0) // for overall pipeline FORMAT_PERF_OUT("all", gprof[G_ALL] * 1.0 / proc_freq, 0); FORMAT_PERF_OUT("load_idx", gprof[G_LOAD_IDX] * 1.0 / proc_freq, 1); FORMAT_PERF_OUT("pipeline", gprof[G_PIPELINE] * 1.0 / proc_freq, 1); FORMAT_PERF_OUT("read", gprof[G_READ] * 1.0 / proc_freq, 2); FORMAT_PERF_OUT("uncompress", gprof[G_UNCOMPRESS] * 1.0 / proc_freq, 3); FORMAT_PERF_OUT("compute", gprof[G_COMPUTE] * 1.0 / proc_freq, 2); FORMAT_PERF_OUT("seeding-extension", gprof[G_SEED_AND_EXT] * 1.0 / proc_freq, 3); FORMAT_PERF_OUT("gen-sam", gprof[G_GEN_SAM] * 1.0 / proc_freq, 3); FORMAT_PERF_OUT("write", gprof[G_WRITE] * 1.0 / proc_freq, 2); fprintf(stderr, "\n"); // for seeding FORMAT_PERF_OUT_SUM_3("seed-chain", tprof[T_SEEDING], tprof[T_CHAIN], 0); FORMAT_PERF_OUT_3("seeding", tprof[T_SEEDING], 1); FORMAT_PERF_OUT_3("gen-seq", tprof[T_GEN_SEQ], 2); FORMAT_PERF_OUT_3("smem", tprof[T_SMEM], 2); FORMAT_PERF_OUT_3("seeding-1", tprof[T_SEED_1], 3); FORMAT_PERF_OUT_3("seeding-2", tprof[T_SEED_2], 3); FORMAT_PERF_OUT_3("seeding-3", tprof[T_SEED_3], 3); FORMAT_PERF_OUT_3("chain", tprof[T_CHAIN], 1); FORMAT_PERF_OUT_3("gen-chain", tprof[T_GEN_CHAIN], 2); FORMAT_PERF_OUT_3("sa", tprof[T_SA], 3); FORMAT_PERF_OUT_3("flt-chain", tprof[T_FLT_CHAIN],2); FORMAT_PERF_OUT_SUM_3("smem + sa", tprof[T_SMEM], tprof[T_SA], 1); fprintf(stderr, "\n"); // for extension FORMAT_PERF_OUT_3("extension", tprof[T_EXTENSION], 0); FORMAT_PERF_OUT_3("mem_chain2aln", tprof[T_MEM_CHAIN2ALN], 1); FORMAT_PERF_OUT_3("bsw_ext", tprof[T_BSW_EXT], 2); FORMAT_PERF_OUT_SUB_3("other", tprof[T_MEM_CHAIN2ALN], tprof[T_BSW_EXT], 2); FORMAT_PERF_OUT_3("sort_dedup", tprof[T_SORT_DEDUP], 1); // for gen-sam FORMAT_PERF_OUT("gen-sam", 
gprof[G_GEN_SAM] * 1.0 / proc_freq, 0); FORMAT_PERF_OUT_3("sam_mate_sw", tprof[T_SAM_MATESW], 1); FORMAT_PERF_OUT_3("sam_reg2aln", tprof[T_SAM_REG2ALN], 1); #if 0 #if SHOW_DATA_PERF fprintf(stderr, "seq num: %ld\n", gdat[0]); fprintf(stderr, "full num: %ld\n", gdat[1]); fprintf(stderr, "percent: %0.2lf%c\n", (double)gdat[1] / gdat[0] * 100, '%'); #endif fprintf(stderr, "all_match_len: %ld\n", all_match_len); fprintf(stderr, "all_seq_num: %ld\n", all_seq_num); fprintf(stderr, "all_type_hits: %ld\n", all_type_hits); fprintf(stderr, "seed_time: %f\n", seed_time); fprintf(stderr, "all_match_len: %ld\n", get_sum(tprof[T_SEED_LEN], nthreads)); #define PRINT_SEED_TIME(mark) \ find_opt(tprof[T_SEED_##mark], nthreads, &max, &min, &avg); \ fprintf(stderr, "time_seed_%s: %0.2lf s %0.2lf s %0.2lf s\n", #mark, max, min, avg); #if 1 // PRINT_SEED_TIME(1_ALL); // PRINT_SEED_TIME(1_0); // PRINT_SEED_TIME(1_1); // PRINT_SEED_TIME(1_2); // PRINT_SEED_TIME(1_3); PRINT_SEED_TIME(1_3_1); // PRINT_SEED_TIME(1_3_2); // PRINT_SEED_TIME(1_3_3); // PRINT_SEED_TIME(1_3_4); // PRINT_SEED_TIME(1_3_5); // PRINT_SEED_TIME(1_3_6); // PRINT_SEED_TIME(1_3_7); #endif #if 1 // PRINT_SEED_TIME(2_ALL); // PRINT_SEED_TIME(2_0); // PRINT_SEED_TIME(2_1); // PRINT_SEED_TIME(2_2); PRINT_SEED_TIME(2_2_0); // PRINT_SEED_TIME(2_2_1); // PRINT_SEED_TIME(2_2_2); // PRINT_SEED_TIME(2_2_3); #endif #if 1 // PRINT_SEED_TIME(3_ALL); // PRINT_SEED_TIME(3_0); // PRINT_SEED_TIME(3_1); // PRINT_SEED_TIME(3_2); // PRINT_SEED_TIME(3_3); PRINT_SEED_TIME(3_3_0); // PRINT_SEED_TIME(3_3_1); // PRINT_SEED_TIME(3_3_2); #endif double all = 0; int i; for (i = 0; i < 50; ++i) { //all += sum(tdat[i], nthreads); // fprintf(stderr, "sum %d: %ld\n", i, sum(tdat[i], nthreads)); } for (i = 0; i < 50; ++i) { //all += sum(tdat[i], nthreads); // fprintf(stderr, "%d: %f\n", i, sum(tdat[i], nthreads) * 100 / all); } #if 0 uint64_t b64 = 0, u64 = 0; for (i = 0; i < 256; ++i) { uint64_t s = sum(t_sd[i], nthreads); if (i < 64) b64 += s; else u64 
+= s; fprintf(stderr, "addr %d: %ld\n", i, s); } fprintf(stderr, "b64 %ld; u64 %ld\n", b64, u64); #endif // fprintf(stderr, "sum 0: %ld\n", sum(tdat[TD_SEED_1_0], nthreads)); // fprintf(stderr, "sum 1: %ld\n", sum(tdat[TD_SEED_1_1], nthreads)); // fprintf(stderr, "sum 2: %ld\n", sum(tdat[TD_SEED_1_2], nthreads)); // fprintf(stderr, "sum 3: %ld\n", sum(tdat[TD_SEED_1_3], nthreads)); // fprintf(stderr, "sum 4: %ld\n", sum(tdat[TD_SEED_1_4], nthreads)); // fprintf(stderr, "sum 5: %ld\n", sum(tdat[TD_SEED_1_5], nthreads)); // int i; // for (i=0; i