hyb-align/profiling.c

264 lines
9.9 KiB
C

/*
Description: profiling related data
Copyright : All rights reserved by ICT
Author : Zhang Zhonghai
Date : 2024/04/06
*/
#include <stdio.h>
#include "utils.h"
#include "profiling.h"
#include "debug.h"
/* Divisor applied to every raw timing counter before it is printed with an
   "s" (seconds) suffix by the reporting code in this file.
   NOTE(review): the default of 1000 presumably means the counters are kept
   in milliseconds -- confirm against the timer used where they are filled. */
uint64_t proc_freq = 1000;
#ifdef SHOW_PERF
/* Per-thread timing counters, indexed as [profiling type][thread]. */
uint64_t tprof[LIM_THREAD_PROF_TYPE][LIM_THREAD] = {0};
/* Process-wide timing counters, indexed by global profiling type. */
uint64_t gprof[LIM_GLOBAL_PROF_TYPE] = {0};
#endif
#ifdef SHOW_DATA_PERF
/*
Per-thread data (event) counters, indexed as [data type][thread].
Slot notes kept from the original author:
tdat[0]: read nums
tdat[1]: seed-1 full match nums
NOTE(review): display_stats() indexes this table through TD_* constants;
confirm that the slot numbers above still match those constants.
*/
int64_t tdat[LIM_THREAD_DATA_TYPE][LIM_THREAD] = {0};
/* Second per-thread table with the same dimensions as tdat.
   NOTE(review): not read by the active reporting code in this file;
   purpose to be confirmed. */
int64_t t_sd[LIM_THREAD_DATA_TYPE][LIM_THREAD] = {0};
/* Process-wide data counters; gdat[GD_READ_CNT] is the denominator of the
   per-read averages printed by display_stats(). */
int64_t gdat[LIM_GLOBAL_DATA_TYPE] = {0};
/* Second process-wide table with the same dimension as gdat.
   NOTE(review): not read by the active reporting code in this file;
   purpose to be confirmed. */
int64_t gd1[LIM_GLOBAL_DATA_TYPE] = {0};
#endif
/*
 * Add up the first `len` entries of `a`, front to back.
 *
 * a   : array holding at least `len` values
 * len : number of entries to add; a value <= 0 adds nothing
 *
 * Returns the total (0 when nothing was added).
 */
int64_t sum(int64_t *a, int len) {
    int64_t total = 0;
    int remaining = len;

    while (remaining > 0) {
        total += *a++;
        --remaining;
    }
    return total;
}
/*
 * Compute the maximum, minimum and mean of the first `len` counters in `a`,
 * each divided by the global `proc_freq`.
 *
 * a             : array of raw counters (at least `len` entries)
 * len           : number of entries to scan
 * max, min, avg : outputs; receive the scaled maximum, minimum and mean
 *
 * Returns 1 when the statistics were computed. When `len` is not positive
 * there is nothing to summarise: all three outputs are set to 0.0 and 0 is
 * returned, instead of dividing by zero and reporting UINT64_MAX as minimum.
 *
 * The running total is accumulated in uint64_t, so it wraps around if the
 * counters add up to more than UINT64_MAX.
 */
int find_opt(uint64_t *a, int len, double *max, double *min, double *avg)
{
    int i;
    uint64_t umax = 0;
    uint64_t umin = UINT64_MAX;
    uint64_t utotal = 0;

    if (len <= 0) {
        *max = 0.0;
        *min = 0.0;
        *avg = 0.0;
        return 0;
    }

    for (i = 0; i < len; i++) {
        if (a[i] > umax) {
            umax = a[i];
        }
        if (a[i] < umin) {
            umin = a[i];
        }
        utotal += a[i];
    }

    /* "* 1.0" promotes to double before dividing so fractions are kept. */
    *avg = utotal * 1.0 / len / proc_freq;
    *max = umax * 1.0 / proc_freq;
    *min = umin * 1.0 / proc_freq;
    return 1;
}
/*
 * Return the total of the first `len` entries of `a`.
 *
 * a   : array holding at least `len` values
 * len : how many entries to include; nothing is added when len <= 0
 *
 * Performs the same computation as sum() in this file.
 */
int64_t get_sum(int64_t *a, int len) {
    int64_t total = 0;
    int pos = 0;

    while (pos < len) {
        total += a[pos];
        pos += 1;
    }
    return total;
}
#ifdef SHOW_PERF
/* Write one indentation unit per nesting level so the report reads as a tree. */
static void stats_indent(int layer)
{
    int i;

    for (i = 0; i < layer; i++) {
        fprintf(stderr, " ");
    }
}

/* Print one process-wide counter, scaled by proc_freq, at depth `layer`. */
static void stats_print_global(const char *name, uint64_t ticks, int layer)
{
    stats_indent(layer);
    fprintf(stderr, "%s: %0.2lf s\n", name, ticks * 1.0 / proc_freq);
}

/* Print "avg (max, min)" over the first `nthreads` entries of one
   per-thread counter row, as computed by find_opt(). */
static void stats_print_thread(const char *name, uint64_t *row, int nthreads, int layer)
{
    double avg = 0.0, max = 0.0, min = 0.0;

    stats_indent(layer);
    find_opt(row, nthreads, &max, &min, &avg);
    fprintf(stderr, "%s: %0.2lf (%0.2lf, %0.2lf) s\n", name, avg, max, min);
}

/*
 * Print the combination of two per-thread counter rows: lhs + rhs, or
 * lhs - rhs when `subtract` is non-zero.
 *
 * Each row is summarised by find_opt() on its own and the three figures are
 * then combined, so the (max, min) columns are the sum/difference of the two
 * rows' extremes, not the extremes of the per-thread sum/difference.
 */
static void stats_print_thread_pair(const char *name, uint64_t *lhs, uint64_t *rhs,
                                    int subtract, int nthreads, int layer)
{
    double avg = 0.0, max = 0.0, min = 0.0;
    double avg1 = 0.0, max1 = 0.0, min1 = 0.0;

    stats_indent(layer);
    find_opt(lhs, nthreads, &max, &min, &avg);
    find_opt(rhs, nthreads, &max1, &min1, &avg1);
    if (subtract) {
        avg -= avg1;
        max -= max1;
        min -= min1;
    } else {
        avg += avg1;
        max += max1;
        min += min1;
    }
    fprintf(stderr, "%s: %0.2lf (%0.2lf, %0.2lf) s\n", name, avg, max, min);
}
#endif /* SHOW_PERF */

/*
 * Print the collected profiling report to stderr.
 *
 * With SHOW_PERF defined, the process-wide timers (gprof) and the per-thread
 * timers (tprof, summarised over the first `nthreads` threads) are printed as
 * an indented tree. With SHOW_DATA_PERF defined, the per-read averages and
 * alignment counts derived from tdat/gdat are printed as well. A trailing
 * blank line is always written.
 *
 * nthreads : number of per-thread slots to include in the summaries
 *
 * Returns 0.
 */
int display_stats(int nthreads)
{
    /* Keeps builds with both reporting switches disabled warning-free. */
    (void)nthreads;

#ifdef SHOW_PERF
    // for overall pipeline
    stats_print_global("all", gprof[G_ALL], 0);
    stats_print_global("load_idx", gprof[G_LOAD_IDX], 1);
    stats_print_global("pipeline", gprof[G_PIPELINE], 1);
    stats_print_global("read", gprof[G_READ], 2);
    stats_print_global("uncompress", gprof[G_UNCOMPRESS], 3);
    stats_print_global("compute", gprof[G_COMPUTE], 2);
    stats_print_global("seeding-extension", gprof[G_SEED_AND_EXT], 3);
    stats_print_global("gen-sam", gprof[G_GEN_SAM], 3);
    stats_print_global("write", gprof[G_WRITE], 2);
    fprintf(stderr, "\n");

    // for seeding
    stats_print_thread_pair("seed-chain", tprof[T_SEEDING], tprof[T_CHAIN], 0, nthreads, 0);
    stats_print_thread("seeding", tprof[T_SEEDING], nthreads, 1);
    stats_print_thread("gen-seq", tprof[T_GEN_SEQ], nthreads, 2);
    stats_print_thread("smem", tprof[T_SMEM], nthreads, 2);
    stats_print_thread("seeding-1", tprof[T_SEED_1], nthreads, 3);
    stats_print_thread("seeding-2", tprof[T_SEED_2], nthreads, 3);
    stats_print_thread("seeding-3", tprof[T_SEED_3], nthreads, 3);
    stats_print_thread("chain", tprof[T_CHAIN], nthreads, 1);
    stats_print_thread("gen-chain", tprof[T_GEN_CHAIN], nthreads, 2);
    stats_print_thread("sa", tprof[T_SA], nthreads, 3);
    stats_print_thread("flt-chain", tprof[T_FLT_CHAIN], nthreads, 2);
    stats_print_thread_pair("smem + sa", tprof[T_SMEM], tprof[T_SA], 0, nthreads, 1);
    fprintf(stderr, "\n");

    // for extension
    stats_print_thread("extension", tprof[T_EXTENSION], nthreads, 0);
    stats_print_thread("mem_chain2aln", tprof[T_MEM_CHAIN2ALN], nthreads, 1);
    stats_print_thread("bsw_ext", tprof[T_BSW_EXT], nthreads, 2);
    stats_print_thread_pair("other", tprof[T_MEM_CHAIN2ALN], tprof[T_BSW_EXT], 1, nthreads, 2);
    stats_print_thread("sort_dedup", tprof[T_SORT_DEDUP], nthreads, 1);

    // for gen-sam
    stats_print_global("gen-sam", gprof[G_GEN_SAM], 0);
    stats_print_global("get_matesw_data", gprof[G_get_matesw_data], 1);
    stats_print_global("update_stats_cache", gprof[G_update_stats_cache], 1);
    stats_print_global("gather_matesw_task", gprof[G_gather_matesw_task], 1);
    stats_print_global("calc_matesw", gprof[G_calc_matesw], 1);
    stats_print_global("gen_sam", gprof[G_gen_sam], 1);
    stats_print_thread("sam_mate_sw", tprof[T_SAM_MATESW], nthreads, 1);
    stats_print_thread("mate_sw_1", tprof[T_MSW_1], nthreads, 2);
    stats_print_thread("mate_sw_2", tprof[T_MSW_2], nthreads, 2);
    stats_print_thread("sam_reg2aln", tprof[T_SAM_REG2ALN], nthreads, 1);
    stats_print_thread("sam_gen_alt", tprof[T_SAM_GEN_ALT], nthreads, 1);
    stats_print_thread("sam_aln2sam", tprof[T_SAM_ALN2SAM], nthreads, 1);
#endif

#ifdef SHOW_DATA_PERF
    {
        const int64_t reads = gdat[GD_READ_CNT];
        const int64_t seeds = get_sum(tdat[TD_SEED_CNT], nthreads);
        const int64_t matesw = get_sum(tdat[TD_MSW_CNT], nthreads);
        /* With no reads recorded there is nothing to average over; report
           0.00 rather than dividing by zero. */
        const double avg_seeds = (reads != 0) ? seeds * 1.0 / reads : 0.0;
        const double avg_matesw = (reads != 0) ? matesw * 1.0 / reads : 0.0;

        fprintf(stderr, "\n");
        fprintf(stderr, "average seed cnt: %0.2lf\n", avg_seeds);
        fprintf(stderr, "average matesw cnt: %0.2lf\n", avg_matesw);
        /* int64_t is not `long` on every platform, so convert explicitly to
           the type the conversion specifier expects. */
        fprintf(stderr, "align 1 cnt: %lld\n", (long long)get_sum(tdat[TD_ALIGN_1_CNT], nthreads));
        fprintf(stderr, "align 2 cnt: %lld\n", (long long)get_sum(tdat[TD_ALIGN_2_CNT], nthreads));
    }
#endif

    fprintf(stderr, "\n");
    return 0;
}