264 lines
9.9 KiB
C
264 lines
9.9 KiB
C
/*
    Description: profiling related data

    Copyright  : All rights reserved by ICT

    Author     : Zhang Zhonghai
    Date       : 2024/04/06
*/
|
|
|
|
#include <stdio.h>
|
|
#include "utils.h"
|
|
#include "profiling.h"
|
|
#include "debug.h"
|
|
|
|
/*
 * Divisor applied to raw profiling counters before they are printed with an
 * "s" (seconds) suffix; read by find_opt() and display_stats().
 * NOTE(review): presumably the timer frequency in ticks per second, in which
 * case the default of 1000 corresponds to millisecond counters -- confirm
 * against the code that fills the counters.
 */
uint64_t proc_freq = 1000;
|
|
|
|
#ifdef SHOW_PERF
/*
 * Timing counters, compiled in only when SHOW_PERF is defined.
 *
 * tprof: one row per thread-level profiling type; display_stats() aggregates
 *        the first `nthreads` entries of a row.
 * gprof: one slot per global profiling type.
 */
uint64_t tprof[LIM_THREAD_PROF_TYPE][LIM_THREAD] = {{0}};
uint64_t gprof[LIM_GLOBAL_PROF_TYPE] = {0};
#endif /* SHOW_PERF */
|
|
|
|
#ifdef SHOW_DATA_PERF
/*
 * Event counters, compiled in only when SHOW_DATA_PERF is defined.
 *
 * tdat / t_sd: one row per thread-level data type; display_stats() sums the
 *              first `nthreads` entries of a tdat row.
 * gdat / gd1 : one slot per global data type.
 *
 * Original author's note on slot usage:
 *   tdat[0]: number of reads
 *   tdat[1]: number of seed-1 full matches
 * NOTE(review): display_stats() indexes tdat through TD_* constants -- confirm
 * that the slot meanings above are still current.
 */
int64_t tdat[LIM_THREAD_DATA_TYPE][LIM_THREAD] = {{0}};
int64_t t_sd[LIM_THREAD_DATA_TYPE][LIM_THREAD] = {{0}};
int64_t gdat[LIM_GLOBAL_DATA_TYPE] = {0};
int64_t gd1[LIM_GLOBAL_DATA_TYPE] = {0};
#endif /* SHOW_DATA_PERF */
|
|
|
|
/*
 * Add up the first `len` entries of `a`.
 *
 * a   : array of signed 64-bit values; entries 0 .. len-1 are read
 * len : number of entries to add; nothing is read when len <= 0
 *
 * Returns the accumulated total (0 when len <= 0).
 */
int64_t sum(int64_t *a, int len) {
    int64_t total = 0;
    const int64_t *cur = a;
    int remaining = len;

    /* Walk the array front to back, counting down the entries still to add. */
    while (remaining > 0) {
        total += *cur;
        ++cur;
        --remaining;
    }
    return total;
}
|
|
|
|
int find_opt(uint64_t *a, int len, double *max, double *min, double *avg)
|
|
{
|
|
int i = 0;
|
|
uint64_t umax = 0, umin = UINT64_MAX, uavg = 0;
|
|
for (i = 0; i < len; i++)
|
|
{
|
|
if (a[i] > umax) umax = a[i];
|
|
if (a[i] < umin) umin = a[i];
|
|
uavg += a[i];
|
|
}
|
|
*avg = uavg * 1.0 / len / proc_freq;
|
|
*max = umax * 1.0 / proc_freq;
|
|
*min = umin * 1.0 / proc_freq;
|
|
return 1;
|
|
}
|
|
|
|
/*
 * Total of the first `len` entries of `a`.
 *
 * a   : array of signed 64-bit values; entries 0 .. len-1 are read
 * len : number of entries to include
 *
 * Returns the total, or 0 when len <= 0 (the array is not touched then).
 */
int64_t get_sum(int64_t *a, int len) {
    if (len <= 0) {
        return 0;
    }

    int64_t acc = a[0];
    for (int k = 1; k < len; ++k) {
        acc += a[k];
    }
    return acc;
}
|
|
|
|
#ifdef SHOW_PERF

/* Write `layer` indent units (one space each) to stderr. */
static void prof_print_indent(int layer)
{
    for (int i = 0; i < layer; i++) {
        fprintf(stderr, " ");
    }
}

/* Print one global counter as seconds (raw value divided by proc_freq). */
static void prof_print_global(const char *name, uint64_t raw, int layer)
{
    prof_print_indent(layer);
    fprintf(stderr, "%s: %0.2lf s\n", name, raw * 1.0 / proc_freq);
}

/* Print "avg (max, min)" over the first `nthreads` entries of one per-thread row. */
static void prof_print_thread(const char *name, uint64_t *row, int nthreads, int layer)
{
    double avg = 0.0, max = 0.0, min = 0.0;

    prof_print_indent(layer);
    find_opt(row, nthreads, &max, &min, &avg);
    fprintf(stderr, "%s: %0.2lf (%0.2lf, %0.2lf) s\n", name, avg, max, min);
}

/* Print the element-wise sum of the avg/max/min statistics of two per-thread rows. */
static void prof_print_thread_sum(const char *name, uint64_t *row, uint64_t *row1,
                                  int nthreads, int layer)
{
    double avg = 0.0, max = 0.0, min = 0.0;
    double avg1 = 0.0, max1 = 0.0, min1 = 0.0;

    prof_print_indent(layer);
    find_opt(row, nthreads, &max, &min, &avg);
    find_opt(row1, nthreads, &max1, &min1, &avg1);
    fprintf(stderr, "%s: %0.2lf (%0.2lf, %0.2lf) s\n", name, avg + avg1, max + max1, min + min1);
}

/* Print the statistics of `row` minus those of `row1` (avg, max and min each subtracted). */
static void prof_print_thread_sub(const char *name, uint64_t *row, uint64_t *row1,
                                  int nthreads, int layer)
{
    double avg = 0.0, max = 0.0, min = 0.0;
    double avg1 = 0.0, max1 = 0.0, min1 = 0.0;

    prof_print_indent(layer);
    find_opt(row, nthreads, &max, &min, &avg);
    find_opt(row1, nthreads, &max1, &min1, &avg1);
    fprintf(stderr, "%s: %0.2lf (%0.2lf, %0.2lf) s\n", name, avg - avg1, max - max1, min - min1);
}

#endif /* SHOW_PERF */

/*
 * Print the collected profiling report to stderr.
 *
 * nthreads: number of leading per-thread entries of each tprof/tdat row that
 *           are aggregated.
 *
 * With SHOW_PERF defined, the global timers (gprof) are printed one value per
 * line and the per-thread timers (tprof) as "avg (max, min)", all divided by
 * proc_freq; the `layer` argument of each line sets its indent depth.
 * With SHOW_DATA_PERF defined, per-read averages and alignment totals from
 * tdat/gdat are printed as well.
 * A trailing blank line is always printed.
 *
 * Returns 0.
 */
int display_stats(int nthreads)
{
    /* Keeps the build warning-free when neither profiling macro is defined. */
    (void)nthreads;

#ifdef SHOW_PERF
    // for overall pipeline
    prof_print_global("all", gprof[G_ALL], 0);
    prof_print_global("load_idx", gprof[G_LOAD_IDX], 1);
    prof_print_global("pipeline", gprof[G_PIPELINE], 1);
    prof_print_global("read", gprof[G_READ], 2);
    prof_print_global("uncompress", gprof[G_UNCOMPRESS], 3);
    prof_print_global("compute", gprof[G_COMPUTE], 2);
    prof_print_global("seeding-extension", gprof[G_SEED_AND_EXT], 3);
    prof_print_global("gen-sam", gprof[G_GEN_SAM], 3);
    prof_print_global("write", gprof[G_WRITE], 2);

    fprintf(stderr, "\n");

    // for seeding
    prof_print_thread_sum("seed-chain", tprof[T_SEEDING], tprof[T_CHAIN], nthreads, 0);
    prof_print_thread("seeding", tprof[T_SEEDING], nthreads, 1);
    prof_print_thread("gen-seq", tprof[T_GEN_SEQ], nthreads, 2);
    prof_print_thread("smem", tprof[T_SMEM], nthreads, 2);
    prof_print_thread("seeding-1", tprof[T_SEED_1], nthreads, 3);
    prof_print_thread("seeding-2", tprof[T_SEED_2], nthreads, 3);
    prof_print_thread("seeding-3", tprof[T_SEED_3], nthreads, 3);
    prof_print_thread("chain", tprof[T_CHAIN], nthreads, 1);
    prof_print_thread("gen-chain", tprof[T_GEN_CHAIN], nthreads, 2);
    prof_print_thread("sa", tprof[T_SA], nthreads, 3);
    prof_print_thread("flt-chain", tprof[T_FLT_CHAIN], nthreads, 2);
    prof_print_thread_sum("smem + sa", tprof[T_SMEM], tprof[T_SA], nthreads, 1);

    fprintf(stderr, "\n");

    // for extension
    prof_print_thread("extension", tprof[T_EXTENSION], nthreads, 0);
    prof_print_thread("mem_chain2aln", tprof[T_MEM_CHAIN2ALN], nthreads, 1);
    prof_print_thread("bsw_ext", tprof[T_BSW_EXT], nthreads, 2);
    prof_print_thread_sub("other", tprof[T_MEM_CHAIN2ALN], tprof[T_BSW_EXT], nthreads, 2);
    prof_print_thread("sort_dedup", tprof[T_SORT_DEDUP], nthreads, 1);

    // for gen-sam
    prof_print_global("gen-sam", gprof[G_GEN_SAM], 0);
    prof_print_global("get_matesw_data", gprof[G_get_matesw_data], 1);
    prof_print_global("update_stats_cache", gprof[G_update_stats_cache], 1);
    prof_print_global("gather_matesw_task", gprof[G_gather_matesw_task], 1);
    prof_print_global("calc_matesw", gprof[G_calc_matesw], 1);
    prof_print_global("gen_sam", gprof[G_gen_sam], 1);

    prof_print_thread("sam_mate_sw", tprof[T_SAM_MATESW], nthreads, 1);
    prof_print_thread("mate_sw_1", tprof[T_MSW_1], nthreads, 2);
    prof_print_thread("mate_sw_2", tprof[T_MSW_2], nthreads, 2);
    prof_print_thread("sam_reg2aln", tprof[T_SAM_REG2ALN], nthreads, 1);
    prof_print_thread("sam_gen_alt", tprof[T_SAM_GEN_ALT], nthreads, 1);
    prof_print_thread("sam_aln2sam", tprof[T_SAM_ALN2SAM], nthreads, 1);
#endif /* SHOW_PERF */

#ifdef SHOW_DATA_PERF
    fprintf(stderr, "\n");
    fprintf(stderr, "average seed cnt: %0.2lf\n",
            get_sum(tdat[TD_SEED_CNT], nthreads) * 1.0 / gdat[GD_READ_CNT]);
    fprintf(stderr, "average matesw cnt: %0.2lf\n",
            get_sum(tdat[TD_MSW_CNT], nthreads) * 1.0 / gdat[GD_READ_CNT]);
    /* int64_t is not `long` on every ABI, so cast to match the %lld conversion. */
    fprintf(stderr, "align 1 cnt: %lld\n",
            (long long)get_sum(tdat[TD_ALIGN_1_CNT], nthreads));
    fprintf(stderr, "align 2 cnt: %lld\n",
            (long long)get_sum(tdat[TD_ALIGN_2_CNT], nthreads));
#endif /* SHOW_DATA_PERF */

    fprintf(stderr, "\n");
    return 0;
}
|