diff --git a/.gitignore b/.gitignore index b4a6f13..54233a1 100644 --- a/.gitignore +++ b/.gitignore @@ -3,6 +3,7 @@ bwa test test64 .*.swp +*.log Makefile.bak bwamem-lite test_index/ diff --git a/bwamem.c b/bwamem.c index c69ba8a..7ac2f3b 100644 --- a/bwamem.c +++ b/bwamem.c @@ -184,7 +184,8 @@ static void mem_collect_intv(const mem_opt_t* opt, const bwt_t* bwt, int len, co int split_len = (int)(opt->min_seed_len * opt->split_factor + .499); smem->mem.n = 0; // first pass: find all SMEMs - while (x < len) { + PROF_START(seed_1); + while (x < len) { if (seq[x] < 4) { x = bwt_smem1(bwt, len, seq, x, start_width, &a->mem1, a->tmpv); for (i = 0; i < a->mem1.n; ++i) { @@ -195,7 +196,10 @@ static void mem_collect_intv(const mem_opt_t* opt, const bwt_t* bwt, int len, co } } else ++x; } + PROF_END(tprof[T_SEED_1][tid], seed_1); + // second pass: find MEMs inside a long SMEM + PROF_START(seed_2); old_n = smem->mem.n; for (k = 0; k < old_n; ++k) { bwtintv_t *p = &smem->mem.a[k]; @@ -206,8 +210,11 @@ static void mem_collect_intv(const mem_opt_t* opt, const bwt_t* bwt, int len, co if ((uint32_t)a->mem1.a[i].info - (a->mem1.a[i].info>>32) >= opt->min_seed_len) kv_push(bwtintv_t, smem->mem, a->mem1.a[i]); } + PROF_END(tprof[T_SEED_2][tid], seed_2); + // third pass: LAST-like - if (opt->max_mem_intv > 0) { + PROF_START(seed_3); + if (opt->max_mem_intv > 0) { x = 0; while (x < len) { if (seq[x] < 4) { @@ -223,6 +230,7 @@ static void mem_collect_intv(const mem_opt_t* opt, const bwt_t* bwt, int len, co } else ++x; } } + PROF_END(tprof[T_SEED_3][tid], seed_3); // sort ks_introsort(mem_intv, smem->mem.n, smem->mem.a); } @@ -246,9 +254,9 @@ static void hyb_seeding(const mem_opt_t* opt, const HybridIndex* hyb, ReadSeq* r // fprintf(stderr, "seq-id: %ld\n", seq_id); - if (seq_id == 4) { - fprintf(stderr, "seq-id: %ld\n", seq_id); - } + //if (seq_id == 4) { + // fprintf(stderr, "seq-id: %ld\n", seq_id); + //} // 1. seeding-1: find all SMEMs PROF_START(seed_1); @@ -260,8 +268,7 @@ static void hyb_seeding(const mem_opt_t* opt, const HybridIndex* hyb, ReadSeq* r hyb_first_seeding(hyb, read_seq, &range, opt->min_seed_len, seeds, tid); seeds_ranges->a[i].end = seeds->n; } - - tprof[T_SEED_LEN][tid] += seeds->n; + //tprof[T_SEED_LEN][tid] += seeds->n; PROF_END(tprof[T_SEED_1][tid], seed_1); #if 1 // 2. seeding-2: find MEMs inside a long SMEM @@ -422,7 +429,9 @@ void generate_chain(const mem_opt_t* opt, const bwt_t* bwt, const bntseq_t* bns, mem_chain_t tmp, *lower, *upper; mem_seed_t s; int rid, to_add = 0; - s.rbeg = tmp.pos = bwt_sa(bwt, p->x[0] + k); // this is the base coordinate in the forward-reverse reference + PROF_START(sa); + s.rbeg = tmp.pos = bwt_sa(bwt, p->x[0] + k); // this is the base coordinate in the forward-reverse reference + PROF_END(tprof[T_SA][tid], sa); s.qbeg = p->info>>32; s.score= s.len = slen; rid = bns_intv2rid(bns, s.rbeg, s.rbeg + s.len); @@ -932,12 +941,14 @@ void mem_chain2aln(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac printf("*** Left ref: "); for (j = 0; j < tmp; ++j) putchar("ACGTN"[(int)rseq[tmp - 1 - j]]); putchar('\n'); printf("*** Left query: "); for (j = 0; j < s->qbeg; ++j) putchar("ACGTN"[(int)query[s->qbeg - 1 - j]]); putchar('\n'); } + PROF_START(bsw_ext); #ifndef USE_AVX2_EXT a->score = ksw_extend2(s->qbeg, qs, tmp, rs, 5, opt->mat, opt->o_del, opt->e_del, opt->o_ins, opt->e_ins, aw[0], opt->pen_clip5, opt->zdrop, s->len * opt->a, &qle, &tle, >le, &gscore, &max_off[0]); #else a->score = ksw_extend2_avx2(s->qbeg, query, tmp, rseq, 1, 5, opt->mat, opt->o_del, opt->e_del, opt->o_ins, opt->e_ins, opt->a, opt->b, aw[0], opt->pen_clip5, opt->zdrop, s->len * opt->a, &qle, &tle, >le, &gscore, &max_off[0], aux->sw_buf); #endif + PROF_END(tprof[T_BSW_EXT][tid], bsw_ext); if (bwa_verbose >= 4) { printf("*** Left extension: prev_score=%d; score=%d; bandwidth=%d; max_off_diagonal_dist=%d\n", prev, a->score, aw[0], max_off[0]); fflush(stdout); } if (a->score == prev || max_off[0] < (aw[0]>>1) + (aw[0]>>2)) break; } @@ -967,12 +978,14 @@ void mem_chain2aln(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac printf("*** Right ref: "); for (j = 0; j < rmax[1] - rmax[0] - re; ++j) putchar("ACGTN"[(int)rseq[re+j]]); putchar('\n'); printf("*** Right query: "); for (j = 0; j < l_query - qe; ++j) putchar("ACGTN"[(int)query[qe+j]]); putchar('\n'); } + PROF_START(bsw_ext); #ifndef USE_AVX2_EXT a->score = ksw_extend2(l_query - qe, query + qe, rmax[1] - rmax[0] - re, rseq + re, 5, opt->mat, opt->o_del, opt->e_del, opt->o_ins, opt->e_ins, aw[1], opt->pen_clip3, opt->zdrop, sc0, &qle, &tle, >le, &gscore, &max_off[1]); #else a->score = ksw_extend2_avx2(l_query - qe, query + qe, rmax[1] - rmax[0] - re, rseq + re, 0, 5, opt->mat, opt->o_del, opt->e_del, opt->o_ins, opt->e_ins, opt->a, opt->b, aw[1], opt->pen_clip3, opt->zdrop, sc0, &qle, &tle, >le, &gscore, &max_off[1], aux->sw_buf); #endif - if (bwa_verbose >= 4) { printf("*** Right extension: prev_score=%d; score=%d; bandwidth=%d; max_off_diagonal_dist=%d\n", prev, a->score, aw[1], max_off[1]); fflush(stdout); } + PROF_END(tprof[T_BSW_EXT][tid], bsw_ext); + if (bwa_verbose >= 4) { printf("*** Right extension: prev_score=%d; score=%d; bandwidth=%d; max_off_diagonal_dist=%d\n", prev, a->score, aw[1], max_off[1]); fflush(stdout); } if (a->score == prev || max_off[1] < (aw[1]>>1) + (aw[1]>>2)) break; } // similar to the above @@ -1372,19 +1385,23 @@ void mem_core_process(const mem_opt_t* opt, const bwt_t* bwt, const HybridIndex* if (opt->use_bwt) { smem_v *smem_arr = (smem_v*)seed_arr; // 1. seeding - PROF_START(seed_all); + PROF_START(seeding); for (i = 0; i < nseq; ++i) { + PROF_START(gen_seq); seq = seq_arr[i].seq; l_seq = seq_arr[i].l_seq; for (j = 0; j < l_seq; ++j) { seq[j] = seq[j] < 4 ? seq[j] : nst_nt4_table[(int)seq[j]]; } + PROF_END(tprof[T_GEN_SEQ][tid], gen_seq); + PROF_START(smem); find_smem(opt, bwt, l_seq, (uint8_t*)seq, aux, &smem_arr[i], tid); + PROF_END(tprof[T_SMEM][tid], smem); } - PROF_END(tprof[T_SEED_ALL][tid], seed_all); + PROF_END(tprof[T_SEEDING][tid], seeding); // 2. chain - PROF_START(chain_all); + PROF_START(chain); for (i = 0; i < nseq; ++i) { seq = seq_arr[i].seq; l_seq = seq_arr[i].l_seq; @@ -1400,15 +1417,16 @@ void mem_core_process(const mem_opt_t* opt, const bwt_t* bwt, const HybridIndex* PROF_END(tprof[T_FLT_CHANNED_SEEDS][tid], flt_chained_seeds); if (bwa_verbose >= 4) mem_print_chain(bns, chnp); } - PROF_END(tprof[T_CHAIN_ALL][tid], chain_all); + PROF_END(tprof[T_CHAIN][tid], chain); } else { HybSeedArr* seeds = (HybSeedArr*)seed_arr; // 1. seeding - PROF_START(seed_all); + PROF_START(seeding); RangeArr read_ranges = {0}; RangeArr seeds_ranges = {0}; Range init_range = {0}; for (i = 0; i < nseq; ++i) { + PROF_START(gen_seq); uint8_t* reverse_seq = aux->reverse_seq->a; uint8_t* for_bits = aux->for_bits->a; uint8_t* back_bits = aux->back_bits->a; @@ -1441,14 +1459,17 @@ void mem_core_process(const mem_opt_t* opt, const bwt_t* bwt, const HybridIndex* create_seq_fb_bits((uint8_t*)seq, l_seq, for_bits, back_bits); ReadSeq read_seq = {l_seq, (uint8_t*)seq, reverse_seq, for_bits, back_bits, aux->seq_id}; ++aux->seq_id; + PROF_END(tprof[T_GEN_SEQ][tid], gen_seq); + PROF_START(smem); hyb_seeding(opt, hyb, &read_seq, &read_ranges, &seeds_ranges, &seeds[i], aux->seq_id, tid); + PROF_END(tprof[T_SMEM][tid], smem); } kv_destroy(read_ranges); kv_destroy(seeds_ranges); - PROF_END(tprof[T_SEED_ALL][tid], seed_all); + PROF_END(tprof[T_SEEDING][tid], seeding); // 2. chain - PROF_START(chain_all); + PROF_START(chain); for (i = 0; i < nseq; ++i) { seq = seq_arr[i].seq; l_seq = seq_arr[i].l_seq; @@ -1465,11 +1486,11 @@ void mem_core_process(const mem_opt_t* opt, const bwt_t* bwt, const HybridIndex* if (bwa_verbose >= 4) mem_print_chain(bns, chnp); } - PROF_END(tprof[T_CHAIN_ALL][tid], chain_all); + PROF_END(tprof[T_CHAIN][tid], chain); } // 3. align - PROF_START(aln_all); + PROF_START(extension); for (i = 0; i < nseq; ++i) { seq = seq_arr[i].seq; l_seq = seq_arr[i].l_seq; @@ -1481,14 +1502,18 @@ void mem_core_process(const mem_opt_t* opt, const bwt_t* bwt, const HybridIndex* mem_chain_t* p = &chnp->a[j]; if (bwa_verbose >= 4) err_printf("* ---> Processing chain(%d) <---\n", j); + PROF_START(mem_chain2aln); mem_chain2aln(opt, bns, pac, l_seq, (uint8_t*)seq, p, regp, aux, tid); + PROF_END(tprof[T_MEM_CHAIN2ALN][tid], mem_chain2aln); free(chnp->a[j].seeds); } free(chnp->a); chnp->m = 0; chnp->a = 0; + PROF_START(sort_dedup); regp->n = mem_sort_dedup_patch(opt, bns, pac, (uint8_t*)seq, regp->n, regp->a); + PROF_END(tprof[T_SORT_DEDUP][tid], sort_dedup); if (bwa_verbose >= 4) { err_printf("* %ld chains remain after removing duplicated chains\n", regp->n); for (j = 0; j < regp->n; ++j) { @@ -1502,7 +1527,7 @@ void mem_core_process(const mem_opt_t* opt, const bwt_t* bwt, const HybridIndex* p->is_alt = 1; } } - PROF_END(tprof[T_ALN_ALL][tid], aln_all); + PROF_END(tprof[T_EXTENSION][tid], extension); // 4. calc insert size #define MIN_RATIO 0.8 @@ -1530,7 +1555,7 @@ void mem_core_process(const mem_opt_t* opt, const bwt_t* bwt, const HybridIndex* } } -static void worker_smem_align(void *data, long i, int tid) +static void worker_smem_extension(void *data, long i, int tid) { mem_worker_t *w = (mem_worker_t*)data; int start = i * w->opt->batch_size; @@ -1584,21 +1609,21 @@ void mem_process_seqs(const mem_opt_t* opt, mem_worker_t* w, int64_t n_processed for (j = 0; j < 4; ++j) w->isize_arr[i][j].n = 0; } - PROF_START(kernel); - kt_for(opt->n_threads, worker_smem_align, w, n_batch); // find mapping positions - PROF_END(gprof[G_MEM_KERNEL], kernel); + PROF_START(seed_and_ext); + kt_for(opt->n_threads, worker_smem_extension, w, n_batch); // find mapping positions + PROF_END(gprof[G_SEED_AND_EXT], seed_and_ext); - PROF_START(pestat); + PROF_START(pestat); if (opt->flag&MEM_F_PE) { // infer insert sizes if not provided if (pes0) memcpy(pes, pes0, 4 * sizeof(mem_pestat_t)); // if pes0 != NULL, set the insert-size distribution as pes0 else mem_pestat(opt, w->bns->l_pac, n, w->isize_arr, pes); // otherwise, infer the insert size distribution from data } PROF_END(gprof[G_MEM_PESTAT], pestat); - PROF_START(mem_sam); + PROF_START(gen_sam); kt_for(opt->n_threads, worker_sam, w, (opt->flag & MEM_F_PE) ? n >> 1 : n); // generate alignment - PROF_END(gprof[G_MEM_SAM], mem_sam); + PROF_END(gprof[G_GEN_SAM], gen_sam); - if (bwa_verbose >= 3) + if (bwa_verbose >= 3) fprintf(stderr, "[M::%s] Processed %d reads in %.3f CPU sec, %.3f real sec\n", __func__, n, cputime() - ctime, realtime() - rtime); } diff --git a/fastmap.c b/fastmap.c index ede9d4b..e706e8a 100644 --- a/fastmap.c +++ b/fastmap.c @@ -306,7 +306,7 @@ static void update_a(mem_opt_t *opt, const mem_opt_t *opt0) int main_mem(int argc, char *argv[]) { - mem_opt_t *opt, opt0; + mem_opt_t *opt, opt0; int fd, fd2, i, c, ignore_alt = 0, no_mt_io = 0; int fixed_chunk_size = -1; gzFile fp, fp2 = 0; @@ -648,6 +648,8 @@ int main_mem(int argc, char *argv[]) close_files(); #endif + PROF_END(gprof[G_ALL], all); + return 0; } diff --git a/profiling.c b/profiling.c index 4ca819f..9352582 100644 --- a/profiling.c +++ b/profiling.c @@ -68,70 +68,86 @@ uint64_t get_sum(uint64_t *a, int len) { int display_stats(int nthreads) { #ifdef SHOW_PERF - double avg, max, min; - int i; - fprintf(stderr, "[steps in main_mem]\n"); - fprintf(stderr, "time_parse_arg: %0.2lf s\n", gprof[G_PREPARE] * 1.0 / proc_freq); - fprintf(stderr, "time_load_idx: %0.2lf s\n", gprof[G_LOAD_IDX] * 1.0 / proc_freq); - fprintf(stderr, "time_pipeline: %0.2lf s\n", gprof[G_PIPELINE] * 1.0 / proc_freq); - fprintf(stderr, "time_all: %0.2lf s\n", gprof[G_ALL] * 1.0 / proc_freq); - - fprintf(stderr, "\n[steps in pipeline]\n"); - fprintf(stderr, "time_read: %0.2lf s\n", gprof[G_READ] * 1.0 / proc_freq); - fprintf(stderr, "time_compute: %0.2lf s\n", gprof[G_COMPUTE] * 1.0 / proc_freq); - fprintf(stderr, "time_write: %0.2lf s\n", gprof[G_WRITE] * 1.0 / proc_freq); +#define FORMAT_PERF_OUT(name, sec, layer) \ + do { \ + int i = 0; \ + for (i = 0; i < layer; i++) fprintf(stderr, " "); \ + fprintf(stderr, "%s: %0.2lf s\n", name, sec); \ + } while (0) - fprintf(stderr, "\n[steps in mem_process_seqs]\n"); - fprintf(stderr, "time_mem_prepare: %0.2lf s\n", gprof[G_MEM_PREPARE] * 1.0 / proc_freq); - fprintf(stderr, "time_mem_kernel: %0.2lf s\n", gprof[G_MEM_KERNEL] * 1.0 / proc_freq); - fprintf(stderr, "time_mem_pestat: %0.2lf s\n", gprof[G_MEM_PESTAT] * 1.0 / proc_freq); - fprintf(stderr, "time_mem_sam: %0.2lf s\n", gprof[G_MEM_SAM] * 1.0 / proc_freq); +#define FORMAT_PERF_OUT_3(name, sec_arr, layer) \ + do { \ + int i = 0; \ + double avg, max, min; \ + for (i = 0; i < layer; i++) fprintf(stderr, " "); \ + find_opt(sec_arr, nthreads, &max, &min, &avg); \ + fprintf(stderr, "%s: %0.2lf (%0.2lf, %0.2lf) s\n", name, avg, max, min); \ + } while (0) - fprintf(stderr, "\n[steps in kernel]\n"); - find_opt(tprof[T_SEED_ALL], nthreads, &max, &min, &avg); - fprintf(stderr, "time_seed_all: %0.2lf (%0.2lf, %0.2lf) s\n", avg, max, min); - find_opt(tprof[T_CHAIN_ALL], nthreads, &max, &min, &avg); - fprintf(stderr, "time_chain_all: %0.2lf (%0.2lf, %0.2lf) s\n", avg, max, min); - find_opt(tprof[T_ALN_ALL], nthreads, &max, &min, &avg); - fprintf(stderr, "time_aln_all: %0.2lf (%0.2lf, %0.2lf) s\n", avg, max, min); - find_opt(tprof[T_INS_SIZE], nthreads, &max, &min, &avg); - fprintf(stderr, "time_ins_size_all: %0.2lf (%0.2lf, %0.2lf) s\n", avg, max, min); +#define FORMAT_PERF_OUT_SUM_3(name, sec_arr, sec_arr1, layer) \ + do { \ + int i = 0; \ + double avg, max, min, avg1, max1, min1; \ + for (i = 0; i < layer; i++) fprintf(stderr, " "); \ + find_opt(sec_arr, nthreads, &max, &min, &avg); \ + find_opt(sec_arr1, nthreads, &max1, &min1, &avg1); \ + fprintf(stderr, "%s: %0.2lf (%0.2lf, %0.2lf) s\n", name, avg + avg1, max + max1, min + min1); \ + } while (0) - fprintf(stderr, "\n[steps in seeding]\n"); - find_opt(tprof[T_SEED_1], nthreads, &max, &min, &avg); - fprintf(stderr, "time_seed_1: %0.2lf s %0.2lf s %0.2lf s\n", max, min, avg); - find_opt(tprof[T_SEED_2], nthreads, &max, &min, &avg); - fprintf(stderr, "time_seed_2: %0.2lf s\n", avg); - find_opt(tprof[T_SEED_3], nthreads, &max, &min, &avg); - fprintf(stderr, "time_seed_3: %0.2lf s\n", avg); - - fprintf(stderr, "\n[steps in chain]\n"); - find_opt(tprof[T_GEN_CHAIN], nthreads, &max, &min, &avg); - fprintf(stderr, "time_gen_chain: %0.2lf s\n", avg); - find_opt(tprof[T_FLT_CHAIN], nthreads, &max, &min, &avg); - fprintf(stderr, "time_flt_chain: %0.2lf s\n", avg); - find_opt(tprof[T_FLT_CHANNED_SEEDS], nthreads, &max, &min, &avg); - fprintf(stderr, "time_flt_chained_seeds: %0.2lf s\n", avg); - find_opt(tprof[T_SAL], nthreads, &max, &min, &avg); - fprintf(stderr, "time_sal: %0.2lf s\n", avg); - find_opt(tprof[T_BSW], nthreads, &max, &min, &avg); - fprintf(stderr, "time_bsw: %0.2lf s\n", avg); +#define FORMAT_PERF_OUT_SUB_3(name, sec_arr, sec_arr1, layer) \ + do { \ + int i = 0; \ + double avg, max, min, avg1, max1, min1; \ + for (i = 0; i < layer; i++) fprintf(stderr, " "); \ + find_opt(sec_arr, nthreads, &max, &min, &avg); \ + find_opt(sec_arr1, nthreads, &max1, &min1, &avg1); \ + fprintf(stderr, "%s: %0.2lf (%0.2lf, %0.2lf) s\n", name, avg - avg1, max - max1, min - min1); \ + } while (0) - fprintf(stderr, "\n[steps in gen sam]\n"); - find_opt(tprof[T_SAM_MATESW], nthreads, &max, &min, &avg); - fprintf(stderr, "time_mate_sw: %0.2lf s\n", avg); - find_opt(tprof[T_KSW_ALIGN2], nthreads, &max, &min, &avg); - fprintf(stderr, "time_ksw_align2: %0.2lf s\n", avg); - find_opt(tprof[T_KSW_LOOP], nthreads, &max, &min, &avg); - fprintf(stderr, "time_ksw_loop: %0.2lf s\n", avg); - find_opt(tprof[T_KSW_REVERSE], nthreads, &max, &min, &avg); - fprintf(stderr, "time_ksw_reverse: %0.2lf s\n", avg); - find_opt(tprof[T_SAM_REG2ALN], nthreads, &max, &min, &avg); - fprintf(stderr, "time_reg2aln: %0.2lf s\n", avg); + // for overall pipeline + FORMAT_PERF_OUT("all", gprof[G_ALL] * 1.0 / proc_freq, 0); + FORMAT_PERF_OUT("load_idx", gprof[G_LOAD_IDX] * 1.0 / proc_freq, 1); + FORMAT_PERF_OUT("pipeline", gprof[G_PIPELINE] * 1.0 / proc_freq, 1); + FORMAT_PERF_OUT("read", gprof[G_READ] * 1.0 / proc_freq, 2); + FORMAT_PERF_OUT("uncompress", gprof[G_UNCOMPRESS] * 1.0 / proc_freq, 3); + FORMAT_PERF_OUT("compute", gprof[G_COMPUTE] * 1.0 / proc_freq, 2); + FORMAT_PERF_OUT("seeding-extension", gprof[G_SEED_AND_EXT] * 1.0 / proc_freq, 3); + FORMAT_PERF_OUT("gen-sam", gprof[G_GEN_SAM] * 1.0 / proc_freq, 3); + FORMAT_PERF_OUT("write", gprof[G_WRITE] * 1.0 / proc_freq, 2); + + fprintf(stderr, "\n"); + + // for seeding + FORMAT_PERF_OUT_SUM_3("seed-chain", tprof[T_SEEDING], tprof[T_CHAIN], 0); + FORMAT_PERF_OUT_3("seeding", tprof[T_SEEDING], 1); + FORMAT_PERF_OUT_3("gen-seq", tprof[T_GEN_SEQ], 2); + FORMAT_PERF_OUT_3("smem", tprof[T_SMEM], 2); + FORMAT_PERF_OUT_3("seeding-1", tprof[T_SEED_1], 3); + FORMAT_PERF_OUT_3("seeding-2", tprof[T_SEED_2], 3); + FORMAT_PERF_OUT_3("seeding-3", tprof[T_SEED_3], 3); + FORMAT_PERF_OUT_3("chain", tprof[T_CHAIN], 1); + FORMAT_PERF_OUT_3("gen-chain", tprof[T_GEN_CHAIN], 2); + FORMAT_PERF_OUT_3("sa", tprof[T_SA], 3); + FORMAT_PERF_OUT_3("flt-chain", tprof[T_FLT_CHAIN],2); + FORMAT_PERF_OUT_SUM_3("smem + sa", tprof[T_SMEM], tprof[T_SA], 1); + + fprintf(stderr, "\n"); + + // for extension + FORMAT_PERF_OUT_3("extension", tprof[T_EXTENSION], 0); + FORMAT_PERF_OUT_3("mem_chain2aln", tprof[T_MEM_CHAIN2ALN], 1); + FORMAT_PERF_OUT_3("bsw_ext", tprof[T_BSW_EXT], 2); + FORMAT_PERF_OUT_SUB_3("other", tprof[T_MEM_CHAIN2ALN], tprof[T_BSW_EXT], 2); + FORMAT_PERF_OUT_3("sort_dedup", tprof[T_SORT_DEDUP], 1); + + // for gen-sam + FORMAT_PERF_OUT("gen-sam", gprof[G_GEN_SAM] * 1.0 / proc_freq, 0); + FORMAT_PERF_OUT_3("sam_mate_sw", tprof[T_SAM_MATESW], 1); + FORMAT_PERF_OUT_3("sam_reg2aln", tprof[T_SAM_REG2ALN], 1); + +#if 0 - fprintf(stderr, "time_ksw_loop: %0.2lf s\n", gprof[G_KSW_LOOP] * 1.0 / proc_freq); - fprintf(stderr, "time_ksw_end_loop: %0.2lf s\n", gprof[G_KSW_END_LOOP] * 1.0 / proc_freq); #if SHOW_DATA_PERF fprintf(stderr, "seq num: %ld\n", gdat[0]); @@ -184,6 +200,7 @@ int display_stats(int nthreads) // PRINT_SEED_TIME(3_3_2); #endif double all = 0; + int i; for (i = 0; i < 50; ++i) { //all += sum(tdat[i], nthreads); // fprintf(stderr, "sum %d: %ld\n", i, sum(tdat[i], nthreads)); @@ -217,8 +234,11 @@ int display_stats(int nthreads) // fprintf(stderr, "%ld,\n", sum(tdat[i], nthreads)); // } - fprintf(stderr, "\n"); + #endif - return 1; +#endif + + fprintf(stderr, "\n"); + return 0; } diff --git a/profiling.h b/profiling.h index 287eb5c..46c8af8 100644 --- a/profiling.h +++ b/profiling.h @@ -60,34 +60,42 @@ enum { // GLOBAL enum { G_ALL = 0, + G_LOAD_IDX, G_PIPELINE, G_READ, - G_WRITE, G_COMPUTE, - G_PREPARE, - G_LOAD_IDX, - G_MEM_PREPARE, - G_MEM_KERNEL, + G_WRITE, + G_SEED_AND_EXT, G_MEM_PESTAT, - G_MEM_SAM, - G_KSW_LOOP, - G_KSW_END_LOOP, - G_read_seq + G_GEN_SAM, + G_UNCOMPRESS }; // THREAD enum { - T_SEED_ALL = 0, - T_CHAIN_ALL, - T_ALN_ALL, - T_INS_SIZE, + T_SEEDING = 0, + T_GEN_SEQ, + T_SMEM, T_SEED_1, T_SEED_2, T_SEED_3, - T_SAL, + T_CHAIN, T_GEN_CHAIN, + T_SA, T_FLT_CHAIN, T_FLT_CHANNED_SEEDS, + T_EXTENSION, + T_MEM_CHAIN2ALN, + T_BSW_EXT, + T_SORT_DEDUP, + T_GEN_SAM, + T_MEM_REG2ALN, + + + T_CHAIN_ALL, + T_ALN_ALL, + T_INS_SIZE, + T_SAL, T_READ_SA, T_BSW, T_BSW_ALL, diff --git a/utils.c b/utils.c index a5591e8..69454eb 100644 --- a/utils.c +++ b/utils.c @@ -161,7 +161,7 @@ size_t err_fread_noeof(void *ptr, size_t size, size_t nmemb, FILE *stream) int err_gzread(gzFile file, void *ptr, unsigned int len) { int ret = 0; - PROF_START(read); + PROF_START(uncompress); #ifdef USE_ASYNC_READ khiter_t k = kh_get(fkv, fHash, (int64_t)file); FileKV* val = &kh_value(fHash, k); @@ -189,7 +189,7 @@ int err_gzread(gzFile file, void *ptr, unsigned int len) #else ret = gzread(file, ptr, len); #endif - PROF_END(gprof[G_read_seq], read); + PROF_END(gprof[G_UNCOMPRESS], uncompress); if (ret < 0) {