更新了性能测试
This commit is contained in:
parent
70979c1b60
commit
49028547ac
|
|
@ -3,6 +3,7 @@ bwa
|
|||
test
|
||||
test64
|
||||
.*.swp
|
||||
*.log
|
||||
Makefile.bak
|
||||
bwamem-lite
|
||||
test_index/
|
||||
|
|
|
|||
67
bwamem.c
67
bwamem.c
|
|
@ -184,6 +184,7 @@ static void mem_collect_intv(const mem_opt_t* opt, const bwt_t* bwt, int len, co
|
|||
int split_len = (int)(opt->min_seed_len * opt->split_factor + .499);
|
||||
smem->mem.n = 0;
|
||||
// first pass: find all SMEMs
|
||||
PROF_START(seed_1);
|
||||
while (x < len) {
|
||||
if (seq[x] < 4) {
|
||||
x = bwt_smem1(bwt, len, seq, x, start_width, &a->mem1, a->tmpv);
|
||||
|
|
@ -195,7 +196,10 @@ static void mem_collect_intv(const mem_opt_t* opt, const bwt_t* bwt, int len, co
|
|||
}
|
||||
} else ++x;
|
||||
}
|
||||
PROF_END(tprof[T_SEED_1][tid], seed_1);
|
||||
|
||||
// second pass: find MEMs inside a long SMEM
|
||||
PROF_START(seed_2);
|
||||
old_n = smem->mem.n;
|
||||
for (k = 0; k < old_n; ++k) {
|
||||
bwtintv_t *p = &smem->mem.a[k];
|
||||
|
|
@ -206,7 +210,10 @@ static void mem_collect_intv(const mem_opt_t* opt, const bwt_t* bwt, int len, co
|
|||
if ((uint32_t)a->mem1.a[i].info - (a->mem1.a[i].info>>32) >= opt->min_seed_len)
|
||||
kv_push(bwtintv_t, smem->mem, a->mem1.a[i]);
|
||||
}
|
||||
PROF_END(tprof[T_SEED_2][tid], seed_2);
|
||||
|
||||
// third pass: LAST-like
|
||||
PROF_START(seed_3);
|
||||
if (opt->max_mem_intv > 0) {
|
||||
x = 0;
|
||||
while (x < len) {
|
||||
|
|
@ -223,6 +230,7 @@ static void mem_collect_intv(const mem_opt_t* opt, const bwt_t* bwt, int len, co
|
|||
} else ++x;
|
||||
}
|
||||
}
|
||||
PROF_END(tprof[T_SEED_3][tid], seed_3);
|
||||
// sort
|
||||
ks_introsort(mem_intv, smem->mem.n, smem->mem.a);
|
||||
}
|
||||
|
|
@ -246,9 +254,9 @@ static void hyb_seeding(const mem_opt_t* opt, const HybridIndex* hyb, ReadSeq* r
|
|||
|
||||
// fprintf(stderr, "seq-id: %ld\n", seq_id);
|
||||
|
||||
if (seq_id == 4) {
|
||||
fprintf(stderr, "seq-id: %ld\n", seq_id);
|
||||
}
|
||||
//if (seq_id == 4) {
|
||||
// fprintf(stderr, "seq-id: %ld\n", seq_id);
|
||||
//}
|
||||
|
||||
// 1. seeding-1: find all SMEMs
|
||||
PROF_START(seed_1);
|
||||
|
|
@ -260,8 +268,7 @@ static void hyb_seeding(const mem_opt_t* opt, const HybridIndex* hyb, ReadSeq* r
|
|||
hyb_first_seeding(hyb, read_seq, &range, opt->min_seed_len, seeds, tid);
|
||||
seeds_ranges->a[i].end = seeds->n;
|
||||
}
|
||||
|
||||
tprof[T_SEED_LEN][tid] += seeds->n;
|
||||
//tprof[T_SEED_LEN][tid] += seeds->n;
|
||||
PROF_END(tprof[T_SEED_1][tid], seed_1);
|
||||
#if 1
|
||||
// 2. seeding-2: find MEMs inside a long SMEM
|
||||
|
|
@ -422,7 +429,9 @@ void generate_chain(const mem_opt_t* opt, const bwt_t* bwt, const bntseq_t* bns,
|
|||
mem_chain_t tmp, *lower, *upper;
|
||||
mem_seed_t s;
|
||||
int rid, to_add = 0;
|
||||
PROF_START(sa);
|
||||
s.rbeg = tmp.pos = bwt_sa(bwt, p->x[0] + k); // this is the base coordinate in the forward-reverse reference
|
||||
PROF_END(tprof[T_SA][tid], sa);
|
||||
s.qbeg = p->info>>32;
|
||||
s.score= s.len = slen;
|
||||
rid = bns_intv2rid(bns, s.rbeg, s.rbeg + s.len);
|
||||
|
|
@ -932,12 +941,14 @@ void mem_chain2aln(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac
|
|||
printf("*** Left ref: "); for (j = 0; j < tmp; ++j) putchar("ACGTN"[(int)rseq[tmp - 1 - j]]); putchar('\n');
|
||||
printf("*** Left query: "); for (j = 0; j < s->qbeg; ++j) putchar("ACGTN"[(int)query[s->qbeg - 1 - j]]); putchar('\n');
|
||||
}
|
||||
PROF_START(bsw_ext);
|
||||
#ifndef USE_AVX2_EXT
|
||||
a->score = ksw_extend2(s->qbeg, qs, tmp, rs, 5, opt->mat, opt->o_del, opt->e_del, opt->o_ins, opt->e_ins, aw[0], opt->pen_clip5, opt->zdrop, s->len * opt->a, &qle, &tle, >le, &gscore, &max_off[0]);
|
||||
#else
|
||||
a->score = ksw_extend2_avx2(s->qbeg, query, tmp, rseq, 1, 5, opt->mat, opt->o_del, opt->e_del, opt->o_ins, opt->e_ins, opt->a, opt->b,
|
||||
aw[0], opt->pen_clip5, opt->zdrop, s->len * opt->a, &qle, &tle, >le, &gscore, &max_off[0], aux->sw_buf);
|
||||
#endif
|
||||
PROF_END(tprof[T_BSW_EXT][tid], bsw_ext);
|
||||
if (bwa_verbose >= 4) { printf("*** Left extension: prev_score=%d; score=%d; bandwidth=%d; max_off_diagonal_dist=%d\n", prev, a->score, aw[0], max_off[0]); fflush(stdout); }
|
||||
if (a->score == prev || max_off[0] < (aw[0]>>1) + (aw[0]>>2)) break;
|
||||
}
|
||||
|
|
@ -967,11 +978,13 @@ void mem_chain2aln(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac
|
|||
printf("*** Right ref: "); for (j = 0; j < rmax[1] - rmax[0] - re; ++j) putchar("ACGTN"[(int)rseq[re+j]]); putchar('\n');
|
||||
printf("*** Right query: "); for (j = 0; j < l_query - qe; ++j) putchar("ACGTN"[(int)query[qe+j]]); putchar('\n');
|
||||
}
|
||||
PROF_START(bsw_ext);
|
||||
#ifndef USE_AVX2_EXT
|
||||
a->score = ksw_extend2(l_query - qe, query + qe, rmax[1] - rmax[0] - re, rseq + re, 5, opt->mat, opt->o_del, opt->e_del, opt->o_ins, opt->e_ins, aw[1], opt->pen_clip3, opt->zdrop, sc0, &qle, &tle, >le, &gscore, &max_off[1]);
|
||||
#else
|
||||
a->score = ksw_extend2_avx2(l_query - qe, query + qe, rmax[1] - rmax[0] - re, rseq + re, 0, 5, opt->mat, opt->o_del, opt->e_del, opt->o_ins, opt->e_ins, opt->a, opt->b, aw[1], opt->pen_clip3, opt->zdrop, sc0, &qle, &tle, >le, &gscore, &max_off[1], aux->sw_buf);
|
||||
#endif
|
||||
PROF_END(tprof[T_BSW_EXT][tid], bsw_ext);
|
||||
if (bwa_verbose >= 4) { printf("*** Right extension: prev_score=%d; score=%d; bandwidth=%d; max_off_diagonal_dist=%d\n", prev, a->score, aw[1], max_off[1]); fflush(stdout); }
|
||||
if (a->score == prev || max_off[1] < (aw[1]>>1) + (aw[1]>>2)) break;
|
||||
}
|
||||
|
|
@ -1372,19 +1385,23 @@ void mem_core_process(const mem_opt_t* opt, const bwt_t* bwt, const HybridIndex*
|
|||
if (opt->use_bwt) {
|
||||
smem_v *smem_arr = (smem_v*)seed_arr;
|
||||
// 1. seeding
|
||||
PROF_START(seed_all);
|
||||
PROF_START(seeding);
|
||||
for (i = 0; i < nseq; ++i) {
|
||||
PROF_START(gen_seq);
|
||||
seq = seq_arr[i].seq;
|
||||
l_seq = seq_arr[i].l_seq;
|
||||
for (j = 0; j < l_seq; ++j) {
|
||||
seq[j] = seq[j] < 4 ? seq[j] : nst_nt4_table[(int)seq[j]];
|
||||
}
|
||||
PROF_END(tprof[T_GEN_SEQ][tid], gen_seq);
|
||||
PROF_START(smem);
|
||||
find_smem(opt, bwt, l_seq, (uint8_t*)seq, aux, &smem_arr[i], tid);
|
||||
PROF_END(tprof[T_SMEM][tid], smem);
|
||||
}
|
||||
PROF_END(tprof[T_SEED_ALL][tid], seed_all);
|
||||
PROF_END(tprof[T_SEEDING][tid], seeding);
|
||||
|
||||
// 2. chain
|
||||
PROF_START(chain_all);
|
||||
PROF_START(chain);
|
||||
for (i = 0; i < nseq; ++i) {
|
||||
seq = seq_arr[i].seq;
|
||||
l_seq = seq_arr[i].l_seq;
|
||||
|
|
@ -1400,15 +1417,16 @@ void mem_core_process(const mem_opt_t* opt, const bwt_t* bwt, const HybridIndex*
|
|||
PROF_END(tprof[T_FLT_CHANNED_SEEDS][tid], flt_chained_seeds);
|
||||
if (bwa_verbose >= 4) mem_print_chain(bns, chnp);
|
||||
}
|
||||
PROF_END(tprof[T_CHAIN_ALL][tid], chain_all);
|
||||
PROF_END(tprof[T_CHAIN][tid], chain);
|
||||
} else {
|
||||
HybSeedArr* seeds = (HybSeedArr*)seed_arr;
|
||||
// 1. seeding
|
||||
PROF_START(seed_all);
|
||||
PROF_START(seeding);
|
||||
RangeArr read_ranges = {0};
|
||||
RangeArr seeds_ranges = {0};
|
||||
Range init_range = {0};
|
||||
for (i = 0; i < nseq; ++i) {
|
||||
PROF_START(gen_seq);
|
||||
uint8_t* reverse_seq = aux->reverse_seq->a;
|
||||
uint8_t* for_bits = aux->for_bits->a;
|
||||
uint8_t* back_bits = aux->back_bits->a;
|
||||
|
|
@ -1441,14 +1459,17 @@ void mem_core_process(const mem_opt_t* opt, const bwt_t* bwt, const HybridIndex*
|
|||
create_seq_fb_bits((uint8_t*)seq, l_seq, for_bits, back_bits);
|
||||
ReadSeq read_seq = {l_seq, (uint8_t*)seq, reverse_seq, for_bits, back_bits, aux->seq_id};
|
||||
++aux->seq_id;
|
||||
PROF_END(tprof[T_GEN_SEQ][tid], gen_seq);
|
||||
PROF_START(smem);
|
||||
hyb_seeding(opt, hyb, &read_seq, &read_ranges, &seeds_ranges, &seeds[i], aux->seq_id, tid);
|
||||
PROF_END(tprof[T_SMEM][tid], smem);
|
||||
}
|
||||
kv_destroy(read_ranges);
|
||||
kv_destroy(seeds_ranges);
|
||||
PROF_END(tprof[T_SEED_ALL][tid], seed_all);
|
||||
PROF_END(tprof[T_SEEDING][tid], seeding);
|
||||
|
||||
// 2. chain
|
||||
PROF_START(chain_all);
|
||||
PROF_START(chain);
|
||||
for (i = 0; i < nseq; ++i) {
|
||||
seq = seq_arr[i].seq;
|
||||
l_seq = seq_arr[i].l_seq;
|
||||
|
|
@ -1465,11 +1486,11 @@ void mem_core_process(const mem_opt_t* opt, const bwt_t* bwt, const HybridIndex*
|
|||
if (bwa_verbose >= 4)
|
||||
mem_print_chain(bns, chnp);
|
||||
}
|
||||
PROF_END(tprof[T_CHAIN_ALL][tid], chain_all);
|
||||
PROF_END(tprof[T_CHAIN][tid], chain);
|
||||
}
|
||||
|
||||
// 3. align
|
||||
PROF_START(aln_all);
|
||||
PROF_START(extension);
|
||||
for (i = 0; i < nseq; ++i) {
|
||||
seq = seq_arr[i].seq;
|
||||
l_seq = seq_arr[i].l_seq;
|
||||
|
|
@ -1481,14 +1502,18 @@ void mem_core_process(const mem_opt_t* opt, const bwt_t* bwt, const HybridIndex*
|
|||
mem_chain_t* p = &chnp->a[j];
|
||||
if (bwa_verbose >= 4)
|
||||
err_printf("* ---> Processing chain(%d) <---\n", j);
|
||||
PROF_START(mem_chain2aln);
|
||||
mem_chain2aln(opt, bns, pac, l_seq, (uint8_t*)seq, p, regp, aux, tid);
|
||||
PROF_END(tprof[T_MEM_CHAIN2ALN][tid], mem_chain2aln);
|
||||
free(chnp->a[j].seeds);
|
||||
}
|
||||
|
||||
free(chnp->a);
|
||||
chnp->m = 0;
|
||||
chnp->a = 0;
|
||||
PROF_START(sort_dedup);
|
||||
regp->n = mem_sort_dedup_patch(opt, bns, pac, (uint8_t*)seq, regp->n, regp->a);
|
||||
PROF_END(tprof[T_SORT_DEDUP][tid], sort_dedup);
|
||||
if (bwa_verbose >= 4) {
|
||||
err_printf("* %ld chains remain after removing duplicated chains\n", regp->n);
|
||||
for (j = 0; j < regp->n; ++j) {
|
||||
|
|
@ -1502,7 +1527,7 @@ void mem_core_process(const mem_opt_t* opt, const bwt_t* bwt, const HybridIndex*
|
|||
p->is_alt = 1;
|
||||
}
|
||||
}
|
||||
PROF_END(tprof[T_ALN_ALL][tid], aln_all);
|
||||
PROF_END(tprof[T_EXTENSION][tid], extension);
|
||||
|
||||
// 4. calc insert size
|
||||
#define MIN_RATIO 0.8
|
||||
|
|
@ -1530,7 +1555,7 @@ void mem_core_process(const mem_opt_t* opt, const bwt_t* bwt, const HybridIndex*
|
|||
}
|
||||
}
|
||||
|
||||
static void worker_smem_align(void *data, long i, int tid)
|
||||
static void worker_smem_extension(void *data, long i, int tid)
|
||||
{
|
||||
mem_worker_t *w = (mem_worker_t*)data;
|
||||
int start = i * w->opt->batch_size;
|
||||
|
|
@ -1584,9 +1609,9 @@ void mem_process_seqs(const mem_opt_t* opt, mem_worker_t* w, int64_t n_processed
|
|||
for (j = 0; j < 4; ++j) w->isize_arr[i][j].n = 0;
|
||||
}
|
||||
|
||||
PROF_START(kernel);
|
||||
kt_for(opt->n_threads, worker_smem_align, w, n_batch); // find mapping positions
|
||||
PROF_END(gprof[G_MEM_KERNEL], kernel);
|
||||
PROF_START(seed_and_ext);
|
||||
kt_for(opt->n_threads, worker_smem_extension, w, n_batch); // find mapping positions
|
||||
PROF_END(gprof[G_SEED_AND_EXT], seed_and_ext);
|
||||
|
||||
PROF_START(pestat);
|
||||
if (opt->flag&MEM_F_PE) { // infer insert sizes if not provided
|
||||
|
|
@ -1595,9 +1620,9 @@ void mem_process_seqs(const mem_opt_t* opt, mem_worker_t* w, int64_t n_processed
|
|||
}
|
||||
PROF_END(gprof[G_MEM_PESTAT], pestat);
|
||||
|
||||
PROF_START(mem_sam);
|
||||
PROF_START(gen_sam);
|
||||
kt_for(opt->n_threads, worker_sam, w, (opt->flag & MEM_F_PE) ? n >> 1 : n); // generate alignment
|
||||
PROF_END(gprof[G_MEM_SAM], mem_sam);
|
||||
PROF_END(gprof[G_GEN_SAM], gen_sam);
|
||||
|
||||
if (bwa_verbose >= 3)
|
||||
fprintf(stderr, "[M::%s] Processed %d reads in %.3f CPU sec, %.3f real sec\n", __func__, n, cputime() - ctime, realtime() - rtime);
|
||||
|
|
|
|||
|
|
@ -648,6 +648,8 @@ int main_mem(int argc, char *argv[])
|
|||
close_files();
|
||||
#endif
|
||||
|
||||
PROF_END(gprof[G_ALL], all);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
|
|||
136
profiling.c
136
profiling.c
|
|
@ -68,70 +68,86 @@ uint64_t get_sum(uint64_t *a, int len) {
|
|||
int display_stats(int nthreads)
|
||||
{
|
||||
#ifdef SHOW_PERF
|
||||
double avg, max, min;
|
||||
int i;
|
||||
|
||||
fprintf(stderr, "[steps in main_mem]\n");
|
||||
fprintf(stderr, "time_parse_arg: %0.2lf s\n", gprof[G_PREPARE] * 1.0 / proc_freq);
|
||||
fprintf(stderr, "time_load_idx: %0.2lf s\n", gprof[G_LOAD_IDX] * 1.0 / proc_freq);
|
||||
fprintf(stderr, "time_pipeline: %0.2lf s\n", gprof[G_PIPELINE] * 1.0 / proc_freq);
|
||||
fprintf(stderr, "time_all: %0.2lf s\n", gprof[G_ALL] * 1.0 / proc_freq);
|
||||
#define FORMAT_PERF_OUT(name, sec, layer) \
|
||||
do { \
|
||||
int i = 0; \
|
||||
for (i = 0; i < layer; i++) fprintf(stderr, " "); \
|
||||
fprintf(stderr, "%s: %0.2lf s\n", name, sec); \
|
||||
} while (0)
|
||||
|
||||
fprintf(stderr, "\n[steps in pipeline]\n");
|
||||
fprintf(stderr, "time_read: %0.2lf s\n", gprof[G_READ] * 1.0 / proc_freq);
|
||||
fprintf(stderr, "time_compute: %0.2lf s\n", gprof[G_COMPUTE] * 1.0 / proc_freq);
|
||||
fprintf(stderr, "time_write: %0.2lf s\n", gprof[G_WRITE] * 1.0 / proc_freq);
|
||||
#define FORMAT_PERF_OUT_3(name, sec_arr, layer) \
|
||||
do { \
|
||||
int i = 0; \
|
||||
double avg, max, min; \
|
||||
for (i = 0; i < layer; i++) fprintf(stderr, " "); \
|
||||
find_opt(sec_arr, nthreads, &max, &min, &avg); \
|
||||
fprintf(stderr, "%s: %0.2lf (%0.2lf, %0.2lf) s\n", name, avg, max, min); \
|
||||
} while (0)
|
||||
|
||||
fprintf(stderr, "\n[steps in mem_process_seqs]\n");
|
||||
fprintf(stderr, "time_mem_prepare: %0.2lf s\n", gprof[G_MEM_PREPARE] * 1.0 / proc_freq);
|
||||
fprintf(stderr, "time_mem_kernel: %0.2lf s\n", gprof[G_MEM_KERNEL] * 1.0 / proc_freq);
|
||||
fprintf(stderr, "time_mem_pestat: %0.2lf s\n", gprof[G_MEM_PESTAT] * 1.0 / proc_freq);
|
||||
fprintf(stderr, "time_mem_sam: %0.2lf s\n", gprof[G_MEM_SAM] * 1.0 / proc_freq);
|
||||
#define FORMAT_PERF_OUT_SUM_3(name, sec_arr, sec_arr1, layer) \
|
||||
do { \
|
||||
int i = 0; \
|
||||
double avg, max, min, avg1, max1, min1; \
|
||||
for (i = 0; i < layer; i++) fprintf(stderr, " "); \
|
||||
find_opt(sec_arr, nthreads, &max, &min, &avg); \
|
||||
find_opt(sec_arr1, nthreads, &max1, &min1, &avg1); \
|
||||
fprintf(stderr, "%s: %0.2lf (%0.2lf, %0.2lf) s\n", name, avg + avg1, max + max1, min + min1); \
|
||||
} while (0)
|
||||
|
||||
fprintf(stderr, "\n[steps in kernel]\n");
|
||||
find_opt(tprof[T_SEED_ALL], nthreads, &max, &min, &avg);
|
||||
fprintf(stderr, "time_seed_all: %0.2lf (%0.2lf, %0.2lf) s\n", avg, max, min);
|
||||
find_opt(tprof[T_CHAIN_ALL], nthreads, &max, &min, &avg);
|
||||
fprintf(stderr, "time_chain_all: %0.2lf (%0.2lf, %0.2lf) s\n", avg, max, min);
|
||||
find_opt(tprof[T_ALN_ALL], nthreads, &max, &min, &avg);
|
||||
fprintf(stderr, "time_aln_all: %0.2lf (%0.2lf, %0.2lf) s\n", avg, max, min);
|
||||
find_opt(tprof[T_INS_SIZE], nthreads, &max, &min, &avg);
|
||||
fprintf(stderr, "time_ins_size_all: %0.2lf (%0.2lf, %0.2lf) s\n", avg, max, min);
|
||||
#define FORMAT_PERF_OUT_SUB_3(name, sec_arr, sec_arr1, layer) \
|
||||
do { \
|
||||
int i = 0; \
|
||||
double avg, max, min, avg1, max1, min1; \
|
||||
for (i = 0; i < layer; i++) fprintf(stderr, " "); \
|
||||
find_opt(sec_arr, nthreads, &max, &min, &avg); \
|
||||
find_opt(sec_arr1, nthreads, &max1, &min1, &avg1); \
|
||||
fprintf(stderr, "%s: %0.2lf (%0.2lf, %0.2lf) s\n", name, avg - avg1, max - max1, min - min1); \
|
||||
} while (0)
|
||||
|
||||
fprintf(stderr, "\n[steps in seeding]\n");
|
||||
find_opt(tprof[T_SEED_1], nthreads, &max, &min, &avg);
|
||||
fprintf(stderr, "time_seed_1: %0.2lf s %0.2lf s %0.2lf s\n", max, min, avg);
|
||||
find_opt(tprof[T_SEED_2], nthreads, &max, &min, &avg);
|
||||
fprintf(stderr, "time_seed_2: %0.2lf s\n", avg);
|
||||
find_opt(tprof[T_SEED_3], nthreads, &max, &min, &avg);
|
||||
fprintf(stderr, "time_seed_3: %0.2lf s\n", avg);
|
||||
// for overall pipeline
|
||||
FORMAT_PERF_OUT("all", gprof[G_ALL] * 1.0 / proc_freq, 0);
|
||||
FORMAT_PERF_OUT("load_idx", gprof[G_LOAD_IDX] * 1.0 / proc_freq, 1);
|
||||
FORMAT_PERF_OUT("pipeline", gprof[G_PIPELINE] * 1.0 / proc_freq, 1);
|
||||
FORMAT_PERF_OUT("read", gprof[G_READ] * 1.0 / proc_freq, 2);
|
||||
FORMAT_PERF_OUT("uncompress", gprof[G_UNCOMPRESS] * 1.0 / proc_freq, 3);
|
||||
FORMAT_PERF_OUT("compute", gprof[G_COMPUTE] * 1.0 / proc_freq, 2);
|
||||
FORMAT_PERF_OUT("seeding-extension", gprof[G_SEED_AND_EXT] * 1.0 / proc_freq, 3);
|
||||
FORMAT_PERF_OUT("gen-sam", gprof[G_GEN_SAM] * 1.0 / proc_freq, 3);
|
||||
FORMAT_PERF_OUT("write", gprof[G_WRITE] * 1.0 / proc_freq, 2);
|
||||
|
||||
fprintf(stderr, "\n[steps in chain]\n");
|
||||
find_opt(tprof[T_GEN_CHAIN], nthreads, &max, &min, &avg);
|
||||
fprintf(stderr, "time_gen_chain: %0.2lf s\n", avg);
|
||||
find_opt(tprof[T_FLT_CHAIN], nthreads, &max, &min, &avg);
|
||||
fprintf(stderr, "time_flt_chain: %0.2lf s\n", avg);
|
||||
find_opt(tprof[T_FLT_CHANNED_SEEDS], nthreads, &max, &min, &avg);
|
||||
fprintf(stderr, "time_flt_chained_seeds: %0.2lf s\n", avg);
|
||||
find_opt(tprof[T_SAL], nthreads, &max, &min, &avg);
|
||||
fprintf(stderr, "time_sal: %0.2lf s\n", avg);
|
||||
find_opt(tprof[T_BSW], nthreads, &max, &min, &avg);
|
||||
fprintf(stderr, "time_bsw: %0.2lf s\n", avg);
|
||||
fprintf(stderr, "\n");
|
||||
|
||||
fprintf(stderr, "\n[steps in gen sam]\n");
|
||||
find_opt(tprof[T_SAM_MATESW], nthreads, &max, &min, &avg);
|
||||
fprintf(stderr, "time_mate_sw: %0.2lf s\n", avg);
|
||||
find_opt(tprof[T_KSW_ALIGN2], nthreads, &max, &min, &avg);
|
||||
fprintf(stderr, "time_ksw_align2: %0.2lf s\n", avg);
|
||||
find_opt(tprof[T_KSW_LOOP], nthreads, &max, &min, &avg);
|
||||
fprintf(stderr, "time_ksw_loop: %0.2lf s\n", avg);
|
||||
find_opt(tprof[T_KSW_REVERSE], nthreads, &max, &min, &avg);
|
||||
fprintf(stderr, "time_ksw_reverse: %0.2lf s\n", avg);
|
||||
find_opt(tprof[T_SAM_REG2ALN], nthreads, &max, &min, &avg);
|
||||
fprintf(stderr, "time_reg2aln: %0.2lf s\n", avg);
|
||||
// for seeding
|
||||
FORMAT_PERF_OUT_SUM_3("seed-chain", tprof[T_SEEDING], tprof[T_CHAIN], 0);
|
||||
FORMAT_PERF_OUT_3("seeding", tprof[T_SEEDING], 1);
|
||||
FORMAT_PERF_OUT_3("gen-seq", tprof[T_GEN_SEQ], 2);
|
||||
FORMAT_PERF_OUT_3("smem", tprof[T_SMEM], 2);
|
||||
FORMAT_PERF_OUT_3("seeding-1", tprof[T_SEED_1], 3);
|
||||
FORMAT_PERF_OUT_3("seeding-2", tprof[T_SEED_2], 3);
|
||||
FORMAT_PERF_OUT_3("seeding-3", tprof[T_SEED_3], 3);
|
||||
FORMAT_PERF_OUT_3("chain", tprof[T_CHAIN], 1);
|
||||
FORMAT_PERF_OUT_3("gen-chain", tprof[T_GEN_CHAIN], 2);
|
||||
FORMAT_PERF_OUT_3("sa", tprof[T_SA], 3);
|
||||
FORMAT_PERF_OUT_3("flt-chain", tprof[T_FLT_CHAIN],2);
|
||||
FORMAT_PERF_OUT_SUM_3("smem + sa", tprof[T_SMEM], tprof[T_SA], 1);
|
||||
|
||||
fprintf(stderr, "\n");
|
||||
|
||||
// for extension
|
||||
FORMAT_PERF_OUT_3("extension", tprof[T_EXTENSION], 0);
|
||||
FORMAT_PERF_OUT_3("mem_chain2aln", tprof[T_MEM_CHAIN2ALN], 1);
|
||||
FORMAT_PERF_OUT_3("bsw_ext", tprof[T_BSW_EXT], 2);
|
||||
FORMAT_PERF_OUT_SUB_3("other", tprof[T_MEM_CHAIN2ALN], tprof[T_BSW_EXT], 2);
|
||||
FORMAT_PERF_OUT_3("sort_dedup", tprof[T_SORT_DEDUP], 1);
|
||||
|
||||
// for gen-sam
|
||||
FORMAT_PERF_OUT("gen-sam", gprof[G_GEN_SAM] * 1.0 / proc_freq, 0);
|
||||
FORMAT_PERF_OUT_3("sam_mate_sw", tprof[T_SAM_MATESW], 1);
|
||||
FORMAT_PERF_OUT_3("sam_reg2aln", tprof[T_SAM_REG2ALN], 1);
|
||||
|
||||
#if 0
|
||||
|
||||
fprintf(stderr, "time_ksw_loop: %0.2lf s\n", gprof[G_KSW_LOOP] * 1.0 / proc_freq);
|
||||
fprintf(stderr, "time_ksw_end_loop: %0.2lf s\n", gprof[G_KSW_END_LOOP] * 1.0 / proc_freq);
|
||||
|
||||
#if SHOW_DATA_PERF
|
||||
fprintf(stderr, "seq num: %ld\n", gdat[0]);
|
||||
|
|
@ -184,6 +200,7 @@ int display_stats(int nthreads)
|
|||
// PRINT_SEED_TIME(3_3_2);
|
||||
#endif
|
||||
double all = 0;
|
||||
int i;
|
||||
for (i = 0; i < 50; ++i) {
|
||||
//all += sum(tdat[i], nthreads);
|
||||
// fprintf(stderr, "sum %d: %ld\n", i, sum(tdat[i], nthreads));
|
||||
|
|
@ -217,8 +234,11 @@ int display_stats(int nthreads)
|
|||
// fprintf(stderr, "%ld,\n", sum(tdat[i], nthreads));
|
||||
// }
|
||||
|
||||
fprintf(stderr, "\n");
|
||||
|
||||
#endif
|
||||
|
||||
return 1;
|
||||
#endif
|
||||
|
||||
fprintf(stderr, "\n");
|
||||
return 0;
|
||||
}
|
||||
|
|
|
|||
36
profiling.h
36
profiling.h
|
|
@ -60,34 +60,42 @@ enum {
|
|||
// GLOBAL
|
||||
enum {
|
||||
G_ALL = 0,
|
||||
G_LOAD_IDX,
|
||||
G_PIPELINE,
|
||||
G_READ,
|
||||
G_WRITE,
|
||||
G_COMPUTE,
|
||||
G_PREPARE,
|
||||
G_LOAD_IDX,
|
||||
G_MEM_PREPARE,
|
||||
G_MEM_KERNEL,
|
||||
G_WRITE,
|
||||
G_SEED_AND_EXT,
|
||||
G_MEM_PESTAT,
|
||||
G_MEM_SAM,
|
||||
G_KSW_LOOP,
|
||||
G_KSW_END_LOOP,
|
||||
G_read_seq
|
||||
G_GEN_SAM,
|
||||
G_UNCOMPRESS
|
||||
};
|
||||
|
||||
// THREAD
|
||||
enum {
|
||||
T_SEED_ALL = 0,
|
||||
T_CHAIN_ALL,
|
||||
T_ALN_ALL,
|
||||
T_INS_SIZE,
|
||||
T_SEEDING = 0,
|
||||
T_GEN_SEQ,
|
||||
T_SMEM,
|
||||
T_SEED_1,
|
||||
T_SEED_2,
|
||||
T_SEED_3,
|
||||
T_SAL,
|
||||
T_CHAIN,
|
||||
T_GEN_CHAIN,
|
||||
T_SA,
|
||||
T_FLT_CHAIN,
|
||||
T_FLT_CHANNED_SEEDS,
|
||||
T_EXTENSION,
|
||||
T_MEM_CHAIN2ALN,
|
||||
T_BSW_EXT,
|
||||
T_SORT_DEDUP,
|
||||
T_GEN_SAM,
|
||||
T_MEM_REG2ALN,
|
||||
|
||||
|
||||
T_CHAIN_ALL,
|
||||
T_ALN_ALL,
|
||||
T_INS_SIZE,
|
||||
T_SAL,
|
||||
T_READ_SA,
|
||||
T_BSW,
|
||||
T_BSW_ALL,
|
||||
|
|
|
|||
4
utils.c
4
utils.c
|
|
@ -161,7 +161,7 @@ size_t err_fread_noeof(void *ptr, size_t size, size_t nmemb, FILE *stream)
|
|||
int err_gzread(gzFile file, void *ptr, unsigned int len)
|
||||
{
|
||||
int ret = 0;
|
||||
PROF_START(read);
|
||||
PROF_START(uncompress);
|
||||
#ifdef USE_ASYNC_READ
|
||||
khiter_t k = kh_get(fkv, fHash, (int64_t)file);
|
||||
FileKV* val = &kh_value(fHash, k);
|
||||
|
|
@ -189,7 +189,7 @@ int err_gzread(gzFile file, void *ptr, unsigned int len)
|
|||
#else
|
||||
ret = gzread(file, ptr, len);
|
||||
#endif
|
||||
PROF_END(gprof[G_read_seq], read);
|
||||
PROF_END(gprof[G_UNCOMPRESS], uncompress);
|
||||
|
||||
if (ret < 0)
|
||||
{
|
||||
|
|
|
|||
Loading…
Reference in New Issue