更新了性能测试

This commit is contained in:
zzh 2025-12-04 17:04:11 +08:00
parent 70979c1b60
commit 49028547ac
6 changed files with 160 additions and 104 deletions

1
.gitignore vendored
View File

@ -3,6 +3,7 @@ bwa
test
test64
.*.swp
*.log
Makefile.bak
bwamem-lite
test_index/

View File

@ -184,7 +184,8 @@ static void mem_collect_intv(const mem_opt_t* opt, const bwt_t* bwt, int len, co
int split_len = (int)(opt->min_seed_len * opt->split_factor + .499);
smem->mem.n = 0;
// first pass: find all SMEMs
while (x < len) {
PROF_START(seed_1);
while (x < len) {
if (seq[x] < 4) {
x = bwt_smem1(bwt, len, seq, x, start_width, &a->mem1, a->tmpv);
for (i = 0; i < a->mem1.n; ++i) {
@ -195,7 +196,10 @@ static void mem_collect_intv(const mem_opt_t* opt, const bwt_t* bwt, int len, co
}
} else ++x;
}
PROF_END(tprof[T_SEED_1][tid], seed_1);
// second pass: find MEMs inside a long SMEM
PROF_START(seed_2);
old_n = smem->mem.n;
for (k = 0; k < old_n; ++k) {
bwtintv_t *p = &smem->mem.a[k];
@ -206,8 +210,11 @@ static void mem_collect_intv(const mem_opt_t* opt, const bwt_t* bwt, int len, co
if ((uint32_t)a->mem1.a[i].info - (a->mem1.a[i].info>>32) >= opt->min_seed_len)
kv_push(bwtintv_t, smem->mem, a->mem1.a[i]);
}
PROF_END(tprof[T_SEED_2][tid], seed_2);
// third pass: LAST-like
if (opt->max_mem_intv > 0) {
PROF_START(seed_3);
if (opt->max_mem_intv > 0) {
x = 0;
while (x < len) {
if (seq[x] < 4) {
@ -223,6 +230,7 @@ static void mem_collect_intv(const mem_opt_t* opt, const bwt_t* bwt, int len, co
} else ++x;
}
}
PROF_END(tprof[T_SEED_3][tid], seed_3);
// sort
ks_introsort(mem_intv, smem->mem.n, smem->mem.a);
}
@ -246,9 +254,9 @@ static void hyb_seeding(const mem_opt_t* opt, const HybridIndex* hyb, ReadSeq* r
// fprintf(stderr, "seq-id: %ld\n", seq_id);
if (seq_id == 4) {
fprintf(stderr, "seq-id: %ld\n", seq_id);
}
//if (seq_id == 4) {
// fprintf(stderr, "seq-id: %ld\n", seq_id);
//}
// 1. seeding-1: find all SMEMs
PROF_START(seed_1);
@ -260,8 +268,7 @@ static void hyb_seeding(const mem_opt_t* opt, const HybridIndex* hyb, ReadSeq* r
hyb_first_seeding(hyb, read_seq, &range, opt->min_seed_len, seeds, tid);
seeds_ranges->a[i].end = seeds->n;
}
tprof[T_SEED_LEN][tid] += seeds->n;
//tprof[T_SEED_LEN][tid] += seeds->n;
PROF_END(tprof[T_SEED_1][tid], seed_1);
#if 1
// 2. seeding-2: find MEMs inside a long SMEM
@ -422,7 +429,9 @@ void generate_chain(const mem_opt_t* opt, const bwt_t* bwt, const bntseq_t* bns,
mem_chain_t tmp, *lower, *upper;
mem_seed_t s;
int rid, to_add = 0;
s.rbeg = tmp.pos = bwt_sa(bwt, p->x[0] + k); // this is the base coordinate in the forward-reverse reference
PROF_START(sa);
s.rbeg = tmp.pos = bwt_sa(bwt, p->x[0] + k); // this is the base coordinate in the forward-reverse reference
PROF_END(tprof[T_SA][tid], sa);
s.qbeg = p->info>>32;
s.score= s.len = slen;
rid = bns_intv2rid(bns, s.rbeg, s.rbeg + s.len);
@ -932,12 +941,14 @@ void mem_chain2aln(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac
printf("*** Left ref: "); for (j = 0; j < tmp; ++j) putchar("ACGTN"[(int)rseq[tmp - 1 - j]]); putchar('\n');
printf("*** Left query: "); for (j = 0; j < s->qbeg; ++j) putchar("ACGTN"[(int)query[s->qbeg - 1 - j]]); putchar('\n');
}
PROF_START(bsw_ext);
#ifndef USE_AVX2_EXT
a->score = ksw_extend2(s->qbeg, qs, tmp, rs, 5, opt->mat, opt->o_del, opt->e_del, opt->o_ins, opt->e_ins, aw[0], opt->pen_clip5, opt->zdrop, s->len * opt->a, &qle, &tle, &gtle, &gscore, &max_off[0]);
#else
a->score = ksw_extend2_avx2(s->qbeg, query, tmp, rseq, 1, 5, opt->mat, opt->o_del, opt->e_del, opt->o_ins, opt->e_ins, opt->a, opt->b,
aw[0], opt->pen_clip5, opt->zdrop, s->len * opt->a, &qle, &tle, &gtle, &gscore, &max_off[0], aux->sw_buf);
#endif
PROF_END(tprof[T_BSW_EXT][tid], bsw_ext);
if (bwa_verbose >= 4) { printf("*** Left extension: prev_score=%d; score=%d; bandwidth=%d; max_off_diagonal_dist=%d\n", prev, a->score, aw[0], max_off[0]); fflush(stdout); }
if (a->score == prev || max_off[0] < (aw[0]>>1) + (aw[0]>>2)) break;
}
@ -967,12 +978,14 @@ void mem_chain2aln(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac
printf("*** Right ref: "); for (j = 0; j < rmax[1] - rmax[0] - re; ++j) putchar("ACGTN"[(int)rseq[re+j]]); putchar('\n');
printf("*** Right query: "); for (j = 0; j < l_query - qe; ++j) putchar("ACGTN"[(int)query[qe+j]]); putchar('\n');
}
PROF_START(bsw_ext);
#ifndef USE_AVX2_EXT
a->score = ksw_extend2(l_query - qe, query + qe, rmax[1] - rmax[0] - re, rseq + re, 5, opt->mat, opt->o_del, opt->e_del, opt->o_ins, opt->e_ins, aw[1], opt->pen_clip3, opt->zdrop, sc0, &qle, &tle, &gtle, &gscore, &max_off[1]);
#else
a->score = ksw_extend2_avx2(l_query - qe, query + qe, rmax[1] - rmax[0] - re, rseq + re, 0, 5, opt->mat, opt->o_del, opt->e_del, opt->o_ins, opt->e_ins, opt->a, opt->b, aw[1], opt->pen_clip3, opt->zdrop, sc0, &qle, &tle, &gtle, &gscore, &max_off[1], aux->sw_buf);
#endif
if (bwa_verbose >= 4) { printf("*** Right extension: prev_score=%d; score=%d; bandwidth=%d; max_off_diagonal_dist=%d\n", prev, a->score, aw[1], max_off[1]); fflush(stdout); }
PROF_END(tprof[T_BSW_EXT][tid], bsw_ext);
if (bwa_verbose >= 4) { printf("*** Right extension: prev_score=%d; score=%d; bandwidth=%d; max_off_diagonal_dist=%d\n", prev, a->score, aw[1], max_off[1]); fflush(stdout); }
if (a->score == prev || max_off[1] < (aw[1]>>1) + (aw[1]>>2)) break;
}
// similar to the above
@ -1372,19 +1385,23 @@ void mem_core_process(const mem_opt_t* opt, const bwt_t* bwt, const HybridIndex*
if (opt->use_bwt) {
smem_v *smem_arr = (smem_v*)seed_arr;
// 1. seeding
PROF_START(seed_all);
PROF_START(seeding);
for (i = 0; i < nseq; ++i) {
PROF_START(gen_seq);
seq = seq_arr[i].seq;
l_seq = seq_arr[i].l_seq;
for (j = 0; j < l_seq; ++j) {
seq[j] = seq[j] < 4 ? seq[j] : nst_nt4_table[(int)seq[j]];
}
PROF_END(tprof[T_GEN_SEQ][tid], gen_seq);
PROF_START(smem);
find_smem(opt, bwt, l_seq, (uint8_t*)seq, aux, &smem_arr[i], tid);
PROF_END(tprof[T_SMEM][tid], smem);
}
PROF_END(tprof[T_SEED_ALL][tid], seed_all);
PROF_END(tprof[T_SEEDING][tid], seeding);
// 2. chain
PROF_START(chain_all);
PROF_START(chain);
for (i = 0; i < nseq; ++i) {
seq = seq_arr[i].seq;
l_seq = seq_arr[i].l_seq;
@ -1400,15 +1417,16 @@ void mem_core_process(const mem_opt_t* opt, const bwt_t* bwt, const HybridIndex*
PROF_END(tprof[T_FLT_CHANNED_SEEDS][tid], flt_chained_seeds);
if (bwa_verbose >= 4) mem_print_chain(bns, chnp);
}
PROF_END(tprof[T_CHAIN_ALL][tid], chain_all);
PROF_END(tprof[T_CHAIN][tid], chain);
} else {
HybSeedArr* seeds = (HybSeedArr*)seed_arr;
// 1. seeding
PROF_START(seed_all);
PROF_START(seeding);
RangeArr read_ranges = {0};
RangeArr seeds_ranges = {0};
Range init_range = {0};
for (i = 0; i < nseq; ++i) {
PROF_START(gen_seq);
uint8_t* reverse_seq = aux->reverse_seq->a;
uint8_t* for_bits = aux->for_bits->a;
uint8_t* back_bits = aux->back_bits->a;
@ -1441,14 +1459,17 @@ void mem_core_process(const mem_opt_t* opt, const bwt_t* bwt, const HybridIndex*
create_seq_fb_bits((uint8_t*)seq, l_seq, for_bits, back_bits);
ReadSeq read_seq = {l_seq, (uint8_t*)seq, reverse_seq, for_bits, back_bits, aux->seq_id};
++aux->seq_id;
PROF_END(tprof[T_GEN_SEQ][tid], gen_seq);
PROF_START(smem);
hyb_seeding(opt, hyb, &read_seq, &read_ranges, &seeds_ranges, &seeds[i], aux->seq_id, tid);
PROF_END(tprof[T_SMEM][tid], smem);
}
kv_destroy(read_ranges);
kv_destroy(seeds_ranges);
PROF_END(tprof[T_SEED_ALL][tid], seed_all);
PROF_END(tprof[T_SEEDING][tid], seeding);
// 2. chain
PROF_START(chain_all);
PROF_START(chain);
for (i = 0; i < nseq; ++i) {
seq = seq_arr[i].seq;
l_seq = seq_arr[i].l_seq;
@ -1465,11 +1486,11 @@ void mem_core_process(const mem_opt_t* opt, const bwt_t* bwt, const HybridIndex*
if (bwa_verbose >= 4)
mem_print_chain(bns, chnp);
}
PROF_END(tprof[T_CHAIN_ALL][tid], chain_all);
PROF_END(tprof[T_CHAIN][tid], chain);
}
// 3. align
PROF_START(aln_all);
PROF_START(extension);
for (i = 0; i < nseq; ++i) {
seq = seq_arr[i].seq;
l_seq = seq_arr[i].l_seq;
@ -1481,14 +1502,18 @@ void mem_core_process(const mem_opt_t* opt, const bwt_t* bwt, const HybridIndex*
mem_chain_t* p = &chnp->a[j];
if (bwa_verbose >= 4)
err_printf("* ---> Processing chain(%d) <---\n", j);
PROF_START(mem_chain2aln);
mem_chain2aln(opt, bns, pac, l_seq, (uint8_t*)seq, p, regp, aux, tid);
PROF_END(tprof[T_MEM_CHAIN2ALN][tid], mem_chain2aln);
free(chnp->a[j].seeds);
}
free(chnp->a);
chnp->m = 0;
chnp->a = 0;
PROF_START(sort_dedup);
regp->n = mem_sort_dedup_patch(opt, bns, pac, (uint8_t*)seq, regp->n, regp->a);
PROF_END(tprof[T_SORT_DEDUP][tid], sort_dedup);
if (bwa_verbose >= 4) {
err_printf("* %ld chains remain after removing duplicated chains\n", regp->n);
for (j = 0; j < regp->n; ++j) {
@ -1502,7 +1527,7 @@ void mem_core_process(const mem_opt_t* opt, const bwt_t* bwt, const HybridIndex*
p->is_alt = 1;
}
}
PROF_END(tprof[T_ALN_ALL][tid], aln_all);
PROF_END(tprof[T_EXTENSION][tid], extension);
// 4. calc insert size
#define MIN_RATIO 0.8
@ -1530,7 +1555,7 @@ void mem_core_process(const mem_opt_t* opt, const bwt_t* bwt, const HybridIndex*
}
}
static void worker_smem_align(void *data, long i, int tid)
static void worker_smem_extension(void *data, long i, int tid)
{
mem_worker_t *w = (mem_worker_t*)data;
int start = i * w->opt->batch_size;
@ -1584,21 +1609,21 @@ void mem_process_seqs(const mem_opt_t* opt, mem_worker_t* w, int64_t n_processed
for (j = 0; j < 4; ++j) w->isize_arr[i][j].n = 0;
}
PROF_START(kernel);
kt_for(opt->n_threads, worker_smem_align, w, n_batch); // find mapping positions
PROF_END(gprof[G_MEM_KERNEL], kernel);
PROF_START(seed_and_ext);
kt_for(opt->n_threads, worker_smem_extension, w, n_batch); // find mapping positions
PROF_END(gprof[G_SEED_AND_EXT], seed_and_ext);
PROF_START(pestat);
PROF_START(pestat);
if (opt->flag&MEM_F_PE) { // infer insert sizes if not provided
if (pes0) memcpy(pes, pes0, 4 * sizeof(mem_pestat_t)); // if pes0 != NULL, set the insert-size distribution as pes0
else mem_pestat(opt, w->bns->l_pac, n, w->isize_arr, pes); // otherwise, infer the insert size distribution from data
}
PROF_END(gprof[G_MEM_PESTAT], pestat);
PROF_START(mem_sam);
PROF_START(gen_sam);
kt_for(opt->n_threads, worker_sam, w, (opt->flag & MEM_F_PE) ? n >> 1 : n); // generate alignment
PROF_END(gprof[G_MEM_SAM], mem_sam);
PROF_END(gprof[G_GEN_SAM], gen_sam);
if (bwa_verbose >= 3)
if (bwa_verbose >= 3)
fprintf(stderr, "[M::%s] Processed %d reads in %.3f CPU sec, %.3f real sec\n", __func__, n, cputime() - ctime, realtime() - rtime);
}

View File

@ -306,7 +306,7 @@ static void update_a(mem_opt_t *opt, const mem_opt_t *opt0)
int main_mem(int argc, char *argv[])
{
mem_opt_t *opt, opt0;
mem_opt_t *opt, opt0;
int fd, fd2, i, c, ignore_alt = 0, no_mt_io = 0;
int fixed_chunk_size = -1;
gzFile fp, fp2 = 0;
@ -648,6 +648,8 @@ int main_mem(int argc, char *argv[])
close_files();
#endif
PROF_END(gprof[G_ALL], all);
return 0;
}

View File

@ -68,70 +68,86 @@ uint64_t get_sum(uint64_t *a, int len) {
int display_stats(int nthreads)
{
#ifdef SHOW_PERF
double avg, max, min;
int i;
fprintf(stderr, "[steps in main_mem]\n");
fprintf(stderr, "time_parse_arg: %0.2lf s\n", gprof[G_PREPARE] * 1.0 / proc_freq);
fprintf(stderr, "time_load_idx: %0.2lf s\n", gprof[G_LOAD_IDX] * 1.0 / proc_freq);
fprintf(stderr, "time_pipeline: %0.2lf s\n", gprof[G_PIPELINE] * 1.0 / proc_freq);
fprintf(stderr, "time_all: %0.2lf s\n", gprof[G_ALL] * 1.0 / proc_freq);
fprintf(stderr, "\n[steps in pipeline]\n");
fprintf(stderr, "time_read: %0.2lf s\n", gprof[G_READ] * 1.0 / proc_freq);
fprintf(stderr, "time_compute: %0.2lf s\n", gprof[G_COMPUTE] * 1.0 / proc_freq);
fprintf(stderr, "time_write: %0.2lf s\n", gprof[G_WRITE] * 1.0 / proc_freq);
#define FORMAT_PERF_OUT(name, sec, layer) \
do { \
int i = 0; \
for (i = 0; i < layer; i++) fprintf(stderr, " "); \
fprintf(stderr, "%s: %0.2lf s\n", name, sec); \
} while (0)
fprintf(stderr, "\n[steps in mem_process_seqs]\n");
fprintf(stderr, "time_mem_prepare: %0.2lf s\n", gprof[G_MEM_PREPARE] * 1.0 / proc_freq);
fprintf(stderr, "time_mem_kernel: %0.2lf s\n", gprof[G_MEM_KERNEL] * 1.0 / proc_freq);
fprintf(stderr, "time_mem_pestat: %0.2lf s\n", gprof[G_MEM_PESTAT] * 1.0 / proc_freq);
fprintf(stderr, "time_mem_sam: %0.2lf s\n", gprof[G_MEM_SAM] * 1.0 / proc_freq);
#define FORMAT_PERF_OUT_3(name, sec_arr, layer) \
do { \
int i = 0; \
double avg, max, min; \
for (i = 0; i < layer; i++) fprintf(stderr, " "); \
find_opt(sec_arr, nthreads, &max, &min, &avg); \
fprintf(stderr, "%s: %0.2lf (%0.2lf, %0.2lf) s\n", name, avg, max, min); \
} while (0)
fprintf(stderr, "\n[steps in kernel]\n");
find_opt(tprof[T_SEED_ALL], nthreads, &max, &min, &avg);
fprintf(stderr, "time_seed_all: %0.2lf (%0.2lf, %0.2lf) s\n", avg, max, min);
find_opt(tprof[T_CHAIN_ALL], nthreads, &max, &min, &avg);
fprintf(stderr, "time_chain_all: %0.2lf (%0.2lf, %0.2lf) s\n", avg, max, min);
find_opt(tprof[T_ALN_ALL], nthreads, &max, &min, &avg);
fprintf(stderr, "time_aln_all: %0.2lf (%0.2lf, %0.2lf) s\n", avg, max, min);
find_opt(tprof[T_INS_SIZE], nthreads, &max, &min, &avg);
fprintf(stderr, "time_ins_size_all: %0.2lf (%0.2lf, %0.2lf) s\n", avg, max, min);
#define FORMAT_PERF_OUT_SUM_3(name, sec_arr, sec_arr1, layer) \
do { \
int i = 0; \
double avg, max, min, avg1, max1, min1; \
for (i = 0; i < layer; i++) fprintf(stderr, " "); \
find_opt(sec_arr, nthreads, &max, &min, &avg); \
find_opt(sec_arr1, nthreads, &max1, &min1, &avg1); \
fprintf(stderr, "%s: %0.2lf (%0.2lf, %0.2lf) s\n", name, avg + avg1, max + max1, min + min1); \
} while (0)
fprintf(stderr, "\n[steps in seeding]\n");
find_opt(tprof[T_SEED_1], nthreads, &max, &min, &avg);
fprintf(stderr, "time_seed_1: %0.2lf s %0.2lf s %0.2lf s\n", max, min, avg);
find_opt(tprof[T_SEED_2], nthreads, &max, &min, &avg);
fprintf(stderr, "time_seed_2: %0.2lf s\n", avg);
find_opt(tprof[T_SEED_3], nthreads, &max, &min, &avg);
fprintf(stderr, "time_seed_3: %0.2lf s\n", avg);
fprintf(stderr, "\n[steps in chain]\n");
find_opt(tprof[T_GEN_CHAIN], nthreads, &max, &min, &avg);
fprintf(stderr, "time_gen_chain: %0.2lf s\n", avg);
find_opt(tprof[T_FLT_CHAIN], nthreads, &max, &min, &avg);
fprintf(stderr, "time_flt_chain: %0.2lf s\n", avg);
find_opt(tprof[T_FLT_CHANNED_SEEDS], nthreads, &max, &min, &avg);
fprintf(stderr, "time_flt_chained_seeds: %0.2lf s\n", avg);
find_opt(tprof[T_SAL], nthreads, &max, &min, &avg);
fprintf(stderr, "time_sal: %0.2lf s\n", avg);
find_opt(tprof[T_BSW], nthreads, &max, &min, &avg);
fprintf(stderr, "time_bsw: %0.2lf s\n", avg);
#define FORMAT_PERF_OUT_SUB_3(name, sec_arr, sec_arr1, layer) \
do { \
int i = 0; \
double avg, max, min, avg1, max1, min1; \
for (i = 0; i < layer; i++) fprintf(stderr, " "); \
find_opt(sec_arr, nthreads, &max, &min, &avg); \
find_opt(sec_arr1, nthreads, &max1, &min1, &avg1); \
fprintf(stderr, "%s: %0.2lf (%0.2lf, %0.2lf) s\n", name, avg - avg1, max - max1, min - min1); \
} while (0)
fprintf(stderr, "\n[steps in gen sam]\n");
find_opt(tprof[T_SAM_MATESW], nthreads, &max, &min, &avg);
fprintf(stderr, "time_mate_sw: %0.2lf s\n", avg);
find_opt(tprof[T_KSW_ALIGN2], nthreads, &max, &min, &avg);
fprintf(stderr, "time_ksw_align2: %0.2lf s\n", avg);
find_opt(tprof[T_KSW_LOOP], nthreads, &max, &min, &avg);
fprintf(stderr, "time_ksw_loop: %0.2lf s\n", avg);
find_opt(tprof[T_KSW_REVERSE], nthreads, &max, &min, &avg);
fprintf(stderr, "time_ksw_reverse: %0.2lf s\n", avg);
find_opt(tprof[T_SAM_REG2ALN], nthreads, &max, &min, &avg);
fprintf(stderr, "time_reg2aln: %0.2lf s\n", avg);
// for overall pipeline
FORMAT_PERF_OUT("all", gprof[G_ALL] * 1.0 / proc_freq, 0);
FORMAT_PERF_OUT("load_idx", gprof[G_LOAD_IDX] * 1.0 / proc_freq, 1);
FORMAT_PERF_OUT("pipeline", gprof[G_PIPELINE] * 1.0 / proc_freq, 1);
FORMAT_PERF_OUT("read", gprof[G_READ] * 1.0 / proc_freq, 2);
FORMAT_PERF_OUT("uncompress", gprof[G_UNCOMPRESS] * 1.0 / proc_freq, 3);
FORMAT_PERF_OUT("compute", gprof[G_COMPUTE] * 1.0 / proc_freq, 2);
FORMAT_PERF_OUT("seeding-extension", gprof[G_SEED_AND_EXT] * 1.0 / proc_freq, 3);
FORMAT_PERF_OUT("gen-sam", gprof[G_GEN_SAM] * 1.0 / proc_freq, 3);
FORMAT_PERF_OUT("write", gprof[G_WRITE] * 1.0 / proc_freq, 2);
fprintf(stderr, "\n");
// for seeding
FORMAT_PERF_OUT_SUM_3("seed-chain", tprof[T_SEEDING], tprof[T_CHAIN], 0);
FORMAT_PERF_OUT_3("seeding", tprof[T_SEEDING], 1);
FORMAT_PERF_OUT_3("gen-seq", tprof[T_GEN_SEQ], 2);
FORMAT_PERF_OUT_3("smem", tprof[T_SMEM], 2);
FORMAT_PERF_OUT_3("seeding-1", tprof[T_SEED_1], 3);
FORMAT_PERF_OUT_3("seeding-2", tprof[T_SEED_2], 3);
FORMAT_PERF_OUT_3("seeding-3", tprof[T_SEED_3], 3);
FORMAT_PERF_OUT_3("chain", tprof[T_CHAIN], 1);
FORMAT_PERF_OUT_3("gen-chain", tprof[T_GEN_CHAIN], 2);
FORMAT_PERF_OUT_3("sa", tprof[T_SA], 3);
FORMAT_PERF_OUT_3("flt-chain", tprof[T_FLT_CHAIN],2);
FORMAT_PERF_OUT_SUM_3("smem + sa", tprof[T_SMEM], tprof[T_SA], 1);
fprintf(stderr, "\n");
// for extension
FORMAT_PERF_OUT_3("extension", tprof[T_EXTENSION], 0);
FORMAT_PERF_OUT_3("mem_chain2aln", tprof[T_MEM_CHAIN2ALN], 1);
FORMAT_PERF_OUT_3("bsw_ext", tprof[T_BSW_EXT], 2);
FORMAT_PERF_OUT_SUB_3("other", tprof[T_MEM_CHAIN2ALN], tprof[T_BSW_EXT], 2);
FORMAT_PERF_OUT_3("sort_dedup", tprof[T_SORT_DEDUP], 1);
// for gen-sam
FORMAT_PERF_OUT("gen-sam", gprof[G_GEN_SAM] * 1.0 / proc_freq, 0);
FORMAT_PERF_OUT_3("sam_mate_sw", tprof[T_SAM_MATESW], 1);
FORMAT_PERF_OUT_3("sam_reg2aln", tprof[T_SAM_REG2ALN], 1);
#if 0
fprintf(stderr, "time_ksw_loop: %0.2lf s\n", gprof[G_KSW_LOOP] * 1.0 / proc_freq);
fprintf(stderr, "time_ksw_end_loop: %0.2lf s\n", gprof[G_KSW_END_LOOP] * 1.0 / proc_freq);
#if SHOW_DATA_PERF
fprintf(stderr, "seq num: %ld\n", gdat[0]);
@ -184,6 +200,7 @@ int display_stats(int nthreads)
// PRINT_SEED_TIME(3_3_2);
#endif
double all = 0;
int i;
for (i = 0; i < 50; ++i) {
//all += sum(tdat[i], nthreads);
// fprintf(stderr, "sum %d: %ld\n", i, sum(tdat[i], nthreads));
@ -217,8 +234,11 @@ int display_stats(int nthreads)
// fprintf(stderr, "%ld,\n", sum(tdat[i], nthreads));
// }
fprintf(stderr, "\n");
#endif
return 1;
#endif
fprintf(stderr, "\n");
return 0;
}

View File

@ -60,34 +60,42 @@ enum {
// GLOBAL
enum {
G_ALL = 0,
G_LOAD_IDX,
G_PIPELINE,
G_READ,
G_WRITE,
G_COMPUTE,
G_PREPARE,
G_LOAD_IDX,
G_MEM_PREPARE,
G_MEM_KERNEL,
G_WRITE,
G_SEED_AND_EXT,
G_MEM_PESTAT,
G_MEM_SAM,
G_KSW_LOOP,
G_KSW_END_LOOP,
G_read_seq
G_GEN_SAM,
G_UNCOMPRESS
};
// THREAD
enum {
T_SEED_ALL = 0,
T_CHAIN_ALL,
T_ALN_ALL,
T_INS_SIZE,
T_SEEDING = 0,
T_GEN_SEQ,
T_SMEM,
T_SEED_1,
T_SEED_2,
T_SEED_3,
T_SAL,
T_CHAIN,
T_GEN_CHAIN,
T_SA,
T_FLT_CHAIN,
T_FLT_CHANNED_SEEDS,
T_EXTENSION,
T_MEM_CHAIN2ALN,
T_BSW_EXT,
T_SORT_DEDUP,
T_GEN_SAM,
T_MEM_REG2ALN,
T_CHAIN_ALL,
T_ALN_ALL,
T_INS_SIZE,
T_SAL,
T_READ_SA,
T_BSW,
T_BSW_ALL,

View File

@ -161,7 +161,7 @@ size_t err_fread_noeof(void *ptr, size_t size, size_t nmemb, FILE *stream)
int err_gzread(gzFile file, void *ptr, unsigned int len)
{
int ret = 0;
PROF_START(read);
PROF_START(uncompress);
#ifdef USE_ASYNC_READ
khiter_t k = kh_get(fkv, fHash, (int64_t)file);
FileKV* val = &kh_value(fHash, k);
@ -189,7 +189,7 @@ int err_gzread(gzFile file, void *ptr, unsigned int len)
#else
ret = gzread(file, ptr, len);
#endif
PROF_END(gprof[G_read_seq], read);
PROF_END(gprof[G_UNCOMPRESS], uncompress);
if (ret < 0)
{