更新了性能测试
This commit is contained in:
parent
70979c1b60
commit
49028547ac
|
|
@ -3,6 +3,7 @@ bwa
|
||||||
test
|
test
|
||||||
test64
|
test64
|
||||||
.*.swp
|
.*.swp
|
||||||
|
*.log
|
||||||
Makefile.bak
|
Makefile.bak
|
||||||
bwamem-lite
|
bwamem-lite
|
||||||
test_index/
|
test_index/
|
||||||
|
|
|
||||||
79
bwamem.c
79
bwamem.c
|
|
@ -184,7 +184,8 @@ static void mem_collect_intv(const mem_opt_t* opt, const bwt_t* bwt, int len, co
|
||||||
int split_len = (int)(opt->min_seed_len * opt->split_factor + .499);
|
int split_len = (int)(opt->min_seed_len * opt->split_factor + .499);
|
||||||
smem->mem.n = 0;
|
smem->mem.n = 0;
|
||||||
// first pass: find all SMEMs
|
// first pass: find all SMEMs
|
||||||
while (x < len) {
|
PROF_START(seed_1);
|
||||||
|
while (x < len) {
|
||||||
if (seq[x] < 4) {
|
if (seq[x] < 4) {
|
||||||
x = bwt_smem1(bwt, len, seq, x, start_width, &a->mem1, a->tmpv);
|
x = bwt_smem1(bwt, len, seq, x, start_width, &a->mem1, a->tmpv);
|
||||||
for (i = 0; i < a->mem1.n; ++i) {
|
for (i = 0; i < a->mem1.n; ++i) {
|
||||||
|
|
@ -195,7 +196,10 @@ static void mem_collect_intv(const mem_opt_t* opt, const bwt_t* bwt, int len, co
|
||||||
}
|
}
|
||||||
} else ++x;
|
} else ++x;
|
||||||
}
|
}
|
||||||
|
PROF_END(tprof[T_SEED_1][tid], seed_1);
|
||||||
|
|
||||||
// second pass: find MEMs inside a long SMEM
|
// second pass: find MEMs inside a long SMEM
|
||||||
|
PROF_START(seed_2);
|
||||||
old_n = smem->mem.n;
|
old_n = smem->mem.n;
|
||||||
for (k = 0; k < old_n; ++k) {
|
for (k = 0; k < old_n; ++k) {
|
||||||
bwtintv_t *p = &smem->mem.a[k];
|
bwtintv_t *p = &smem->mem.a[k];
|
||||||
|
|
@ -206,8 +210,11 @@ static void mem_collect_intv(const mem_opt_t* opt, const bwt_t* bwt, int len, co
|
||||||
if ((uint32_t)a->mem1.a[i].info - (a->mem1.a[i].info>>32) >= opt->min_seed_len)
|
if ((uint32_t)a->mem1.a[i].info - (a->mem1.a[i].info>>32) >= opt->min_seed_len)
|
||||||
kv_push(bwtintv_t, smem->mem, a->mem1.a[i]);
|
kv_push(bwtintv_t, smem->mem, a->mem1.a[i]);
|
||||||
}
|
}
|
||||||
|
PROF_END(tprof[T_SEED_2][tid], seed_2);
|
||||||
|
|
||||||
// third pass: LAST-like
|
// third pass: LAST-like
|
||||||
if (opt->max_mem_intv > 0) {
|
PROF_START(seed_3);
|
||||||
|
if (opt->max_mem_intv > 0) {
|
||||||
x = 0;
|
x = 0;
|
||||||
while (x < len) {
|
while (x < len) {
|
||||||
if (seq[x] < 4) {
|
if (seq[x] < 4) {
|
||||||
|
|
@ -223,6 +230,7 @@ static void mem_collect_intv(const mem_opt_t* opt, const bwt_t* bwt, int len, co
|
||||||
} else ++x;
|
} else ++x;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
PROF_END(tprof[T_SEED_3][tid], seed_3);
|
||||||
// sort
|
// sort
|
||||||
ks_introsort(mem_intv, smem->mem.n, smem->mem.a);
|
ks_introsort(mem_intv, smem->mem.n, smem->mem.a);
|
||||||
}
|
}
|
||||||
|
|
@ -246,9 +254,9 @@ static void hyb_seeding(const mem_opt_t* opt, const HybridIndex* hyb, ReadSeq* r
|
||||||
|
|
||||||
// fprintf(stderr, "seq-id: %ld\n", seq_id);
|
// fprintf(stderr, "seq-id: %ld\n", seq_id);
|
||||||
|
|
||||||
if (seq_id == 4) {
|
//if (seq_id == 4) {
|
||||||
fprintf(stderr, "seq-id: %ld\n", seq_id);
|
// fprintf(stderr, "seq-id: %ld\n", seq_id);
|
||||||
}
|
//}
|
||||||
|
|
||||||
// 1. seeding-1: find all SMEMs
|
// 1. seeding-1: find all SMEMs
|
||||||
PROF_START(seed_1);
|
PROF_START(seed_1);
|
||||||
|
|
@ -260,8 +268,7 @@ static void hyb_seeding(const mem_opt_t* opt, const HybridIndex* hyb, ReadSeq* r
|
||||||
hyb_first_seeding(hyb, read_seq, &range, opt->min_seed_len, seeds, tid);
|
hyb_first_seeding(hyb, read_seq, &range, opt->min_seed_len, seeds, tid);
|
||||||
seeds_ranges->a[i].end = seeds->n;
|
seeds_ranges->a[i].end = seeds->n;
|
||||||
}
|
}
|
||||||
|
//tprof[T_SEED_LEN][tid] += seeds->n;
|
||||||
tprof[T_SEED_LEN][tid] += seeds->n;
|
|
||||||
PROF_END(tprof[T_SEED_1][tid], seed_1);
|
PROF_END(tprof[T_SEED_1][tid], seed_1);
|
||||||
#if 1
|
#if 1
|
||||||
// 2. seeding-2: find MEMs inside a long SMEM
|
// 2. seeding-2: find MEMs inside a long SMEM
|
||||||
|
|
@ -422,7 +429,9 @@ void generate_chain(const mem_opt_t* opt, const bwt_t* bwt, const bntseq_t* bns,
|
||||||
mem_chain_t tmp, *lower, *upper;
|
mem_chain_t tmp, *lower, *upper;
|
||||||
mem_seed_t s;
|
mem_seed_t s;
|
||||||
int rid, to_add = 0;
|
int rid, to_add = 0;
|
||||||
s.rbeg = tmp.pos = bwt_sa(bwt, p->x[0] + k); // this is the base coordinate in the forward-reverse reference
|
PROF_START(sa);
|
||||||
|
s.rbeg = tmp.pos = bwt_sa(bwt, p->x[0] + k); // this is the base coordinate in the forward-reverse reference
|
||||||
|
PROF_END(tprof[T_SA][tid], sa);
|
||||||
s.qbeg = p->info>>32;
|
s.qbeg = p->info>>32;
|
||||||
s.score= s.len = slen;
|
s.score= s.len = slen;
|
||||||
rid = bns_intv2rid(bns, s.rbeg, s.rbeg + s.len);
|
rid = bns_intv2rid(bns, s.rbeg, s.rbeg + s.len);
|
||||||
|
|
@ -932,12 +941,14 @@ void mem_chain2aln(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac
|
||||||
printf("*** Left ref: "); for (j = 0; j < tmp; ++j) putchar("ACGTN"[(int)rseq[tmp - 1 - j]]); putchar('\n');
|
printf("*** Left ref: "); for (j = 0; j < tmp; ++j) putchar("ACGTN"[(int)rseq[tmp - 1 - j]]); putchar('\n');
|
||||||
printf("*** Left query: "); for (j = 0; j < s->qbeg; ++j) putchar("ACGTN"[(int)query[s->qbeg - 1 - j]]); putchar('\n');
|
printf("*** Left query: "); for (j = 0; j < s->qbeg; ++j) putchar("ACGTN"[(int)query[s->qbeg - 1 - j]]); putchar('\n');
|
||||||
}
|
}
|
||||||
|
PROF_START(bsw_ext);
|
||||||
#ifndef USE_AVX2_EXT
|
#ifndef USE_AVX2_EXT
|
||||||
a->score = ksw_extend2(s->qbeg, qs, tmp, rs, 5, opt->mat, opt->o_del, opt->e_del, opt->o_ins, opt->e_ins, aw[0], opt->pen_clip5, opt->zdrop, s->len * opt->a, &qle, &tle, &gtle, &gscore, &max_off[0]);
|
a->score = ksw_extend2(s->qbeg, qs, tmp, rs, 5, opt->mat, opt->o_del, opt->e_del, opt->o_ins, opt->e_ins, aw[0], opt->pen_clip5, opt->zdrop, s->len * opt->a, &qle, &tle, &gtle, &gscore, &max_off[0]);
|
||||||
#else
|
#else
|
||||||
a->score = ksw_extend2_avx2(s->qbeg, query, tmp, rseq, 1, 5, opt->mat, opt->o_del, opt->e_del, opt->o_ins, opt->e_ins, opt->a, opt->b,
|
a->score = ksw_extend2_avx2(s->qbeg, query, tmp, rseq, 1, 5, opt->mat, opt->o_del, opt->e_del, opt->o_ins, opt->e_ins, opt->a, opt->b,
|
||||||
aw[0], opt->pen_clip5, opt->zdrop, s->len * opt->a, &qle, &tle, &gtle, &gscore, &max_off[0], aux->sw_buf);
|
aw[0], opt->pen_clip5, opt->zdrop, s->len * opt->a, &qle, &tle, &gtle, &gscore, &max_off[0], aux->sw_buf);
|
||||||
#endif
|
#endif
|
||||||
|
PROF_END(tprof[T_BSW_EXT][tid], bsw_ext);
|
||||||
if (bwa_verbose >= 4) { printf("*** Left extension: prev_score=%d; score=%d; bandwidth=%d; max_off_diagonal_dist=%d\n", prev, a->score, aw[0], max_off[0]); fflush(stdout); }
|
if (bwa_verbose >= 4) { printf("*** Left extension: prev_score=%d; score=%d; bandwidth=%d; max_off_diagonal_dist=%d\n", prev, a->score, aw[0], max_off[0]); fflush(stdout); }
|
||||||
if (a->score == prev || max_off[0] < (aw[0]>>1) + (aw[0]>>2)) break;
|
if (a->score == prev || max_off[0] < (aw[0]>>1) + (aw[0]>>2)) break;
|
||||||
}
|
}
|
||||||
|
|
@ -967,12 +978,14 @@ void mem_chain2aln(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac
|
||||||
printf("*** Right ref: "); for (j = 0; j < rmax[1] - rmax[0] - re; ++j) putchar("ACGTN"[(int)rseq[re+j]]); putchar('\n');
|
printf("*** Right ref: "); for (j = 0; j < rmax[1] - rmax[0] - re; ++j) putchar("ACGTN"[(int)rseq[re+j]]); putchar('\n');
|
||||||
printf("*** Right query: "); for (j = 0; j < l_query - qe; ++j) putchar("ACGTN"[(int)query[qe+j]]); putchar('\n');
|
printf("*** Right query: "); for (j = 0; j < l_query - qe; ++j) putchar("ACGTN"[(int)query[qe+j]]); putchar('\n');
|
||||||
}
|
}
|
||||||
|
PROF_START(bsw_ext);
|
||||||
#ifndef USE_AVX2_EXT
|
#ifndef USE_AVX2_EXT
|
||||||
a->score = ksw_extend2(l_query - qe, query + qe, rmax[1] - rmax[0] - re, rseq + re, 5, opt->mat, opt->o_del, opt->e_del, opt->o_ins, opt->e_ins, aw[1], opt->pen_clip3, opt->zdrop, sc0, &qle, &tle, &gtle, &gscore, &max_off[1]);
|
a->score = ksw_extend2(l_query - qe, query + qe, rmax[1] - rmax[0] - re, rseq + re, 5, opt->mat, opt->o_del, opt->e_del, opt->o_ins, opt->e_ins, aw[1], opt->pen_clip3, opt->zdrop, sc0, &qle, &tle, &gtle, &gscore, &max_off[1]);
|
||||||
#else
|
#else
|
||||||
a->score = ksw_extend2_avx2(l_query - qe, query + qe, rmax[1] - rmax[0] - re, rseq + re, 0, 5, opt->mat, opt->o_del, opt->e_del, opt->o_ins, opt->e_ins, opt->a, opt->b, aw[1], opt->pen_clip3, opt->zdrop, sc0, &qle, &tle, &gtle, &gscore, &max_off[1], aux->sw_buf);
|
a->score = ksw_extend2_avx2(l_query - qe, query + qe, rmax[1] - rmax[0] - re, rseq + re, 0, 5, opt->mat, opt->o_del, opt->e_del, opt->o_ins, opt->e_ins, opt->a, opt->b, aw[1], opt->pen_clip3, opt->zdrop, sc0, &qle, &tle, &gtle, &gscore, &max_off[1], aux->sw_buf);
|
||||||
#endif
|
#endif
|
||||||
if (bwa_verbose >= 4) { printf("*** Right extension: prev_score=%d; score=%d; bandwidth=%d; max_off_diagonal_dist=%d\n", prev, a->score, aw[1], max_off[1]); fflush(stdout); }
|
PROF_END(tprof[T_BSW_EXT][tid], bsw_ext);
|
||||||
|
if (bwa_verbose >= 4) { printf("*** Right extension: prev_score=%d; score=%d; bandwidth=%d; max_off_diagonal_dist=%d\n", prev, a->score, aw[1], max_off[1]); fflush(stdout); }
|
||||||
if (a->score == prev || max_off[1] < (aw[1]>>1) + (aw[1]>>2)) break;
|
if (a->score == prev || max_off[1] < (aw[1]>>1) + (aw[1]>>2)) break;
|
||||||
}
|
}
|
||||||
// similar to the above
|
// similar to the above
|
||||||
|
|
@ -1372,19 +1385,23 @@ void mem_core_process(const mem_opt_t* opt, const bwt_t* bwt, const HybridIndex*
|
||||||
if (opt->use_bwt) {
|
if (opt->use_bwt) {
|
||||||
smem_v *smem_arr = (smem_v*)seed_arr;
|
smem_v *smem_arr = (smem_v*)seed_arr;
|
||||||
// 1. seeding
|
// 1. seeding
|
||||||
PROF_START(seed_all);
|
PROF_START(seeding);
|
||||||
for (i = 0; i < nseq; ++i) {
|
for (i = 0; i < nseq; ++i) {
|
||||||
|
PROF_START(gen_seq);
|
||||||
seq = seq_arr[i].seq;
|
seq = seq_arr[i].seq;
|
||||||
l_seq = seq_arr[i].l_seq;
|
l_seq = seq_arr[i].l_seq;
|
||||||
for (j = 0; j < l_seq; ++j) {
|
for (j = 0; j < l_seq; ++j) {
|
||||||
seq[j] = seq[j] < 4 ? seq[j] : nst_nt4_table[(int)seq[j]];
|
seq[j] = seq[j] < 4 ? seq[j] : nst_nt4_table[(int)seq[j]];
|
||||||
}
|
}
|
||||||
|
PROF_END(tprof[T_GEN_SEQ][tid], gen_seq);
|
||||||
|
PROF_START(smem);
|
||||||
find_smem(opt, bwt, l_seq, (uint8_t*)seq, aux, &smem_arr[i], tid);
|
find_smem(opt, bwt, l_seq, (uint8_t*)seq, aux, &smem_arr[i], tid);
|
||||||
|
PROF_END(tprof[T_SMEM][tid], smem);
|
||||||
}
|
}
|
||||||
PROF_END(tprof[T_SEED_ALL][tid], seed_all);
|
PROF_END(tprof[T_SEEDING][tid], seeding);
|
||||||
|
|
||||||
// 2. chain
|
// 2. chain
|
||||||
PROF_START(chain_all);
|
PROF_START(chain);
|
||||||
for (i = 0; i < nseq; ++i) {
|
for (i = 0; i < nseq; ++i) {
|
||||||
seq = seq_arr[i].seq;
|
seq = seq_arr[i].seq;
|
||||||
l_seq = seq_arr[i].l_seq;
|
l_seq = seq_arr[i].l_seq;
|
||||||
|
|
@ -1400,15 +1417,16 @@ void mem_core_process(const mem_opt_t* opt, const bwt_t* bwt, const HybridIndex*
|
||||||
PROF_END(tprof[T_FLT_CHANNED_SEEDS][tid], flt_chained_seeds);
|
PROF_END(tprof[T_FLT_CHANNED_SEEDS][tid], flt_chained_seeds);
|
||||||
if (bwa_verbose >= 4) mem_print_chain(bns, chnp);
|
if (bwa_verbose >= 4) mem_print_chain(bns, chnp);
|
||||||
}
|
}
|
||||||
PROF_END(tprof[T_CHAIN_ALL][tid], chain_all);
|
PROF_END(tprof[T_CHAIN][tid], chain);
|
||||||
} else {
|
} else {
|
||||||
HybSeedArr* seeds = (HybSeedArr*)seed_arr;
|
HybSeedArr* seeds = (HybSeedArr*)seed_arr;
|
||||||
// 1. seeding
|
// 1. seeding
|
||||||
PROF_START(seed_all);
|
PROF_START(seeding);
|
||||||
RangeArr read_ranges = {0};
|
RangeArr read_ranges = {0};
|
||||||
RangeArr seeds_ranges = {0};
|
RangeArr seeds_ranges = {0};
|
||||||
Range init_range = {0};
|
Range init_range = {0};
|
||||||
for (i = 0; i < nseq; ++i) {
|
for (i = 0; i < nseq; ++i) {
|
||||||
|
PROF_START(gen_seq);
|
||||||
uint8_t* reverse_seq = aux->reverse_seq->a;
|
uint8_t* reverse_seq = aux->reverse_seq->a;
|
||||||
uint8_t* for_bits = aux->for_bits->a;
|
uint8_t* for_bits = aux->for_bits->a;
|
||||||
uint8_t* back_bits = aux->back_bits->a;
|
uint8_t* back_bits = aux->back_bits->a;
|
||||||
|
|
@ -1441,14 +1459,17 @@ void mem_core_process(const mem_opt_t* opt, const bwt_t* bwt, const HybridIndex*
|
||||||
create_seq_fb_bits((uint8_t*)seq, l_seq, for_bits, back_bits);
|
create_seq_fb_bits((uint8_t*)seq, l_seq, for_bits, back_bits);
|
||||||
ReadSeq read_seq = {l_seq, (uint8_t*)seq, reverse_seq, for_bits, back_bits, aux->seq_id};
|
ReadSeq read_seq = {l_seq, (uint8_t*)seq, reverse_seq, for_bits, back_bits, aux->seq_id};
|
||||||
++aux->seq_id;
|
++aux->seq_id;
|
||||||
|
PROF_END(tprof[T_GEN_SEQ][tid], gen_seq);
|
||||||
|
PROF_START(smem);
|
||||||
hyb_seeding(opt, hyb, &read_seq, &read_ranges, &seeds_ranges, &seeds[i], aux->seq_id, tid);
|
hyb_seeding(opt, hyb, &read_seq, &read_ranges, &seeds_ranges, &seeds[i], aux->seq_id, tid);
|
||||||
|
PROF_END(tprof[T_SMEM][tid], smem);
|
||||||
}
|
}
|
||||||
kv_destroy(read_ranges);
|
kv_destroy(read_ranges);
|
||||||
kv_destroy(seeds_ranges);
|
kv_destroy(seeds_ranges);
|
||||||
PROF_END(tprof[T_SEED_ALL][tid], seed_all);
|
PROF_END(tprof[T_SEEDING][tid], seeding);
|
||||||
|
|
||||||
// 2. chain
|
// 2. chain
|
||||||
PROF_START(chain_all);
|
PROF_START(chain);
|
||||||
for (i = 0; i < nseq; ++i) {
|
for (i = 0; i < nseq; ++i) {
|
||||||
seq = seq_arr[i].seq;
|
seq = seq_arr[i].seq;
|
||||||
l_seq = seq_arr[i].l_seq;
|
l_seq = seq_arr[i].l_seq;
|
||||||
|
|
@ -1465,11 +1486,11 @@ void mem_core_process(const mem_opt_t* opt, const bwt_t* bwt, const HybridIndex*
|
||||||
if (bwa_verbose >= 4)
|
if (bwa_verbose >= 4)
|
||||||
mem_print_chain(bns, chnp);
|
mem_print_chain(bns, chnp);
|
||||||
}
|
}
|
||||||
PROF_END(tprof[T_CHAIN_ALL][tid], chain_all);
|
PROF_END(tprof[T_CHAIN][tid], chain);
|
||||||
}
|
}
|
||||||
|
|
||||||
// 3. align
|
// 3. align
|
||||||
PROF_START(aln_all);
|
PROF_START(extension);
|
||||||
for (i = 0; i < nseq; ++i) {
|
for (i = 0; i < nseq; ++i) {
|
||||||
seq = seq_arr[i].seq;
|
seq = seq_arr[i].seq;
|
||||||
l_seq = seq_arr[i].l_seq;
|
l_seq = seq_arr[i].l_seq;
|
||||||
|
|
@ -1481,14 +1502,18 @@ void mem_core_process(const mem_opt_t* opt, const bwt_t* bwt, const HybridIndex*
|
||||||
mem_chain_t* p = &chnp->a[j];
|
mem_chain_t* p = &chnp->a[j];
|
||||||
if (bwa_verbose >= 4)
|
if (bwa_verbose >= 4)
|
||||||
err_printf("* ---> Processing chain(%d) <---\n", j);
|
err_printf("* ---> Processing chain(%d) <---\n", j);
|
||||||
|
PROF_START(mem_chain2aln);
|
||||||
mem_chain2aln(opt, bns, pac, l_seq, (uint8_t*)seq, p, regp, aux, tid);
|
mem_chain2aln(opt, bns, pac, l_seq, (uint8_t*)seq, p, regp, aux, tid);
|
||||||
|
PROF_END(tprof[T_MEM_CHAIN2ALN][tid], mem_chain2aln);
|
||||||
free(chnp->a[j].seeds);
|
free(chnp->a[j].seeds);
|
||||||
}
|
}
|
||||||
|
|
||||||
free(chnp->a);
|
free(chnp->a);
|
||||||
chnp->m = 0;
|
chnp->m = 0;
|
||||||
chnp->a = 0;
|
chnp->a = 0;
|
||||||
|
PROF_START(sort_dedup);
|
||||||
regp->n = mem_sort_dedup_patch(opt, bns, pac, (uint8_t*)seq, regp->n, regp->a);
|
regp->n = mem_sort_dedup_patch(opt, bns, pac, (uint8_t*)seq, regp->n, regp->a);
|
||||||
|
PROF_END(tprof[T_SORT_DEDUP][tid], sort_dedup);
|
||||||
if (bwa_verbose >= 4) {
|
if (bwa_verbose >= 4) {
|
||||||
err_printf("* %ld chains remain after removing duplicated chains\n", regp->n);
|
err_printf("* %ld chains remain after removing duplicated chains\n", regp->n);
|
||||||
for (j = 0; j < regp->n; ++j) {
|
for (j = 0; j < regp->n; ++j) {
|
||||||
|
|
@ -1502,7 +1527,7 @@ void mem_core_process(const mem_opt_t* opt, const bwt_t* bwt, const HybridIndex*
|
||||||
p->is_alt = 1;
|
p->is_alt = 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
PROF_END(tprof[T_ALN_ALL][tid], aln_all);
|
PROF_END(tprof[T_EXTENSION][tid], extension);
|
||||||
|
|
||||||
// 4. calc insert size
|
// 4. calc insert size
|
||||||
#define MIN_RATIO 0.8
|
#define MIN_RATIO 0.8
|
||||||
|
|
@ -1530,7 +1555,7 @@ void mem_core_process(const mem_opt_t* opt, const bwt_t* bwt, const HybridIndex*
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void worker_smem_align(void *data, long i, int tid)
|
static void worker_smem_extension(void *data, long i, int tid)
|
||||||
{
|
{
|
||||||
mem_worker_t *w = (mem_worker_t*)data;
|
mem_worker_t *w = (mem_worker_t*)data;
|
||||||
int start = i * w->opt->batch_size;
|
int start = i * w->opt->batch_size;
|
||||||
|
|
@ -1584,21 +1609,21 @@ void mem_process_seqs(const mem_opt_t* opt, mem_worker_t* w, int64_t n_processed
|
||||||
for (j = 0; j < 4; ++j) w->isize_arr[i][j].n = 0;
|
for (j = 0; j < 4; ++j) w->isize_arr[i][j].n = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
PROF_START(kernel);
|
PROF_START(seed_and_ext);
|
||||||
kt_for(opt->n_threads, worker_smem_align, w, n_batch); // find mapping positions
|
kt_for(opt->n_threads, worker_smem_extension, w, n_batch); // find mapping positions
|
||||||
PROF_END(gprof[G_MEM_KERNEL], kernel);
|
PROF_END(gprof[G_SEED_AND_EXT], seed_and_ext);
|
||||||
|
|
||||||
PROF_START(pestat);
|
PROF_START(pestat);
|
||||||
if (opt->flag&MEM_F_PE) { // infer insert sizes if not provided
|
if (opt->flag&MEM_F_PE) { // infer insert sizes if not provided
|
||||||
if (pes0) memcpy(pes, pes0, 4 * sizeof(mem_pestat_t)); // if pes0 != NULL, set the insert-size distribution as pes0
|
if (pes0) memcpy(pes, pes0, 4 * sizeof(mem_pestat_t)); // if pes0 != NULL, set the insert-size distribution as pes0
|
||||||
else mem_pestat(opt, w->bns->l_pac, n, w->isize_arr, pes); // otherwise, infer the insert size distribution from data
|
else mem_pestat(opt, w->bns->l_pac, n, w->isize_arr, pes); // otherwise, infer the insert size distribution from data
|
||||||
}
|
}
|
||||||
PROF_END(gprof[G_MEM_PESTAT], pestat);
|
PROF_END(gprof[G_MEM_PESTAT], pestat);
|
||||||
|
|
||||||
PROF_START(mem_sam);
|
PROF_START(gen_sam);
|
||||||
kt_for(opt->n_threads, worker_sam, w, (opt->flag & MEM_F_PE) ? n >> 1 : n); // generate alignment
|
kt_for(opt->n_threads, worker_sam, w, (opt->flag & MEM_F_PE) ? n >> 1 : n); // generate alignment
|
||||||
PROF_END(gprof[G_MEM_SAM], mem_sam);
|
PROF_END(gprof[G_GEN_SAM], gen_sam);
|
||||||
|
|
||||||
if (bwa_verbose >= 3)
|
if (bwa_verbose >= 3)
|
||||||
fprintf(stderr, "[M::%s] Processed %d reads in %.3f CPU sec, %.3f real sec\n", __func__, n, cputime() - ctime, realtime() - rtime);
|
fprintf(stderr, "[M::%s] Processed %d reads in %.3f CPU sec, %.3f real sec\n", __func__, n, cputime() - ctime, realtime() - rtime);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -306,7 +306,7 @@ static void update_a(mem_opt_t *opt, const mem_opt_t *opt0)
|
||||||
|
|
||||||
int main_mem(int argc, char *argv[])
|
int main_mem(int argc, char *argv[])
|
||||||
{
|
{
|
||||||
mem_opt_t *opt, opt0;
|
mem_opt_t *opt, opt0;
|
||||||
int fd, fd2, i, c, ignore_alt = 0, no_mt_io = 0;
|
int fd, fd2, i, c, ignore_alt = 0, no_mt_io = 0;
|
||||||
int fixed_chunk_size = -1;
|
int fixed_chunk_size = -1;
|
||||||
gzFile fp, fp2 = 0;
|
gzFile fp, fp2 = 0;
|
||||||
|
|
@ -648,6 +648,8 @@ int main_mem(int argc, char *argv[])
|
||||||
close_files();
|
close_files();
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
PROF_END(gprof[G_ALL], all);
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
136
profiling.c
136
profiling.c
|
|
@ -68,70 +68,86 @@ uint64_t get_sum(uint64_t *a, int len) {
|
||||||
int display_stats(int nthreads)
|
int display_stats(int nthreads)
|
||||||
{
|
{
|
||||||
#ifdef SHOW_PERF
|
#ifdef SHOW_PERF
|
||||||
double avg, max, min;
|
|
||||||
int i;
|
|
||||||
|
|
||||||
fprintf(stderr, "[steps in main_mem]\n");
|
#define FORMAT_PERF_OUT(name, sec, layer) \
|
||||||
fprintf(stderr, "time_parse_arg: %0.2lf s\n", gprof[G_PREPARE] * 1.0 / proc_freq);
|
do { \
|
||||||
fprintf(stderr, "time_load_idx: %0.2lf s\n", gprof[G_LOAD_IDX] * 1.0 / proc_freq);
|
int i = 0; \
|
||||||
fprintf(stderr, "time_pipeline: %0.2lf s\n", gprof[G_PIPELINE] * 1.0 / proc_freq);
|
for (i = 0; i < layer; i++) fprintf(stderr, " "); \
|
||||||
fprintf(stderr, "time_all: %0.2lf s\n", gprof[G_ALL] * 1.0 / proc_freq);
|
fprintf(stderr, "%s: %0.2lf s\n", name, sec); \
|
||||||
|
} while (0)
|
||||||
|
|
||||||
fprintf(stderr, "\n[steps in pipeline]\n");
|
#define FORMAT_PERF_OUT_3(name, sec_arr, layer) \
|
||||||
fprintf(stderr, "time_read: %0.2lf s\n", gprof[G_READ] * 1.0 / proc_freq);
|
do { \
|
||||||
fprintf(stderr, "time_compute: %0.2lf s\n", gprof[G_COMPUTE] * 1.0 / proc_freq);
|
int i = 0; \
|
||||||
fprintf(stderr, "time_write: %0.2lf s\n", gprof[G_WRITE] * 1.0 / proc_freq);
|
double avg, max, min; \
|
||||||
|
for (i = 0; i < layer; i++) fprintf(stderr, " "); \
|
||||||
|
find_opt(sec_arr, nthreads, &max, &min, &avg); \
|
||||||
|
fprintf(stderr, "%s: %0.2lf (%0.2lf, %0.2lf) s\n", name, avg, max, min); \
|
||||||
|
} while (0)
|
||||||
|
|
||||||
fprintf(stderr, "\n[steps in mem_process_seqs]\n");
|
#define FORMAT_PERF_OUT_SUM_3(name, sec_arr, sec_arr1, layer) \
|
||||||
fprintf(stderr, "time_mem_prepare: %0.2lf s\n", gprof[G_MEM_PREPARE] * 1.0 / proc_freq);
|
do { \
|
||||||
fprintf(stderr, "time_mem_kernel: %0.2lf s\n", gprof[G_MEM_KERNEL] * 1.0 / proc_freq);
|
int i = 0; \
|
||||||
fprintf(stderr, "time_mem_pestat: %0.2lf s\n", gprof[G_MEM_PESTAT] * 1.0 / proc_freq);
|
double avg, max, min, avg1, max1, min1; \
|
||||||
fprintf(stderr, "time_mem_sam: %0.2lf s\n", gprof[G_MEM_SAM] * 1.0 / proc_freq);
|
for (i = 0; i < layer; i++) fprintf(stderr, " "); \
|
||||||
|
find_opt(sec_arr, nthreads, &max, &min, &avg); \
|
||||||
|
find_opt(sec_arr1, nthreads, &max1, &min1, &avg1); \
|
||||||
|
fprintf(stderr, "%s: %0.2lf (%0.2lf, %0.2lf) s\n", name, avg + avg1, max + max1, min + min1); \
|
||||||
|
} while (0)
|
||||||
|
|
||||||
fprintf(stderr, "\n[steps in kernel]\n");
|
#define FORMAT_PERF_OUT_SUB_3(name, sec_arr, sec_arr1, layer) \
|
||||||
find_opt(tprof[T_SEED_ALL], nthreads, &max, &min, &avg);
|
do { \
|
||||||
fprintf(stderr, "time_seed_all: %0.2lf (%0.2lf, %0.2lf) s\n", avg, max, min);
|
int i = 0; \
|
||||||
find_opt(tprof[T_CHAIN_ALL], nthreads, &max, &min, &avg);
|
double avg, max, min, avg1, max1, min1; \
|
||||||
fprintf(stderr, "time_chain_all: %0.2lf (%0.2lf, %0.2lf) s\n", avg, max, min);
|
for (i = 0; i < layer; i++) fprintf(stderr, " "); \
|
||||||
find_opt(tprof[T_ALN_ALL], nthreads, &max, &min, &avg);
|
find_opt(sec_arr, nthreads, &max, &min, &avg); \
|
||||||
fprintf(stderr, "time_aln_all: %0.2lf (%0.2lf, %0.2lf) s\n", avg, max, min);
|
find_opt(sec_arr1, nthreads, &max1, &min1, &avg1); \
|
||||||
find_opt(tprof[T_INS_SIZE], nthreads, &max, &min, &avg);
|
fprintf(stderr, "%s: %0.2lf (%0.2lf, %0.2lf) s\n", name, avg - avg1, max - max1, min - min1); \
|
||||||
fprintf(stderr, "time_ins_size_all: %0.2lf (%0.2lf, %0.2lf) s\n", avg, max, min);
|
} while (0)
|
||||||
|
|
||||||
fprintf(stderr, "\n[steps in seeding]\n");
|
// for overall pipeline
|
||||||
find_opt(tprof[T_SEED_1], nthreads, &max, &min, &avg);
|
FORMAT_PERF_OUT("all", gprof[G_ALL] * 1.0 / proc_freq, 0);
|
||||||
fprintf(stderr, "time_seed_1: %0.2lf s %0.2lf s %0.2lf s\n", max, min, avg);
|
FORMAT_PERF_OUT("load_idx", gprof[G_LOAD_IDX] * 1.0 / proc_freq, 1);
|
||||||
find_opt(tprof[T_SEED_2], nthreads, &max, &min, &avg);
|
FORMAT_PERF_OUT("pipeline", gprof[G_PIPELINE] * 1.0 / proc_freq, 1);
|
||||||
fprintf(stderr, "time_seed_2: %0.2lf s\n", avg);
|
FORMAT_PERF_OUT("read", gprof[G_READ] * 1.0 / proc_freq, 2);
|
||||||
find_opt(tprof[T_SEED_3], nthreads, &max, &min, &avg);
|
FORMAT_PERF_OUT("uncompress", gprof[G_UNCOMPRESS] * 1.0 / proc_freq, 3);
|
||||||
fprintf(stderr, "time_seed_3: %0.2lf s\n", avg);
|
FORMAT_PERF_OUT("compute", gprof[G_COMPUTE] * 1.0 / proc_freq, 2);
|
||||||
|
FORMAT_PERF_OUT("seeding-extension", gprof[G_SEED_AND_EXT] * 1.0 / proc_freq, 3);
|
||||||
|
FORMAT_PERF_OUT("gen-sam", gprof[G_GEN_SAM] * 1.0 / proc_freq, 3);
|
||||||
|
FORMAT_PERF_OUT("write", gprof[G_WRITE] * 1.0 / proc_freq, 2);
|
||||||
|
|
||||||
fprintf(stderr, "\n[steps in chain]\n");
|
fprintf(stderr, "\n");
|
||||||
find_opt(tprof[T_GEN_CHAIN], nthreads, &max, &min, &avg);
|
|
||||||
fprintf(stderr, "time_gen_chain: %0.2lf s\n", avg);
|
|
||||||
find_opt(tprof[T_FLT_CHAIN], nthreads, &max, &min, &avg);
|
|
||||||
fprintf(stderr, "time_flt_chain: %0.2lf s\n", avg);
|
|
||||||
find_opt(tprof[T_FLT_CHANNED_SEEDS], nthreads, &max, &min, &avg);
|
|
||||||
fprintf(stderr, "time_flt_chained_seeds: %0.2lf s\n", avg);
|
|
||||||
find_opt(tprof[T_SAL], nthreads, &max, &min, &avg);
|
|
||||||
fprintf(stderr, "time_sal: %0.2lf s\n", avg);
|
|
||||||
find_opt(tprof[T_BSW], nthreads, &max, &min, &avg);
|
|
||||||
fprintf(stderr, "time_bsw: %0.2lf s\n", avg);
|
|
||||||
|
|
||||||
fprintf(stderr, "\n[steps in gen sam]\n");
|
// for seeding
|
||||||
find_opt(tprof[T_SAM_MATESW], nthreads, &max, &min, &avg);
|
FORMAT_PERF_OUT_SUM_3("seed-chain", tprof[T_SEEDING], tprof[T_CHAIN], 0);
|
||||||
fprintf(stderr, "time_mate_sw: %0.2lf s\n", avg);
|
FORMAT_PERF_OUT_3("seeding", tprof[T_SEEDING], 1);
|
||||||
find_opt(tprof[T_KSW_ALIGN2], nthreads, &max, &min, &avg);
|
FORMAT_PERF_OUT_3("gen-seq", tprof[T_GEN_SEQ], 2);
|
||||||
fprintf(stderr, "time_ksw_align2: %0.2lf s\n", avg);
|
FORMAT_PERF_OUT_3("smem", tprof[T_SMEM], 2);
|
||||||
find_opt(tprof[T_KSW_LOOP], nthreads, &max, &min, &avg);
|
FORMAT_PERF_OUT_3("seeding-1", tprof[T_SEED_1], 3);
|
||||||
fprintf(stderr, "time_ksw_loop: %0.2lf s\n", avg);
|
FORMAT_PERF_OUT_3("seeding-2", tprof[T_SEED_2], 3);
|
||||||
find_opt(tprof[T_KSW_REVERSE], nthreads, &max, &min, &avg);
|
FORMAT_PERF_OUT_3("seeding-3", tprof[T_SEED_3], 3);
|
||||||
fprintf(stderr, "time_ksw_reverse: %0.2lf s\n", avg);
|
FORMAT_PERF_OUT_3("chain", tprof[T_CHAIN], 1);
|
||||||
find_opt(tprof[T_SAM_REG2ALN], nthreads, &max, &min, &avg);
|
FORMAT_PERF_OUT_3("gen-chain", tprof[T_GEN_CHAIN], 2);
|
||||||
fprintf(stderr, "time_reg2aln: %0.2lf s\n", avg);
|
FORMAT_PERF_OUT_3("sa", tprof[T_SA], 3);
|
||||||
|
FORMAT_PERF_OUT_3("flt-chain", tprof[T_FLT_CHAIN],2);
|
||||||
|
FORMAT_PERF_OUT_SUM_3("smem + sa", tprof[T_SMEM], tprof[T_SA], 1);
|
||||||
|
|
||||||
|
fprintf(stderr, "\n");
|
||||||
|
|
||||||
|
// for extension
|
||||||
|
FORMAT_PERF_OUT_3("extension", tprof[T_EXTENSION], 0);
|
||||||
|
FORMAT_PERF_OUT_3("mem_chain2aln", tprof[T_MEM_CHAIN2ALN], 1);
|
||||||
|
FORMAT_PERF_OUT_3("bsw_ext", tprof[T_BSW_EXT], 2);
|
||||||
|
FORMAT_PERF_OUT_SUB_3("other", tprof[T_MEM_CHAIN2ALN], tprof[T_BSW_EXT], 2);
|
||||||
|
FORMAT_PERF_OUT_3("sort_dedup", tprof[T_SORT_DEDUP], 1);
|
||||||
|
|
||||||
|
// for gen-sam
|
||||||
|
FORMAT_PERF_OUT("gen-sam", gprof[G_GEN_SAM] * 1.0 / proc_freq, 0);
|
||||||
|
FORMAT_PERF_OUT_3("sam_mate_sw", tprof[T_SAM_MATESW], 1);
|
||||||
|
FORMAT_PERF_OUT_3("sam_reg2aln", tprof[T_SAM_REG2ALN], 1);
|
||||||
|
|
||||||
|
#if 0
|
||||||
|
|
||||||
fprintf(stderr, "time_ksw_loop: %0.2lf s\n", gprof[G_KSW_LOOP] * 1.0 / proc_freq);
|
|
||||||
fprintf(stderr, "time_ksw_end_loop: %0.2lf s\n", gprof[G_KSW_END_LOOP] * 1.0 / proc_freq);
|
|
||||||
|
|
||||||
#if SHOW_DATA_PERF
|
#if SHOW_DATA_PERF
|
||||||
fprintf(stderr, "seq num: %ld\n", gdat[0]);
|
fprintf(stderr, "seq num: %ld\n", gdat[0]);
|
||||||
|
|
@ -184,6 +200,7 @@ int display_stats(int nthreads)
|
||||||
// PRINT_SEED_TIME(3_3_2);
|
// PRINT_SEED_TIME(3_3_2);
|
||||||
#endif
|
#endif
|
||||||
double all = 0;
|
double all = 0;
|
||||||
|
int i;
|
||||||
for (i = 0; i < 50; ++i) {
|
for (i = 0; i < 50; ++i) {
|
||||||
//all += sum(tdat[i], nthreads);
|
//all += sum(tdat[i], nthreads);
|
||||||
// fprintf(stderr, "sum %d: %ld\n", i, sum(tdat[i], nthreads));
|
// fprintf(stderr, "sum %d: %ld\n", i, sum(tdat[i], nthreads));
|
||||||
|
|
@ -217,8 +234,11 @@ int display_stats(int nthreads)
|
||||||
// fprintf(stderr, "%ld,\n", sum(tdat[i], nthreads));
|
// fprintf(stderr, "%ld,\n", sum(tdat[i], nthreads));
|
||||||
// }
|
// }
|
||||||
|
|
||||||
fprintf(stderr, "\n");
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
return 1;
|
#endif
|
||||||
|
|
||||||
|
fprintf(stderr, "\n");
|
||||||
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
36
profiling.h
36
profiling.h
|
|
@ -60,34 +60,42 @@ enum {
|
||||||
// GLOBAL
|
// GLOBAL
|
||||||
enum {
|
enum {
|
||||||
G_ALL = 0,
|
G_ALL = 0,
|
||||||
|
G_LOAD_IDX,
|
||||||
G_PIPELINE,
|
G_PIPELINE,
|
||||||
G_READ,
|
G_READ,
|
||||||
G_WRITE,
|
|
||||||
G_COMPUTE,
|
G_COMPUTE,
|
||||||
G_PREPARE,
|
G_WRITE,
|
||||||
G_LOAD_IDX,
|
G_SEED_AND_EXT,
|
||||||
G_MEM_PREPARE,
|
|
||||||
G_MEM_KERNEL,
|
|
||||||
G_MEM_PESTAT,
|
G_MEM_PESTAT,
|
||||||
G_MEM_SAM,
|
G_GEN_SAM,
|
||||||
G_KSW_LOOP,
|
G_UNCOMPRESS
|
||||||
G_KSW_END_LOOP,
|
|
||||||
G_read_seq
|
|
||||||
};
|
};
|
||||||
|
|
||||||
// THREAD
|
// THREAD
|
||||||
enum {
|
enum {
|
||||||
T_SEED_ALL = 0,
|
T_SEEDING = 0,
|
||||||
T_CHAIN_ALL,
|
T_GEN_SEQ,
|
||||||
T_ALN_ALL,
|
T_SMEM,
|
||||||
T_INS_SIZE,
|
|
||||||
T_SEED_1,
|
T_SEED_1,
|
||||||
T_SEED_2,
|
T_SEED_2,
|
||||||
T_SEED_3,
|
T_SEED_3,
|
||||||
T_SAL,
|
T_CHAIN,
|
||||||
T_GEN_CHAIN,
|
T_GEN_CHAIN,
|
||||||
|
T_SA,
|
||||||
T_FLT_CHAIN,
|
T_FLT_CHAIN,
|
||||||
T_FLT_CHANNED_SEEDS,
|
T_FLT_CHANNED_SEEDS,
|
||||||
|
T_EXTENSION,
|
||||||
|
T_MEM_CHAIN2ALN,
|
||||||
|
T_BSW_EXT,
|
||||||
|
T_SORT_DEDUP,
|
||||||
|
T_GEN_SAM,
|
||||||
|
T_MEM_REG2ALN,
|
||||||
|
|
||||||
|
|
||||||
|
T_CHAIN_ALL,
|
||||||
|
T_ALN_ALL,
|
||||||
|
T_INS_SIZE,
|
||||||
|
T_SAL,
|
||||||
T_READ_SA,
|
T_READ_SA,
|
||||||
T_BSW,
|
T_BSW,
|
||||||
T_BSW_ALL,
|
T_BSW_ALL,
|
||||||
|
|
|
||||||
4
utils.c
4
utils.c
|
|
@ -161,7 +161,7 @@ size_t err_fread_noeof(void *ptr, size_t size, size_t nmemb, FILE *stream)
|
||||||
int err_gzread(gzFile file, void *ptr, unsigned int len)
|
int err_gzread(gzFile file, void *ptr, unsigned int len)
|
||||||
{
|
{
|
||||||
int ret = 0;
|
int ret = 0;
|
||||||
PROF_START(read);
|
PROF_START(uncompress);
|
||||||
#ifdef USE_ASYNC_READ
|
#ifdef USE_ASYNC_READ
|
||||||
khiter_t k = kh_get(fkv, fHash, (int64_t)file);
|
khiter_t k = kh_get(fkv, fHash, (int64_t)file);
|
||||||
FileKV* val = &kh_value(fHash, k);
|
FileKV* val = &kh_value(fHash, k);
|
||||||
|
|
@ -189,7 +189,7 @@ int err_gzread(gzFile file, void *ptr, unsigned int len)
|
||||||
#else
|
#else
|
||||||
ret = gzread(file, ptr, len);
|
ret = gzread(file, ptr, len);
|
||||||
#endif
|
#endif
|
||||||
PROF_END(gprof[G_read_seq], read);
|
PROF_END(gprof[G_UNCOMPRESS], uncompress);
|
||||||
|
|
||||||
if (ret < 0)
|
if (ret < 0)
|
||||||
{
|
{
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue