From 74dc4829b8187419b852ba61f563390cdd538994 Mon Sep 17 00:00:00 2001 From: zzh Date: Tue, 13 Jan 2026 01:15:33 +0800 Subject: [PATCH] =?UTF-8?q?=E6=B7=BB=E5=8A=A0=E4=BA=86=E4=B8=80=E4=BA=9B?= =?UTF-8?q?=E6=80=A7=E8=83=BD=E6=B5=8B=E8=AF=95=E4=BB=A3=E7=A0=81=EF=BC=8C?= =?UTF-8?q?=E5=A6=82gen=20sam=E8=BF=87=E7=A8=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .gitignore | 1 + .vscode/launch.json | 14 +++++-------- .vscode/tasks.json | 2 +- Makefile | 5 +++-- bwamem.c | 37 ++++++++++++++++++---------------- bwamem_pair.c | 32 ++++++++++++++++++------------ fastmap.c | 3 +++ hyb_idx.h | 6 +++--- hyb_utils.c | 2 ++ ksw.c | 48 ++++++++++++++++++++++++++++++--------------- ksw.h | 5 +++-- profiling.c | 15 +++++++++++++- profiling.h | 16 +++++++++++++-- 13 files changed, 121 insertions(+), 65 deletions(-) diff --git a/.gitignore b/.gitignore index 54233a1..0d40cd3 100644 --- a/.gitignore +++ b/.gitignore @@ -7,6 +7,7 @@ test64 Makefile.bak bwamem-lite test_index/ +ecoli_index/ index/ orig_index/ output/ diff --git a/.vscode/launch.json b/.vscode/launch.json index bfc37a4..fd57c23 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -18,19 +18,15 @@ "-R", "'@RG\\tID:normal\\tSM:normal\\tPL:illumina\\tLB:normal\\tPG:bwa'", "/home/zzh/work/bioinfo/hyb-align/index/human_g1k_v37_decoy.fasta", - //"./b1.fq", - //"./b2.fq", - //"./b1.fq", - //"~/data/dataset/real/D1/n1.fq", - //"~/data/dataset/real/D1/n2.fq", + //"./b1.fq", "./b2.fq", + //"~/data/dataset/real/D1/n1.fq", "~/data/dataset/real/D1/n2.fq", //"~/data1/fastq/dataset/zy_wgs/E150010395_L01_690_1.fq", //"~/data1/fastq/dataset/zy_wgs/E150010395_L01_690_2.fq", //"~/data/dataset/real/D3/n1.fq", //"~/data/dataset/real/D3/n2.fq", - //"~/data/dataset/real/D1/n1.fq.gz", - //"~/data/dataset/real/D1/n2.fq.gz", - "~/data/dataset/real/D3/1w1.fq", - "~/data/dataset/real/D3/1w2.fq", + // "~/data/dataset/real/D2/n1.fq.gz", "~/data/dataset/real/D2/n2.fq.gz", + //"~/data/dataset/real/D2/n1.fq.gz", "~/data/dataset/real/D2/n2.fq.gz", + "~/data/dataset/real/D3/1w1.fq", "~/data/dataset/real/D3/1w2.fq", "-o", "/dev/null", // "-g", diff --git a/.vscode/tasks.json b/.vscode/tasks.json index f76ae19..2ea391f 100644 --- a/.vscode/tasks.json +++ b/.vscode/tasks.json @@ -6,7 +6,7 @@ { "label": "Build", "type": "shell", - "command": "make clean; make -j 16", + "command": "make -j 16", "problemMatcher": [], "group": { "kind": "build", diff --git a/Makefile b/Makefile index 626a936..5369d09 100644 --- a/Makefile +++ b/Makefile @@ -3,7 +3,7 @@ CC= gcc CFLAGS= -g -Wall -Wno-unused-function -mavx2 -O3 WRAP_MALLOC=-DUSE_MALLOC_WRAPPERS AR= ar -DFLAGS= -DHAVE_PTHREAD $(WRAP_MALLOC) -DUSE_AVX2_EXT -DSHOW_PERF -DDEBUG_FILE_OUTPUT +DFLAGS= -DHAVE_PTHREAD $(WRAP_MALLOC) -DUSE_AVX2_EXT -DSHOW_PERF -DSHOW_DATA_PERF -DDEBUG_FILE_OUTPUT HYBOBJS= hyb_bwa.o hyb_utils.o hyb_seeding_1.o hyb_seeding_2.o hyb_seeding_3.o hyb_create_idx.o debug.o profiling.o share_mem.o yarn.o \ ksw_extend2_avx2.o ksw_extend2_avx2_u8.o LOBJS= utils.o kthread.o kstring.o ksw.o bwt.o bntseq.o bwa.o bwamem.o bwamem_pair.o bwamem_extra.o malloc_wrap.o \ @@ -17,6 +17,7 @@ PROG= hybalign INCLUDES= LIBS= -lm -lz -lpthread SUBDIRS= . +JE_MALLOC=/home/zzh/work/bioinfo/libjemalloc.a ifeq ($(shell uname -s),Linux) LIBS += -lrt @@ -38,7 +39,7 @@ endif all:$(PROG) $(PROG):libbwa.a $(AOBJS) main.o - $(CC) $(CFLAGS) $(LDFLAGS) $(AOBJS) main.o -o $@ -L. -lbwa $(LIBS) + $(CC) $(CFLAGS) $(LDFLAGS) $(AOBJS) $(JE_MALLOC) main.o -o $@ -L. -lbwa $(LIBS) bwamem-lite:libbwa.a example.o $(CC) $(CFLAGS) $(LDFLAGS) example.o -o $@ -L. -lbwa $(LIBS) diff --git a/bwamem.c b/bwamem.c index 7ac2f3b..9f2845c 100644 --- a/bwamem.c +++ b/bwamem.c @@ -477,13 +477,16 @@ void hyb_generate_chain(const mem_opt_t *opt, const HybridIndex *hyb, const bnts if (sb > e) l_rep += e - b, b = sb, e = se; else e = e > se ? e : se; } - l_rep += e - b; + l_rep += e - b; // 大部分时候都是零,有啥用?是不是对应重叠区的,好像只有重叠区的匹配个数会超过max_occ for (i = 0; i < seeds->n; ++i) { - HybSeed *seed = &kv_A(*seeds, i); + HybSeed *seed = &kv_A(*seeds, i); int step, count; // seed length int64_t k; step = seed->ref_pos_arr.n > opt->max_occ ? seed->ref_pos_arr.n / opt->max_occ : 1; for (k = count = 0; k < seed->ref_pos_arr.n && count < opt->max_occ; k += step, ++count) { +#ifdef SHOW_DATA_PERF + tdat[TD_SEED_CNT][tid] += 1; +#endif mem_chain_t tmp, *lower, *upper; mem_seed_t s; int rid, to_add = 0; @@ -778,7 +781,7 @@ int mem_mark_primary_se(const mem_opt_t *opt, int n, mem_alnreg_t *a, int64_t id #define MEM_MINSC_COEF 5.5f #define MEM_SEEDSW_COEF 0.05f -int mem_seed_sw(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, int l_query, const uint8_t *query, const mem_seed_t *s) +int mem_seed_sw(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, int l_query, const uint8_t *query, const mem_seed_t *s, int tid) { int qb, qe, rid; int64_t rb, re, mid, l_pac = bns->l_pac; @@ -800,12 +803,12 @@ int mem_seed_sw(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, i if (qe - qb >= MEM_SHORT_LEN || re - rb >= MEM_SHORT_LEN) return -1; // the seed seems good enough; no need to do SW rseq = bns_fetch_seq(bns, pac, &rb, mid, &re, &rid); - x = ksw_align2(qe - qb, (uint8_t*)query + qb, re - rb, rseq, 5, opt->mat, opt->o_del, opt->e_del, opt->o_ins, opt->e_ins, KSW_XSTART, 0); - free(rseq); + x = ksw_align2(qe - qb, (uint8_t*)query + qb, re - rb, rseq, 5, opt->mat, opt->o_del, opt->e_del, opt->o_ins, opt->e_ins, KSW_XSTART, 0, tid); + free(rseq); return x.score; } -void mem_flt_chained_seeds(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, int l_query, const uint8_t *query, int n_chn, mem_chain_t *a) +void mem_flt_chained_seeds(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, int l_query, const uint8_t *query, int n_chn, mem_chain_t *a, int tid) { double min_l = opt->min_chain_weight? MEM_HSP_COEF * opt->min_chain_weight : MEM_MINSC_COEF * log(l_query); int i, j, k, min_HSP_score = (int)(opt->a * min_l + .499); @@ -814,7 +817,7 @@ void mem_flt_chained_seeds(const mem_opt_t *opt, const bntseq_t *bns, const uint mem_chain_t *c = &a[i]; for (j = k = 0; j < c->n; ++j) { mem_seed_t *s = &c->seeds[j]; - s->score = mem_seed_sw(opt, bns, pac, l_query, query, s); + s->score = mem_seed_sw(opt, bns, pac, l_query, query, s, tid); if (s->score < 0 || s->score >= min_HSP_score) { s->score = s->score < 0? s->len * opt->a : s->score; c->seeds[k++] = *s; @@ -854,7 +857,7 @@ void mem_chain2aln(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac for (i = 0; i < c->n; ++i) { int64_t b, e; const mem_seed_t *t = &c->seeds[i]; - b = t->rbeg - (t->qbeg + cal_max_gap(opt, t->qbeg)); + b = t->rbeg - (t->qbeg + cal_max_gap(opt, t->qbeg)); // 应该是计算对应seed的ref区间,因为要进行extension,所以ref一定要大于seed一些 e = t->rbeg + t->len + ((l_query - t->qbeg - t->len) + cal_max_gap(opt, l_query - t->qbeg - t->len)); rmax[0] = rmax[0] < b? rmax[0] : b; rmax[1] = rmax[1] > e? rmax[1] : e; @@ -870,14 +873,14 @@ void mem_chain2aln(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac rseq = bns_fetch_seq(bns, pac, &rmax[0], c->seeds[0].rbeg, &rmax[1], &rid); assert(c->rid == rid); - srt = malloc(c->n * 8); + srt = malloc(c->n * 8); // 将chain中的seeds按照score从小到大排序 for (i = 0; i < c->n; ++i) srt[i] = (uint64_t)c->seeds[i].score<<32 | i; ks_introsort_64(c->n, srt); for (k = c->n - 1; k >= 0; --k) { mem_alnreg_t *a; - s = &c->seeds[(uint32_t)srt[k]]; + s = &c->seeds[(uint32_t)srt[k]]; // 先从分数最高的seed开始计算 for (i = 0; i < av->n; ++i) { // test whether extension has been made before mem_alnreg_t *p = &av->a[i]; @@ -923,7 +926,7 @@ void mem_chain2aln(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac a->rid = c->rid; if (bwa_verbose >= 4) err_printf("** ---> Extending from seed(%d) [%ld;%ld,%ld] @ %s <---\n", k, (long)s->len, (long)s->qbeg, (long)s->rbeg, bns->anns[c->rid].name); - if (s->qbeg) { // left extension + if (s->qbeg) { // left extension,如果seed就是从0开始的,左边没有碱基了,就不用向左扩展了 int qle, tle, gtle, gscore; tmp = s->rbeg - rmax[0]; #ifndef USE_AVX2_EXT @@ -965,12 +968,12 @@ void mem_chain2aln(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac #endif } else a->score = a->truesc = s->len * opt->a, a->qb = 0, a->rb = s->rbeg; - if (s->qbeg + s->len != l_query) { // right extension + if (s->qbeg + s->len != l_query) { // right extension,如果种子右边没有到read的终点,那就是右边还有碱基需要扩展 int qle, tle, qe, re, gtle, gscore, sc0 = a->score; - qe = s->qbeg + s->len; + qe = s->qbeg + s->len; // 计算右端开始的位置 re = s->rbeg + s->len - rmax[0]; assert(re >= 0); - for (i = 0; i < MAX_BAND_TRY; ++i) { + for (i = 0; i < MAX_BAND_TRY; ++i) { // 如果第一次bsw计算的分数超过初始score而且出现在band边界位置,需要扩大band,继续找一次。大部分时候好像一次就够了 int prev = a->score; aw[1] = opt->w << i; if (bwa_verbose >= 4) { @@ -999,7 +1002,7 @@ void mem_chain2aln(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac } else a->qe = l_query, a->re = s->rbeg + s->len; if (bwa_verbose >= 4) printf("*** Added alignment region: [%d,%d) <=> [%ld,%ld); score=%d; {left,right}_bandwidth={%d,%d}\n", a->qb, a->qe, (long)a->rb, (long)a->re, a->score, aw[0], aw[1]); - // compute seedcov + // compute seedcov,计算覆盖范围,当seed完全包含在align里边时,有效 for (i = 0, a->seedcov = 0; i < c->n; ++i) { const mem_seed_t *t = &c->seeds[i]; if (t->qbeg >= a->qb && t->qbeg + t->len <= a->qe && t->rbeg >= a->rb && t->rbeg + t->len <= a->re) // seed fully contained @@ -1413,7 +1416,7 @@ void mem_core_process(const mem_opt_t* opt, const bwt_t* bwt, const HybridIndex* chnp->n = mem_chain_flt(opt, chnp->n, chnp->a); PROF_END(tprof[T_FLT_CHAIN][tid], flt_chain); PROF_START(flt_chained_seeds); - mem_flt_chained_seeds(opt, bns, pac, l_seq, (uint8_t*)seq, chnp->n, chnp->a); + mem_flt_chained_seeds(opt, bns, pac, l_seq, (uint8_t*)seq, chnp->n, chnp->a, tid); PROF_END(tprof[T_FLT_CHANNED_SEEDS][tid], flt_chained_seeds); if (bwa_verbose >= 4) mem_print_chain(bns, chnp); } @@ -1481,7 +1484,7 @@ void mem_core_process(const mem_opt_t* opt, const bwt_t* bwt, const HybridIndex* chnp->n = mem_chain_flt(opt, chnp->n, chnp->a); PROF_END(tprof[T_FLT_CHAIN][tid], flt_chain); PROF_START(flt_chained_seeds); - mem_flt_chained_seeds(opt, bns, pac, l_seq, (uint8_t*)seq, chnp->n, chnp->a); + mem_flt_chained_seeds(opt, bns, pac, l_seq, (uint8_t*)seq, chnp->n, chnp->a, tid); PROF_END(tprof[T_FLT_CHANNED_SEEDS][tid], flt_chained_seeds); if (bwa_verbose >= 4) mem_print_chain(bns, chnp); diff --git a/bwamem_pair.c b/bwamem_pair.c index 81269cc..91c4cc2 100644 --- a/bwamem_pair.c +++ b/bwamem_pair.c @@ -127,7 +127,7 @@ void mem_pestat(const mem_opt_t* opt, int64_t l_pac, int n, uint64_v** isize_arr } } -int mem_matesw(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, const mem_pestat_t pes[4], const mem_alnreg_t *a, int l_ms, const uint8_t *ms, mem_alnreg_v *ma) +int mem_matesw(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, const mem_pestat_t pes[4], const mem_alnreg_t *a, int l_ms, const uint8_t *ms, mem_alnreg_v *ma, int tid) { extern int mem_sort_dedup_patch(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, uint8_t *query, int n, mem_alnreg_t *a); int64_t l_pac = bns->l_pac; @@ -167,8 +167,11 @@ int mem_matesw(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, co kswr_t aln; mem_alnreg_t b; int tmp, xtra = KSW_XSUBO | KSW_XSTART | (l_ms * opt->a < 250? KSW_XBYTE : 0) | (opt->min_seed_len * opt->a); - aln = ksw_align2(l_ms, seq, re - rb, ref, 5, opt->mat, opt->o_del, opt->e_del, opt->o_ins, opt->e_ins, xtra, 0); - memset(&b, 0, sizeof(mem_alnreg_t)); + aln = ksw_align2(l_ms, seq, re - rb, ref, 5, opt->mat, opt->o_del, opt->e_del, opt->o_ins, opt->e_ins, xtra, 0, tid); +#ifdef SHOW_DATA_PERF + tdat[TD_MATESW_CNT][tid] += 1; +#endif + memset(&b, 0, sizeof(mem_alnreg_t)); if (aln.score >= opt->min_seed_len && aln.qb >= 0) { // something goes wrong if aln.qb < 0 b.rid = a->rid; b.is_alt = a->is_alt; @@ -290,8 +293,9 @@ int mem_sam_pe(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, co if (a[i].a[j].score >= a[i].a[0].score - opt->pen_unpaired) kv_push(mem_alnreg_t, b[i], a[i].a[j]); for (i = 0; i < 2; ++i) - for (j = 0; j < b[i].n && j < opt->max_matesw; ++j) - n += mem_matesw(opt, bns, pac, pes, &b[i].a[j], s[!i].l_seq, (uint8_t*)s[!i].seq, &a[!i]); + for (j = 0; j < b[i].n && j < opt->max_matesw; ++j) { + n += mem_matesw(opt, bns, pac, pes, &b[i].a[j], s[!i].l_seq, (uint8_t*)s[!i].seq, &a[!i], tid); + } free(b[0].a); free(b[1].a); } PROF_END(tprof[T_SAM_MATESW][tid], matesw); @@ -352,15 +356,16 @@ int mem_sam_pe(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, co a[i].a[z[i]].secondary_all = -1; } } - if (!(opt->flag & MEM_F_ALL)) { + PROF_START(gen_alt); + if (!(opt->flag & MEM_F_ALL)) { for (i = 0; i < 2; ++i) XA[i] = mem_gen_alt(opt, bns, pac, &a[i], s[i].l_seq, s[i].seq); } else XA[0] = XA[1] = 0; - // write SAM - for (i = 0; i < 2; ++i) { - PROF_START(reg2aln); + PROF_END(tprof[T_SAM_GEN_ALT][tid], gen_alt); + // write SAM + PROF_START(reg2aln); + for (i = 0; i < 2; ++i) { h[i] = mem_reg2aln(opt, bns, pac, s[i].l_seq, s[i].seq, &a[i].a[z[i]]); - PROF_END(tprof[T_SAM_REG2ALN][tid], reg2aln); h[i].mapq = q_se[i]; h[i].flag |= 0x40<= 3) fprintf(stderr, "[M::%s] read %d sequences (%ld bp)...\n", __func__, ret->n_seqs, (long)size); +#ifdef SHOW_DATA_PERF + gdat[GD_READ_CNT] += ret->n_seqs; +#endif return ret; } diff --git a/hyb_idx.h b/hyb_idx.h index c08d504..db6b187 100644 --- a/hyb_idx.h +++ b/hyb_idx.h @@ -66,8 +66,8 @@ typedef kvec_t(ReadSeq) ReadSeqArr; // seeding过程生成的SMEM typedef struct { - int first_len; - int seed_start; // 左闭右开区间 + int first_len; // 记录第一次匹配时的长度,应该是用来减少seeding-3的计算的 + int seed_start; // 左闭右开区间,种子开始位置,对应在序列上的位置 int seed_end; int read_start; // 左闭右开区间, 默认是[0, read_len), 如果有N, 则是[N_pos + 1, next_N_pos) int read_end; @@ -132,7 +132,7 @@ typedef kvec_t(Range) RangeArr; // functions // 加载hybrid index -HybridIndex* load_hybrid_idx(const char* prefix); +// HybridIndex* load_hybrid_idx(const char* prefix); // 创建正向反向互补bits void create_seq_fb_bits(uint8_t* bs, int len, uint8_t* forward_bits, uint8_t* re); diff --git a/hyb_utils.c b/hyb_utils.c index 825a329..11c3173 100644 --- a/hyb_utils.c +++ b/hyb_utils.c @@ -9,6 +9,7 @@ ///////////////////////////////////////////////////// // 使用hybrid-index的工具函数 +/* // 加载hybrid index HybridIndex* load_hybrid_idx(const char* prefix) { HybridIndex* hyb = NULL; @@ -66,6 +67,7 @@ HybridIndex* load_hybrid_idx(const char* prefix) { // fprintf(stderr, "文件大小为: %ld 字节, %.2f GB\n", st.st_size, (double)st.st_size / (1024 * 1024 * 1024)); return hyb; } +*/ // 创建正向反向互补bits void create_seq_fb_bits(uint8_t* bs, int len, uint8_t* fs, uint8_t* rs) { diff --git a/ksw.c b/ksw.c index 1e584e9..66ab8ca 100644 --- a/ksw.c +++ b/ksw.c @@ -39,13 +39,15 @@ # include "malloc_wrap.h" #endif -#ifdef __GNUC__ -#define LIKELY(x) __builtin_expect((x),1) -#define UNLIKELY(x) __builtin_expect((x),0) -#else -#define LIKELY(x) (x) -#define UNLIKELY(x) (x) -#endif +#include "profiling.h" + +// #ifdef __GNUC__ +// #define LIKELY(x) __builtin_expect((x),1) +// #define UNLIKELY(x) __builtin_expect((x),0) +// #else +// #define LIKELY(x) (x) +// #define UNLIKELY(x) (x) +// #endif const kswr_t g_defr = { 0, -1, -1, -1, -1, -1, -1 }; @@ -88,7 +90,7 @@ kswq_t *ksw_qinit(int size, int qlen, const uint8_t *query, int m, const int8_t if (mat[a] > (int8_t)q->mdiff) q->mdiff = mat[a]; } q->max = q->mdiff; - q->shift = 256 - q->shift; // NB: q->shift is uint8_t + q->shift = 256 - q->shift; // NB: q->shift is uint8_t,应该是为了避免负数的罚分 q->mdiff += q->shift; // this is the difference between the min and max scores // An example: p=8, qlen=19, slen=3 and segmentation: // {{0,3,6,9,12,15,18,-1},{1,4,7,10,13,16,-1,-1},{2,5,8,11,14,17,-1,-1}} @@ -212,7 +214,11 @@ kswr_t ksw_u8(kswq_t *q, int tlen, const uint8_t *target, int _o_del, int _e_del end_loop16: //int k;for (k=0;k<16;++k)printf("%d ", ((uint8_t*)&max)[k]);printf("\n"); __max_16(imax, max); // imax is the maximum number in max - if (imax >= minsc) { // write the b array; this condition adds branching unfornately + //if (imax >= 4) { + // fprintf(stderr, "max row: %d, tlen: %d\n", i, tlen); + // break; + //} + if (imax >= minsc) { // write the b array; this condition adds branching unfornately if (n_b == 0 || (int32_t)b[n_b-1] + 1 != i) { // then append if (n_b == m_b) { m_b = m_b? m_b<<1 : 8; @@ -376,9 +382,9 @@ static inline void revseq(int l, uint8_t *s) t = s[i], s[i] = s[l - 1 - i], s[l - 1 - i] = t; } -kswr_t ksw_align2(int qlen, uint8_t *query, int tlen, uint8_t *target, int m, const int8_t *mat, int o_del, int e_del, int o_ins, int e_ins, int xtra, kswq_t **qry) -{ - int size; +kswr_t ksw_align2(int qlen, uint8_t* query, int tlen, uint8_t* target, int m, const int8_t* mat, int o_del, int e_del, int o_ins, int e_ins, int xtra, + kswq_t** qry, int tid) { + int size; kswq_t *q; kswr_t r, rr; kswr_t (*func)(kswq_t*, int, const uint8_t*, int, int, int, int, int); @@ -387,13 +393,23 @@ kswr_t ksw_align2(int qlen, uint8_t *query, int tlen, uint8_t *target, int m, co if (qry && *qry == 0) *qry = q; func = q->size == 2? ksw_i16 : ksw_u8; size = q->size; - r = func(q, tlen, target, o_del, e_del, o_ins, e_ins, xtra); + PROF_START(msw_1); + r = func(q, tlen, target, o_del, e_del, o_ins, e_ins, xtra); + PROF_END(tprof[T_MSW_1][tid], msw_1); +#ifdef SHOW_DATA_PERF + tdat[TD_ALIGN_1_CNT][tid] += 1; +#endif if (qry == 0) free(q); if ((xtra&KSW_XSTART) == 0 || ((xtra&KSW_XSUBO) && r.score < (xtra&0xffff))) return r; revseq(r.qe + 1, query); revseq(r.te + 1, target); // +1 because qe/te points to the exact end, not the position after the end q = ksw_qinit(size, r.qe + 1, query, m, mat); - rr = func(q, tlen, target, o_del, e_del, o_ins, e_ins, KSW_XSTOP | r.score); - revseq(r.qe + 1, query); revseq(r.te + 1, target); + PROF_START(msw_2); + rr = func(q, tlen, target, o_del, e_del, o_ins, e_ins, KSW_XSTOP | r.score); + PROF_END(tprof[T_MSW_2][tid], msw_2); +#ifdef SHOW_DATA_PERF + tdat[TD_ALIGN_2_CNT][tid] += 1; +#endif + revseq(r.qe + 1, query); revseq(r.te + 1, target); free(q); if (r.score == rr.score) r.tb = r.te - rr.te, r.qb = r.qe - rr.qe; @@ -402,7 +418,7 @@ kswr_t ksw_align2(int qlen, uint8_t *query, int tlen, uint8_t *target, int m, co kswr_t ksw_align(int qlen, uint8_t *query, int tlen, uint8_t *target, int m, const int8_t *mat, int gapo, int gape, int xtra, kswq_t **qry) { - return ksw_align2(qlen, query, tlen, target, m, mat, gapo, gape, gapo, gape, xtra, qry); + return ksw_align2(qlen, query, tlen, target, m, mat, gapo, gape, gapo, gape, xtra, qry, 0); } /******************** diff --git a/ksw.h b/ksw.h index f5626c5..6bec7be 100644 --- a/ksw.h +++ b/ksw.h @@ -63,9 +63,10 @@ extern "C" { * query profile will be deallocated in ksw_align(). */ kswr_t ksw_align(int qlen, uint8_t *query, int tlen, uint8_t *target, int m, const int8_t *mat, int gapo, int gape, int xtra, kswq_t **qry); - kswr_t ksw_align2(int qlen, uint8_t *query, int tlen, uint8_t *target, int m, const int8_t *mat, int o_del, int e_del, int o_ins, int e_ins, int xtra, kswq_t **qry); + kswr_t ksw_align2(int qlen, uint8_t* query, int tlen, uint8_t* target, int m, const int8_t* mat, int o_del, int e_del, int o_ins, int e_ins, + int xtra, kswq_t** qry, int tid); - /** + /** * Banded global alignment * * @param qlen query length diff --git a/profiling.c b/profiling.c index 9352582..fd23eed 100644 --- a/profiling.c +++ b/profiling.c @@ -144,7 +144,11 @@ int display_stats(int nthreads) // for gen-sam FORMAT_PERF_OUT("gen-sam", gprof[G_GEN_SAM] * 1.0 / proc_freq, 0); FORMAT_PERF_OUT_3("sam_mate_sw", tprof[T_SAM_MATESW], 1); + FORMAT_PERF_OUT_3("mate_sw_1", tprof[T_MSW_1], 2); + FORMAT_PERF_OUT_3("mate_sw_2", tprof[T_MSW_2], 2); FORMAT_PERF_OUT_3("sam_reg2aln", tprof[T_SAM_REG2ALN], 1); + FORMAT_PERF_OUT_3("sam_gen_alt", tprof[T_SAM_GEN_ALT], 1); + FORMAT_PERF_OUT_3("sam_aln2sam", tprof[T_SAM_ALN2SAM], 1); #if 0 @@ -237,8 +241,17 @@ int display_stats(int nthreads) #endif +#endif + +#ifdef SHOW_DATA_PERF + fprintf(stderr, "\n"); + fprintf(stderr, "average seed cnt: %0.2lf\n", get_sum(tdat[TD_SEED_CNT], nthreads) * 1.0 / gdat[GD_READ_CNT]); + fprintf(stderr, "average matesw cnt: %0.2lf\n", get_sum(tdat[TD_MATESW_CNT], nthreads) * 1.0 / gdat[GD_READ_CNT]); + fprintf(stderr, "align 1 cnt: %ld\n", get_sum(tdat[TD_ALIGN_1_CNT], nthreads)); + fprintf(stderr, "align 2 cnt: %ld\n", get_sum(tdat[TD_ALIGN_2_CNT], nthreads)); + #endif fprintf(stderr, "\n"); return 0; -} +} \ No newline at end of file diff --git a/profiling.h b/profiling.h index 46c8af8..71b28f7 100644 --- a/profiling.h +++ b/profiling.h @@ -55,6 +55,15 @@ enum { TD_SEED_1_3, TD_SEED_1_4, TD_SEED_1_5, + TD_SEED_CNT, + TD_MATESW_CNT, + TD_ALIGN_1_CNT, + TD_ALIGN_2_CNT, +}; + +// gdat +enum { + GD_READ_CNT = 0, }; // GLOBAL @@ -91,7 +100,6 @@ enum { T_GEN_SAM, T_MEM_REG2ALN, - T_CHAIN_ALL, T_ALN_ALL, T_INS_SIZE, @@ -103,6 +111,8 @@ enum { T_KSW_ALIGN2, T_KSW_REVERSE, T_SAM_REG2ALN, + T_SAM_ALN2SAM, + T_SAM_GEN_ALT, T_KSW_LOOP, T_SEED_LEN, T_SEED_1_ALL, @@ -132,7 +142,9 @@ enum { T_SEED_3_3, T_SEED_3_3_0, T_SEED_3_3_1, - T_SEED_3_3_2 + T_SEED_3_3_2, + T_MSW_1, + T_MSW_2, }; int display_stats(int);