diff --git a/.vscode/launch.json b/.vscode/launch.json index 0a30413..496eac7 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -18,11 +18,10 @@ "-R", "'@RG\\tID:normal\\tSM:normal\\tPL:illumina\\tLB:normal\\tPG:bwa'", "~/data1/fmt_ref/human_g1k_v37_decoy.fasta", - "~/data/fastq/dataset/na12878_wes_144/1w_1.fq", - "~/data/fastq/dataset/na12878_wes_144/1w_2.fq", + "~/data/dataset/real/D3/n1.fq", + "~/data/dataset/real/D3/n2.fq", "-o", "/dev/null", - "-Z" ], "cwd": "${workspaceFolder}", // 当前工作路径:当前文件所在的工作空间 }, diff --git a/.vscode/settings.json b/.vscode/settings.json index d9cb6da..923a1b1 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -42,6 +42,8 @@ "ertindex.h": "c", "ertseeding.h": "c", "algorithm": "c", - "filesystem": "c" + "filesystem": "c", + "chrono": "c", + "queue": "c" } } \ No newline at end of file diff --git a/Makefile b/Makefile index 9506ced..f5af91e 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ CC= gcc -CFLAGS= -g -Wall -Wno-unused-function -mavx2 #-O2 +CFLAGS= -g -Wall -Wno-unused-function -mavx2 -O2 WRAP_MALLOC=-DUSE_MALLOC_WRAPPERS SHOW_PERF= -DSHOW_PERF diff --git a/bwa.h b/bwa.h index 583423b..8048268 100644 --- a/bwa.h +++ b/bwa.h @@ -95,7 +95,7 @@ extern "C" { uint32_t *bwa_gen_cigar(const int8_t mat[25], int q, int r, int w_, int64_t l_pac, const uint8_t *pac, int l_query, uint8_t *query, int64_t rb, int64_t re, int *score, int *n_cigar, int *NM); uint32_t *bwa_gen_cigar2(const int8_t mat[25], int o_del, int e_del, int o_ins, int e_ins, int w_, int64_t l_pac, const uint8_t *pac, int l_query, uint8_t *query, int64_t rb, int64_t re, int *score, int *n_cigar, int *NM); - int bwa_idx_build(const char *fa, const char *prefix, int algo_type, int block_size); + int bwa_idx_build(const char *fa, const char *prefix, int algo_type, int block_size, int num_thread); char *bwa_idx_infer_prefix(const char *hint); bwt_t *bwa_idx_load_bwt(const char *hint); diff --git a/bwamem.c b/bwamem.c index 1977fd4..b854042 100644 --- a/bwamem.c +++ b/bwamem.c @@ -684,6 +684,8 @@ void mem_chain2aln(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac uint8_t *rseq = 0; uint64_t *srt; smem_aux_t *aux = (smem_aux_t*)buf; + kvec_t(int) qlenv; + kv_init(qlenv); if (c->n == 0) return; // get the max possible span @@ -763,6 +765,10 @@ void mem_chain2aln(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac if (bwa_verbose >= 4) err_printf("** ---> Extending from seed(%d) [%ld;%ld,%ld] @ %s <---\n", k, (long)s->len, (long)s->qbeg, (long)s->rbeg, bns->anns[c->rid].name); if (s->qbeg) { // left extension + // s-qbeg是seed左侧需要扩展的长度 + //tdat[s->qbeg][tid]++; + kv_push(int, qlenv, s->qbeg); + fprintf(stderr, "%d,\n", s->qbeg); int qle, tle, gtle, gscore; tmp = s->rbeg - rmax[0]; #ifndef USE_AVX2 @@ -807,6 +813,9 @@ void mem_chain2aln(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac int qle, tle, qe, re, gtle, gscore, sc0 = a->score; qe = s->qbeg + s->len; re = s->rbeg + s->len - rmax[0]; + kv_push(int, qlenv, l_query - qe); + fprintf(stderr, "%d,\n", l_query - qe); + // tdat[l_query - qe][tid]++; assert(re >= 0); for (i = 0; i < MAX_BAND_TRY; ++i) { int prev = a->score; @@ -848,6 +857,21 @@ void mem_chain2aln(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac a->frac_rep = c->frac_rep; } + + tdat[qlenv.n][tid] ++; + // 计算标准差 + double avgl = 0, stdl = 0; + for (i=0; imin_seed_len, 0, &a->mem1, a->tmpv[0]); + //x = fmt_smem(fmt, len, seq, x, start_width, opt->min_seed_len, 0, &a->mem1, a->tmpv[0]); + x = fmt_smem_new(fmt, len, seq, x, start_width, opt->min_seed_len, 0, &a->mem1, a->tmpv[0]); for (i = 0; i < a->mem1.n; ++i) { bwtintv_t *p = &a->mem1.a[i]; int slen = (uint32_t)p->info - (p->info >> 32); // seed length max_seed_len = MAX(max_seed_len, slen); if (slen >= opt->min_seed_len) { kv_push(bwtintv_t, smem->mem, *p); + //fprintf(gf[0], "seed: %ld\t%d\t%ld\t%ld\n", p->info >> 32, (uint32_t)p->info, (-(p->info >> 32) + (uint32_t)p->info), p->x[2]); + //fprintf(stderr, "seed: %ld\t%d\t%ld\t%ld\n", p->info >> 32, (uint32_t)p->info, (-(p->info >> 32) + (uint32_t)p->info), p->x[2]); } } // break; // for test @@ -1322,30 +1350,40 @@ static void mem_collect_intv_batch(const mem_opt_t *opt, const FMTIndex *fmt, in if (start_flag) ++start_N_num; } } - PROF_END(tprof[T_SEED_1][tid], seed_1); + // && smem->mem.a[0].x[2] == 1 + if (max_seed_len == len - start_N_num && smem->mem.n > 0 && smem->mem.a[0].x[2] == 1) { + __sync_fetch_and_add(&gdat[1], 1); + } + PROF_END(tprof[T_SEED_1][tid], seed_1); //goto third_seed; #ifdef FILTER_FULL_MATCH if (max_seed_len == len - start_N_num) { goto collect_intv_end; } #endif - // 2. second pass: find MEMs inside a long SMEM - PROF_START(seed_2); + //fprintf(gf[0], "second %ld\n", ++debug_num); + // 2. second pass: find MEMs inside a long SMEM + // fprintf(stderr, "%ld\n", debug_num++); + PROF_START(seed_2); old_n = smem->mem.n; for (k = 0; k < old_n; ++k) { bwtintv_t *p = &smem->mem.a[k]; int start = p->info>>32, end = (int32_t)p->info; if (end - start < split_len || p->x[2] > opt->split_width) continue; - fmt_smem(fmt, len, seq, (start + end) >> 1, p->x[2] + 1, opt->min_seed_len, p, &a->mem1, a->tmpv[0]); - for (i = 0; i < a->mem1.n; ++i) { + //fmt_smem(fmt, len, seq, (start + end) >> 1, p->x[2] + 1, opt->min_seed_len, p, &a->mem1, a->tmpv[0]); + fmt_smem_new(fmt, len, seq, (start + end) >> 1, p->x[2] + 1, opt->min_seed_len, p, &a->mem1, a->tmpv[0]); +// fprintf(stderr, "%ld\n", debug_num++); + for (i = 0; i < a->mem1.n; ++i) { bwtintv_t *p = &a->mem1.a[i]; int slen = (uint32_t)p->info - (p->info >> 32); if (slen >= opt->min_seed_len) { kv_push(bwtintv_t, smem->mem, a->mem1.a[i]); - } + //fprintf(gf[0], "seed: %ld\t%d\t%ld\t%ld\n", p->info >> 32, (uint32_t)p->info, (-(p->info >> 32) + (uint32_t)p->info), p->x[2]); + } } } - PROF_END(tprof[T_SEED_2][tid], seed_2); + //fprintf(gf[0], "\n"); + PROF_END(tprof[T_SEED_2][tid], seed_2); //third_seed: // 3. third pass: LAST-like @@ -1358,7 +1396,8 @@ static void mem_collect_intv_batch(const mem_opt_t *opt, const FMTIndex *fmt, in x = fmt_seed_strategy1(fmt, len, seq, x, opt->min_seed_len, opt->max_mem_intv, &m); if (m.x[2] > 0) { kv_push(bwtintv_t, smem->mem, m); - } + //fprintf(stderr, "seed_3: %ld %d %ld %ld\n", m.info >> 32, (uint32_t)m.info, (-(m.info >> 32) + (uint32_t)m.info), m.x[2]); + } } else ++x; } } @@ -1369,6 +1408,11 @@ collect_intv_end: #endif // sort ks_introsort(mem_intv, smem->mem.n, smem->mem.a); +// for (i = 0; i < smem->mem.n; ++i) { +// bwtintv_t *p = &smem->mem.a[i]; +// fprintf(gf[0], "seed: %ld\t%d\t%ld\t%ld\n", p->info >> 32, (uint32_t)p->info, (-(p->info >> 32) + (uint32_t)p->info), p->x[2]); +// } +// fprintf(gf[0], "\n"); } void find_smem(const mem_opt_t *opt, const FMTIndex *fmt, int len, const uint8_t *seq, smem_aux_t *aux, smem_v *smemv, int tid) diff --git a/bwt.c b/bwt.c index 4b44bb1..4afa9a7 100644 --- a/bwt.c +++ b/bwt.c @@ -62,7 +62,8 @@ static inline bwtint_t bwt_invPsi(const bwt_t *bwt, bwtint_t k) // compute inver // 设置某一行的排序值,sa的索引有效值从1开始,(0设置为-1, 小端模式) void inline bwt_set_sa(uint8_t *sa_arr, bwtint_t k, bwtint_t val) { - const bwtint_t block_idx = (k >> 3) * 33; // 8个数为一组,共享33个字节 + //const bwtint_t block_idx = (k >> 3) * 33; + const bwtint_t block_idx = ((k >> 3) << 5) + (k>>3); // 8个数为一组,共享33个字节 const int val_idx_in_block = k & 7; const bwtint_t start_byte_idx = block_idx + (val_idx_in_block << 2); bwtint_t *sa_addr = (bwtint_t *)(sa_arr + start_byte_idx); @@ -73,13 +74,20 @@ void inline bwt_set_sa(uint8_t *sa_arr, bwtint_t k, bwtint_t val) // 获取某一行的排序值(小端模式) bwtint_t bwt_get_sa(uint8_t *sa_arr, bwtint_t k) { - const bwtint_t block_idx = (k >> 3) * 33; // 8个数为一组,共享33个字节 - const int val_idx_in_block = k & 7; + // const bwtint_t block_idx = (k >> 3) * 33; + const bwtint_t block_idx = ((k >> 3) << 5) + (k >> 3); // 8个数为一组,共享33个字节 + const int val_idx_in_block = k & 7; const bwtint_t start_byte_idx = block_idx + (val_idx_in_block << 2); bwtint_t val = *(bwtint_t *)(sa_arr + start_byte_idx); val = (val >> val_idx_in_block) & 8589934591; return val; } +bwtint_t bwt_get_sa_2(uint8_t *sa_arr, bwtint_t k) { + const bwtint_t start_byte = ((k << 5) + k) >> 3; // 存储这个sa数据的起始字节 + bwtint_t val = *(bwtint_t *)(sa_arr + start_byte); + val = (val >> (k & 7)) & 8589934591; + return val; +} // 获取kmer的fmt匹配信息 inline void kmer_getval_at(uint8_t *mem_addr, bwtintv_t *ok, int pos) @@ -147,7 +155,7 @@ inline void bwt_kmer_get(const KmerHash *kmer_hash, bwtintv_t *ok, uint32_t qbit } // bwt->bwt and bwt->occ must be precalculated -void bwt_cal_byte_sa(bwt_t *bwt, int intv) +void bwt_cal_byte_sa(bwt_t *bwt, int intv, int num_thread) { bwtint_t isa, sa, i, block_size; // S(isa) = sa isa是后缀数组的索引,sa表示位置 double tmp_time, elapsed_time; @@ -184,7 +192,7 @@ void bwt_cal_byte_sa(bwt_t *bwt, int intv) } if (isa % intv == 0) bwt_set_sa(bwt->byte_sa, isa / intv, sa); // bwt_set_sa(bwt->byte_sa, 0, (bwtint_t)-1); // 赋值成-1也没问题,set_sa那里已经修正了 - bwt_set_sa(bwt->byte_sa, 0, 8589934591); // before this line, bwt->sa[0] = bwt->seq_len + bwt_set_sa(bwt->byte_sa, 0, 8589934591); // 2的33次方-1 before this line, bwt->sa[0] = bwt->seq_len } // bwt->bwt and bwt->occ must be precalculated diff --git a/bwt.h b/bwt.h index 036a71a..4d2440f 100644 --- a/bwt.h +++ b/bwt.h @@ -103,7 +103,7 @@ typedef struct { typedef struct { size_t n, m; bwtintv_t *a; } bwtintv_v; -#define SA_BYTES(n_sa) ((((33 * (n_sa) + 7) / 8) & (~7L)) + 8) +#define SA_BYTES(n_sa) ((((33 * (n_sa) + 7) / 8) & (~7L)) + 8)/* 8字节对齐*/ /* For general OCC_INTERVAL, the following is correct: #define bwt_bwt(b, k) ((b)->bwt[(k)/OCC_INTERVAL * (OCC_INTERVAL/(sizeof(uint32_t)*8/2) + sizeof(bwtint_t)/4*4) + sizeof(bwtint_t)/4*4 + (k)%OCC_INTERVAL/16]) @@ -137,7 +137,7 @@ extern "C" { void bwt_bwtgen(const char *fn_pac, const char *fn_bwt); // from BWT-SW void bwt_bwtgen2(const char *fn_pac, const char *fn_bwt, int block_size); // from BWT-SW void bwt_cal_sa(bwt_t *bwt, int intv); - void bwt_cal_byte_sa(bwt_t *bwt, int intv); + void bwt_cal_byte_sa(bwt_t *bwt, int intv, int num_thread); void bwt_bwtupdate_core(bwt_t *bwt); diff --git a/bwtindex.c b/bwtindex.c index eadd6b8..265faea 100644 --- a/bwtindex.c +++ b/bwtindex.c @@ -261,10 +261,14 @@ int bwa_bwt2sa(int argc, char *argv[]) // the "bwt2sa" command int bwa_bwt2bytesa(int argc, char *argv[]) // the "bwt2bytesa" command { bwt_t *bwt; - int c, sa_intv = 32; + int c, sa_intv = 32, num_thread = 1; while ((c = getopt(argc, argv, "i:")) >= 0) { switch (c) { case 'i': sa_intv = atoi(optarg); break; + case 't': + num_thread = atoi(optarg); + if (num_thread < 1) num_thread = 1; + break; default: return 1; } } @@ -273,7 +277,7 @@ int bwa_bwt2bytesa(int argc, char *argv[]) // the "bwt2bytesa" command return 1; } bwt = bwt_restore_bwt(argv[optind]); - bwt_cal_byte_sa(bwt, sa_intv); + bwt_cal_byte_sa(bwt, sa_intv, num_thread); bwt_dump_byte_sa(argv[optind+1], bwt); bwt_destroy(bwt); return 0; @@ -389,12 +393,12 @@ int bwa_index(int argc, char *argv[]) // the "index" command strcpy(prefix, argv[optind]); if (is_64) strcat(prefix, ".64"); } - bwa_idx_build(argv[optind], prefix, algo_type, block_size); + bwa_idx_build(argv[optind], prefix, algo_type, block_size, num_threads); free(prefix); return 0; } -int bwa_idx_build(const char *fa, const char *prefix, int algo_type, int block_size) +int bwa_idx_build(const char *fa, const char *prefix, int algo_type, int block_size, int num_threads) { extern void bwa_pac_rev_core(const char *fn, const char *fn_rev); @@ -475,7 +479,7 @@ int bwa_idx_build(const char *fa, const char *prefix, int algo_type, int block_s if (bwa_verbose >= 3) fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC); // create byte sa t = clock(); - bwt_cal_byte_sa(bwt, 4); + bwt_cal_byte_sa(bwt, 4, num_threads); strcpy(str, prefix); strcat(str, ".bytesa"); bwt_dump_byte_sa(str, bwt); if (bwa_verbose >= 3) fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC); diff --git a/debug.c b/debug.c index 0bdb44a..9028e01 100644 --- a/debug.c +++ b/debug.c @@ -13,6 +13,8 @@ FILE *gf[4] = {0}, *gft[4] = {0}, *gfi[4] = {0}; +uint64_t debug_num = 0; + int open_qti_files() { char fn[1024] = {0}; diff --git a/debug.h b/debug.h index 9f285ca..8f0da35 100644 --- a/debug.h +++ b/debug.h @@ -6,6 +6,7 @@ Date : 2024/04/09 ***********************************************************************************************/ #include +#include ////////////////// for debug and test ////////////////////////// @@ -21,6 +22,8 @@ extern FILE *gf[4], *gfq[4], *gft[4], *gfi[4]; +extern uint64_t debug_num; + int open_qti_files(); int open_debug_files(); diff --git a/fastmap.c b/fastmap.c index eaa3c07..dc4ea02 100644 --- a/fastmap.c +++ b/fastmap.c @@ -344,6 +344,7 @@ static void update_a(mem_opt_t *opt, const mem_opt_t *opt0) int main_mem(int argc, char *argv[]) { + open_debug_files(); #ifdef DEBUG_FILE_OUTPUT open_debug_files(); // open_debug_files @@ -676,8 +677,9 @@ int main_mem(int argc, char *argv[]) #ifdef DEBUG_FILE_OUTPUT close_files(); #endif + close_files(); - return 0; + return 0; } int main_fastmap(int argc, char *argv[]) diff --git a/fmt_idx.c b/fmt_idx.c index 55efc8a..6dec171 100644 --- a/fmt_idx.c +++ b/fmt_idx.c @@ -1104,6 +1104,171 @@ fmt_smem_end: return ret; } +// 找smem(seed)(lm: long_smem) +int fmt_smem_new(const FMTIndex *fmt, int len, const uint8_t *q, int x, int min_intv, int min_seed_len, bwtintv_t *lm, + bwtintv_v *mem, bwtintv_v *tmpvec) { + if (x == 0 || q[x - 1] > 3) + return fmt_smem_forward(fmt, len, q, x, min_intv, min_seed_len, mem); + + // int flag = 0; + int i, ret = x+1, kmer_len; + bwtintv_t ik = {0}, ok1 = {0}, ok2 = {0}; + bwtintv_t mt = {0}; + uint32_t qbit = 0; + mem->n = 0; + if (q[x] > 3) + return x + 1; + + if (min_intv < 1) + min_intv = 1; // the interval size should be at least 1 + + +#define CHECK_INTV_UPDATE(iv, ov, end_pos) \ + if (ov.x[2] != iv.x[2]) { \ + if (ov.x[2] < min_intv) \ + break; \ + } \ + iv = ov; \ + iv.info = end_pos + + // 通过反复用forward和backward搜索来找到每个SMEM,直到不包含pivot(即x) + int start = x; + while (1) { + qbit = build_forward_kmer(&q[start], len - start, HASH_KMER_LEN, &kmer_len); + if (kmer_len < HASH_KMER_LEN) { + // 扩展到最远的位置 + if (start < x) + break; // 遇到左侧N了 + ret = start + kmer_len; + start = ret - HASH_KMER_LEN; + if (start < 0 || q[x-1] > 3) + break; + continue; + } + + // forward search + bwt_kmer_get(&fmt->kmer_hash, &ik, qbit, kmer_len - 1); // 初始碱基位置 + ik.info = start + kmer_len; + if (ik.x[2] < min_intv) { + if (start == x) + ret = ik.info; + if (start > 0) { + --start; + continue; + } else + break; + } + + for (i = (int)ik.info; i + 1 < len; i += 2) { + if (q[i] < 4 && q[i + 1] < 4) { + fmt_extend2(fmt, &ik, &ok1, &ok2, 0, 3 - q[i], 3 - q[i + 1]); + __builtin_prefetch(fmt_occ_intv(fmt, ok2.x[1] - 1), 0, 2); + __builtin_prefetch(fmt_occ_intv(fmt, ok2.x[1] - 1 + ok2.x[2]), 0, 2); + CHECK_INTV_UPDATE(ik, ok1, i + 1); + CHECK_INTV_UPDATE(ik, ok2, i + 2); +#if 0 + // 在这里进行判断是否只有一个候选了 + if (min_intv == 1 && ok2.x[2] == min_intv) { + direct_extend(fmt, len, q, start, i + 2, ok2.x[0], fmt_sa(fmt, ok2.x[0]), &mt, 0); + if (mt.rm[0].qe <= x) + goto smem_end; + kv_push(bwtintv_t, *mem, mt); // 这里可以判断一下是否大于min_seed_len + if (start == x) + ret = (uint32_t)mt.info; + if (mt.rm[0].qs == 0 || q[mt.rm[0].qs - 1] > 3) + goto smem_end; + i = mt.rm[0].qs - 1; + goto smem_next; + } +#endif + } else if (q[i] < 4) { // q[i+1] >= 4 + fmt_extend1(fmt, &ik, &ok1, 0, 3 - q[i]); + CHECK_INTV_UPDATE(ik, ok1, i + 1); + goto smem_backward_search; + } else { + goto smem_backward_search; + } + } + for (; i == len - 1; ++i) { // 扩展到了最后一个碱基 + if (q[i] < 4) { + fmt_extend1(fmt, &ik, &ok1, 0, 3 - q[i]); + CHECK_INTV_UPDATE(ik, ok1, i + 1); + } else { + goto smem_backward_search; + } + } + if (start == x) ret = ik.info; + if (ik.info <= x) // 不再包含pivot,退出 + goto smem_end; + + // backward search +smem_backward_search: + +#define CHECK_INTV_BREAK(ik, ok, spos) \ +if (ok.x[2] < min_intv) { \ + i = spos; \ + goto smem_next; \ +} \ +ok.info = ik.info; \ +ik = ok; + +#define CHECK_INTV_END(ik, ok) \ +if (ok.x[2] < min_intv) { \ + CHECK_ADD_MEM(i + 1, ik, mem); \ + goto smem_end; \ +} \ +ok.info = ik.info; \ +ik = ok; + + i = start - 1; + for (; i > 0; i -= 2) { + if (q[i] < 4 && q[i - 1] < 4) // 两个都可以扩展 + { + fmt_direct2_extend2(fmt, &ik, &ok1, &ok2, 1, q[i], q[i - 1]); + __builtin_prefetch(fmt_occ_intv(fmt, ok2.x[0] - 1), 0, 2); + __builtin_prefetch(fmt_occ_intv(fmt, ok2.x[0] - 1 + ok2.x[2]), 0, 2); + CHECK_INTV_BREAK(ik, ok1, i); + CHECK_INTV_BREAK(ik, ok2, i-1); + } else if (q[i] < 4) // 只能扩展一个 + { + fmt_direct_extend1(fmt, &ik, &ok1, 1, q[i]); + CHECK_INTV_END(ik, ok1); + --i; // 避免进行i-2 + break; + } else { // 不能扩展 + CHECK_ADD_MEM(i + 1, ik, mem); + goto smem_end; + } + } + for (; i == 0; --i) { // 扩展到了第一个碱基 + if (q[i] < 4) { + fmt_direct_extend1(fmt, &ik, &ok1, 1, q[i]); + CHECK_INTV_BREAK(ik, ok1, i); + } else { + CHECK_ADD_MEM(i + 1, ik, mem); + goto smem_end; + } + } + if (i == -1) { + CHECK_ADD_MEM(i + 1, ik, mem); + goto smem_end; + } + +smem_next: + ik.info |= (uint64_t)(i+1) << 32; + CHECK_ADD_MEM(i + 1, ik, mem); + if (start < 0 || q[start] > 3) { + break; + } + start = i; + } + +smem_end: + fmt_reverse_intvs(mem); // s.t. sorted by the start coordinate + // printf("%d\n", ret); + return ret; +} + int fmt_seed_strategy1(const FMTIndex *fmt, int len, const uint8_t *q, int x, int min_len, int max_intv, bwtintv_t *mem) { int i, kmer_len, first_extend_len; @@ -1310,4 +1475,4 @@ bwtint_t fmt_sa_offset(const FMTIndex *fmt, bwtint_t k) } sa = (sa << 48) | (k / fmt->sa_intv); return sa; -} \ No newline at end of file +} diff --git a/fmt_idx.h b/fmt_idx.h index 26b4c4d..12ef0ec 100644 --- a/fmt_idx.h +++ b/fmt_idx.h @@ -110,6 +110,7 @@ void kmer_getval_at(uint8_t *mem_addr, bwtintv_t *ok, int pos); void fmt_kmer_get(const FMTIndex *fmt, bwtintv_t *ok, uint32_t qbit, int pos); // 找smem(seed) int fmt_smem(const FMTIndex *fmt, int len, const uint8_t *q, int x, int min_intv, int min_seed_len, bwtintv_t *lm, bwtintv_v *mem, bwtintv_v *tmpvec); +int fmt_smem_new(const FMTIndex *fmt, int len, const uint8_t *q, int x, int min_intv, int min_seed_len, bwtintv_t *lm, bwtintv_v *mem, bwtintv_v *tmpvec); int fmt_smem_2(const FMTIndex *fmt, int len, const uint8_t *q, int x, int min_intv, int min_seed_len, bwtintv_t *lm, bwtintv_v *mem, bwtintv_v *tmpvec, uint32_v qev); int fmt_seed_strategy1(const FMTIndex *fmt, int len, const uint8_t *q, int x, int min_len, int max_intv, bwtintv_t *mem); diff --git a/kstring.c b/kstring.c index 2871310..de06552 100644 --- a/kstring.c +++ b/kstring.c @@ -2,10 +2,6 @@ #include #include "kstring.h" -#ifdef USE_MALLOC_WRAPPERS -# include "malloc_wrap.h" -#endif - int ksprintf(kstring_t *s, const char *fmt, ...) { va_list ap; diff --git a/profiling.c b/profiling.c index f77a238..e8da646 100644 --- a/profiling.c +++ b/profiling.c @@ -23,10 +23,20 @@ tdat[0]: read nums tdat[1]: seed-1 full match nums */ int64_t tdat[LIM_THREAD_DATA_TYPE][LIM_THREAD] = {0}; +int64_t t_sd[LIM_THREAD_DATA_TYPE][LIM_THREAD] = {0}; int64_t gdat[LIM_GLOBAL_DATA_TYPE] = {0}; #endif +int64_t sum(int64_t *a, int len) { + int64_t res = 0; + int i = 0; + for (i=0; i seed.txt #-Z