diff --git a/Makefile b/Makefile index 27f3eef..ce158fc 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ CC= g++ NOWARN= -Wno-unused-result -Wno-unused-function CFLAGS= #-g -Wall $(NOWARN) #-O2 -CPPFLAGS= -g -Wall $(NOWARN) -O2 +CPPFLAGS= -g -Wall $(NOWARN) -O3 WRAP_MALLOC=-DUSE_MALLOC_WRAPPERS SHOW_PERF= -DSHOW_PERF AR= ar diff --git a/fmt_index.cpp b/fmt_index.cpp index da4913e..4be4fe7 100644 --- a/fmt_index.cpp +++ b/fmt_index.cpp @@ -204,7 +204,7 @@ FMTIndex *create_fmt_from_bwt(bwt_t *bwt) { uint32_t pre_bwt_16_seq = 0; // 16个pre-bwt碱基串 uint32_t *bwt_addr = bwt_occ_intv(bwt, i) + 4; // 这里加4还是加8要看保存occ的是是uint32还是uint64,bwt字符串i对应的基准行,因为原始的bwt-cp(check point)包含由4个uint32_t(8个uint32_t)组成的occ信息 - int offset = (i % FMT_OCC_INTERVAL) / 16; // 每OCC_INTERVAL个碱基共享同一个基准地址,每16个碱基共用一个uint32整型,因此需要偏移量来获取当前碱基串的首地址 + int offset = (i % OCC_INTERVAL) / 16; // 每OCC_INTERVAL个碱基共享同一个基准地址,每16个碱基共用一个uint32整型,因此需要偏移量来获取当前碱基串的首地址 uint32_t bwt_16_seq = *(bwt_addr + offset); // 待处理的当前16个碱基串的首地址 for (j = 0; j < 16; ++j) // 对于bwt碱基串,一个一个碱基分别处理 { @@ -317,7 +317,7 @@ void fmt_e2_occ(const FMTIndex *fmt, bwtint_t k, int b1, int b2, bwtint_t cnt[4] cnt[3] = q[b2]; p += 20; // 该地址是bwt和pre_bwt字符串数据的首地址 end = p + ((k >> 3) - ((k & ~FMT_OCC_INTV_MASK) >> 3)); // this is the end point of the following loop - // p = end - (end - p) / 4; + //p = end - (end - p) / 4; // cout << "k - kbase: " << k - bwt_k_base_line << endl; for (x = 0; p < end; ++p) { @@ -460,6 +460,7 @@ int main_fmtidx(int argc, char **argv) //string fmt_idx = string(argv[1]) + ".fmt"; // string fmt_idx = string(argv[1]) + ".256.fmt"; + // string fmt_idx = string(argv[1]) + ".128.fmt"; string fmt_idx = string(argv[1]) + ".64.fmt"; // string fmt_idx = string(argv[1]) + ".32.fmt"; diff --git a/fmt_index.h b/fmt_index.h index b5fedab..490557f 100644 --- a/fmt_index.h +++ b/fmt_index.h @@ -5,6 +5,8 @@ #define FMT_OCC_INTERVAL (1LL << FMT_OCC_INTV_SHIFT) #define FMT_OCC_INTV_MASK (FMT_OCC_INTERVAL - 1) +#define FMT_MID_INTV 16 + // 获取碱基c(待查找序列的首个碱基)和对应的互补碱基对应的行,以及间隔 #define fmt_set_intv(fmt, c, ik) ((ik).x[0] = (fmt)->L2[(int)(c)] + 1, (ik).x[2] = (fmt)->L2[(int)(c) + 1] - (fmt)->L2[(int)(c)], (ik).x[1] = (fmt)->L2[3 - (c)] + 1, (ik).info = 0) // k行(bwt str行(不包含$))对应的check point occ数据起始地址(小于k且是OCC_INTERVAL的整数倍)