解决了创建fmt时候的bug,获取bwt str地址时候应该用bwt 的 occ-interval计算,而不是fmt的occ interval
This commit is contained in:
parent
fe34be5d3a
commit
a500d76eaa
2
Makefile
2
Makefile
|
|
@ -1,7 +1,7 @@
|
||||||
CC= g++
|
CC= g++
|
||||||
NOWARN= -Wno-unused-result -Wno-unused-function
|
NOWARN= -Wno-unused-result -Wno-unused-function
|
||||||
CFLAGS= #-g -Wall $(NOWARN) #-O2
|
CFLAGS= #-g -Wall $(NOWARN) #-O2
|
||||||
CPPFLAGS= -g -Wall $(NOWARN) -O2
|
CPPFLAGS= -g -Wall $(NOWARN) -O3
|
||||||
WRAP_MALLOC=-DUSE_MALLOC_WRAPPERS
|
WRAP_MALLOC=-DUSE_MALLOC_WRAPPERS
|
||||||
SHOW_PERF= -DSHOW_PERF
|
SHOW_PERF= -DSHOW_PERF
|
||||||
AR= ar
|
AR= ar
|
||||||
|
|
|
||||||
|
|
@ -204,7 +204,7 @@ FMTIndex *create_fmt_from_bwt(bwt_t *bwt)
|
||||||
{
|
{
|
||||||
uint32_t pre_bwt_16_seq = 0; // 16个pre-bwt碱基串
|
uint32_t pre_bwt_16_seq = 0; // 16个pre-bwt碱基串
|
||||||
uint32_t *bwt_addr = bwt_occ_intv(bwt, i) + 4; // 这里加4还是加8要看保存occ的是是uint32还是uint64,bwt字符串i对应的基准行,因为原始的bwt-cp(check point)包含由4个uint32_t(8个uint32_t)组成的occ信息
|
uint32_t *bwt_addr = bwt_occ_intv(bwt, i) + 4; // 这里加4还是加8要看保存occ的是是uint32还是uint64,bwt字符串i对应的基准行,因为原始的bwt-cp(check point)包含由4个uint32_t(8个uint32_t)组成的occ信息
|
||||||
int offset = (i % FMT_OCC_INTERVAL) / 16; // 每OCC_INTERVAL个碱基共享同一个基准地址,每16个碱基共用一个uint32整型,因此需要偏移量来获取当前碱基串的首地址
|
int offset = (i % OCC_INTERVAL) / 16; // 每OCC_INTERVAL个碱基共享同一个基准地址,每16个碱基共用一个uint32整型,因此需要偏移量来获取当前碱基串的首地址
|
||||||
uint32_t bwt_16_seq = *(bwt_addr + offset); // 待处理的当前16个碱基串的首地址
|
uint32_t bwt_16_seq = *(bwt_addr + offset); // 待处理的当前16个碱基串的首地址
|
||||||
for (j = 0; j < 16; ++j) // 对于bwt碱基串,一个一个碱基分别处理
|
for (j = 0; j < 16; ++j) // 对于bwt碱基串,一个一个碱基分别处理
|
||||||
{
|
{
|
||||||
|
|
@ -317,7 +317,7 @@ void fmt_e2_occ(const FMTIndex *fmt, bwtint_t k, int b1, int b2, bwtint_t cnt[4]
|
||||||
cnt[3] = q[b2];
|
cnt[3] = q[b2];
|
||||||
p += 20; // 该地址是bwt和pre_bwt字符串数据的首地址
|
p += 20; // 该地址是bwt和pre_bwt字符串数据的首地址
|
||||||
end = p + ((k >> 3) - ((k & ~FMT_OCC_INTV_MASK) >> 3)); // this is the end point of the following loop
|
end = p + ((k >> 3) - ((k & ~FMT_OCC_INTV_MASK) >> 3)); // this is the end point of the following loop
|
||||||
// p = end - (end - p) / 4;
|
//p = end - (end - p) / 4;
|
||||||
// cout << "k - kbase: " << k - bwt_k_base_line << endl;
|
// cout << "k - kbase: " << k - bwt_k_base_line << endl;
|
||||||
for (x = 0; p < end; ++p)
|
for (x = 0; p < end; ++p)
|
||||||
{
|
{
|
||||||
|
|
@ -460,6 +460,7 @@ int main_fmtidx(int argc, char **argv)
|
||||||
|
|
||||||
//string fmt_idx = string(argv[1]) + ".fmt";
|
//string fmt_idx = string(argv[1]) + ".fmt";
|
||||||
// string fmt_idx = string(argv[1]) + ".256.fmt";
|
// string fmt_idx = string(argv[1]) + ".256.fmt";
|
||||||
|
// string fmt_idx = string(argv[1]) + ".128.fmt";
|
||||||
string fmt_idx = string(argv[1]) + ".64.fmt";
|
string fmt_idx = string(argv[1]) + ".64.fmt";
|
||||||
// string fmt_idx = string(argv[1]) + ".32.fmt";
|
// string fmt_idx = string(argv[1]) + ".32.fmt";
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -5,6 +5,8 @@
|
||||||
#define FMT_OCC_INTERVAL (1LL << FMT_OCC_INTV_SHIFT)
|
#define FMT_OCC_INTERVAL (1LL << FMT_OCC_INTV_SHIFT)
|
||||||
#define FMT_OCC_INTV_MASK (FMT_OCC_INTERVAL - 1)
|
#define FMT_OCC_INTV_MASK (FMT_OCC_INTERVAL - 1)
|
||||||
|
|
||||||
|
#define FMT_MID_INTV 16
|
||||||
|
|
||||||
// 获取碱基c(待查找序列的首个碱基)和对应的互补碱基对应的行,以及间隔
|
// 获取碱基c(待查找序列的首个碱基)和对应的互补碱基对应的行,以及间隔
|
||||||
#define fmt_set_intv(fmt, c, ik) ((ik).x[0] = (fmt)->L2[(int)(c)] + 1, (ik).x[2] = (fmt)->L2[(int)(c) + 1] - (fmt)->L2[(int)(c)], (ik).x[1] = (fmt)->L2[3 - (c)] + 1, (ik).info = 0)
|
#define fmt_set_intv(fmt, c, ik) ((ik).x[0] = (fmt)->L2[(int)(c)] + 1, (ik).x[2] = (fmt)->L2[(int)(c) + 1] - (fmt)->L2[(int)(c)], (ik).x[1] = (fmt)->L2[3 - (c)] + 1, (ik).info = 0)
|
||||||
// k行(bwt str行(不包含$))对应的check point occ数据起始地址(小于k且是OCC_INTERVAL的整数倍)
|
// k行(bwt str行(不包含$))对应的check point occ数据起始地址(小于k且是OCC_INTERVAL的整数倍)
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue