解决了一些bug，目前看来fmt可以正常工作

2024-01-27 00:42:47 +08:00 · 2024-01-27 00:42:47 +08:00 · c69cb901bb
parent 8e1576a2cd
commit c69cb901bb
7 changed files with 453 additions and 276 deletions
--- a/.gitignore
+++ b/.gitignore
@ -1,6 +1,7 @@
 # specific for bwa_perf
 *.txt
 fmtidx
 *.fmt
 # ---> C
 # Prerequisites
--- a/bwt.cpp
+++ b/bwt.cpp
@ -10,6 +10,17 @@
 using namespace std;
 void bwt_dump_bwt(const char *fn, const bwt_t *bwt)
 {
    FILE *fp;
    fp = xopen(fn, "wb");
    err_fwrite(&bwt->primary, sizeof(bwtint_t), 1, fp);
    err_fwrite(bwt->L2 + 1, sizeof(bwtint_t), 4, fp);
    err_fwrite(bwt->bwt, 4, bwt->bwt_size, fp);
    err_fflush(fp);
    err_fclose(fp);
 }
 // 计算一个字节构成的T,G,C,A序列，对应的每个碱基的个数(按T,G,C,A顺序存储在32位整数中，每个占8位)
 void bwt_gen_cnt_table(bwt_t *bwt)
 {
@ -23,8 +34,8 @@ void bwt_gen_cnt_table(bwt_t *bwt)
    }
 }
-// 解析两bit的bwt碱基序列
+// 解析两bit的bwt碱基序列，这个只有bwt str，可以包含也可不包含occ check point
-bwt_t *restore_bwt_str(const char *fn)
+bwt_t *restore_bwt(const char *fn)
 {
    bwt_t *bwt;
    bwt = (bwt_t *)calloc(1, sizeof(bwt_t));
@ -202,6 +213,8 @@ void bwt_extend(const bwt_t *bwt, const bwtintv_t *ik, bwtintv_t ok[4], int is_b
    bwtint_t tk[4], tl[4];
    int i;
    bwt_2occ4(bwt, ik->x[!is_back] - 1, ik->x[!is_back] - 1 + ik->x[2], tk, tl); // tk表示在k行之前所有各个碱基累积出现次数，tl表示在l行之前的累积
    // cout << "bwt-1-d: " << ik->x[!is_back] - 1 << '\t' << tk[0] << '\t' << tk[1] << '\t' << tk[2] << '\t' << tk[3] << endl;
    // cout << "bwt-1-d: " << ik->x[!is_back] - 1 + ik->x[2] << '\t' << tl[0] << '\t' << tl[1] << '\t' << tl[2] << '\t' << tl[3] << endl;
    // 这里是反向扩展
    for (i = 0; i != 4; ++i)
    {
--- a/bwt.h
+++ b/bwt.h
@ -20,7 +20,7 @@ using std::string;
 // 从构建完成的bwt（包含occ check point）获取k行（不含$，这里的k不输出bwt mtx的行，是bwt字符串的行）的碱基
 #define bwt_B0(b, k) (bwt_bwt(b, k) >> ((~(k) & 0xf) << 1) & 3)
-// 获取碱基c（待查找序列的首）和对应的互补碱基对应的行，以及间隔
+// 获取碱基c（待查找序列的首个碱基）和对应的互补碱基对应的行，以及间隔
 #define bwt_set_intv(bwt, c, ik) ((ik).x[0] = (bwt)->L2[(int)(c)] + 1, (ik).x[2] = (bwt)->L2[(int)(c) + 1] - (bwt)->L2[(int)(c)], (ik).x[1] = (bwt)->L2[3 - (c)] + 1, (ik).info = 0)
 // The following two lines are ONLY correct when OCC_INTERVAL==0x80
@ -56,7 +56,7 @@ bwtint_t bwt_occ(const bwt_t *bwt, bwtint_t k, uint8_t c);
 // 统计k行（bwt mtx行，包含k行本身）之前4种碱基累积数量，这里的k是bwt矩阵里的行，比bwt字符串多1
 void bwt_occ4(const bwt_t *bwt, bwtint_t k, bwtint_t cnt[4]);
 // 解析两bit的bwt碱基序列
-bwt_t *restore_bwt_str(const char *fn);
+bwt_t *restore_bwt(const char *fn);
 // 根据原始的字符串bwt创建interval-bwt
 void create_interval_occ_bwt(bwt_t *bwt);
--- a/fmt_index.cpp
+++ b/fmt_index.cpp
@ -44,35 +44,44 @@ void print_base_uint32(uint32_t p)
 	}
 }
 // 随机生成长度为len的序列
 string generate_rand_seq(int len)
 {
 	string seq(len, 'A');
 	for (int i = 0; i < len; ++i)
 	{
 		seq[i] = BASE[rand() % 4];
 	}
 	return seq;
 }
 // 创建bwt矩阵
 void create_bwt_mtx(string &seq)
 {
-	cout << "seq size: " << seq.size() + 1 << endl;
+	bwtint_t seq_len = seq.size() + 1;
-	string sarr[seq.size() + 1];
+	string sarr[seq_len];
 	sarr[0] = seq + '$';
-	for (int i = 1; i < sarr[0].size(); ++i)
+	for (int i = 1; i < seq_len; ++i)
 	{
 		sarr[i] = sarr[0].substr(i) + sarr[0].substr(0, i);
 	}
-	std::sort(sarr, sarr + seq.size() + 1);
+	std::sort(sarr, sarr + seq_len);
 	// bwt matrix
 	for (int i = 0; i < sarr[0].size(); ++i)
 	{
 		// cout << i << ' ' << sarr[i] << endl;
 		cout << sarr[i] << endl;
 	}
 	// print bwt matrix
 	// for (int i = 0; i < seq_len; ++i)
 	//{
 	//	// cout << i << ' ' << sarr[i] << endl;
 	//	cout << sarr[i] << endl;
 	//}
 	// cout << "bwt string" << endl;
-	// for (int i = 0; i < sarr[0].size(); ++i)
+	// for (int i = 0; i < seq_len; ++i)
 	// {
 	// 	cout << sarr[i].back();
 	// }
 	// cout << endl;
 	// cout << "pre bwt string" << endl;
-	// for (int i = 0; i < sarr[0].size(); ++i)
+	// for (int i = 0; i < seq_len; ++i)
 	// {
-	// 	cout << sarr[i][sarr[0].size() - 2];
+	// 	cout << sarr[i][seq_len - 2];
 	// }
 	// cout << endl;
 }
@ -98,7 +107,43 @@ void fmt_gen_cnt_table(FMTIndex *fmt)
 	}
 }
 void dump_fmt(const char *fn, const FMTIndex *fmt)
 {
 	FILE *fp;
 	fp = xopen(fn, "wb");
 	err_fwrite(&fmt->primary, sizeof(bwtint_t), 1, fp);
 	err_fwrite(&fmt->sec_primary, sizeof(bwtint_t), 1, fp);
 	err_fwrite(&fmt->sec_bcp, sizeof(uint8_t), 1, fp);
 	err_fwrite(&fmt->first_base, sizeof(uint8_t), 1, fp);
 	err_fwrite(&fmt->last_base, sizeof(uint8_t), 1, fp);
 	err_fwrite(fmt->L2 + 1, sizeof(bwtint_t), 4, fp);
 	err_fwrite(fmt->bwt, 4, fmt->bwt_size, fp);
 	err_fflush(fp);
 	err_fclose(fp);
 }
 FMTIndex *restore_fmt(const char *fn)
 {
 	FMTIndex *fmt;
 	fmt = (FMTIndex *)calloc(1, sizeof(FMTIndex));
 	FILE *fp = fopen(fn, "rb");
 	fseek(fp, 0, SEEK_END);
 	fmt->bwt_size = (ftell(fp) - sizeof(bwtint_t) * 6 - 3) >> 2; // 以32位word为单位计算的size
 	fmt->bwt = (uint32_t *)calloc(fmt->bwt_size, 4);
 	fseek(fp, 0, SEEK_SET);
 	fread(&fmt->primary, sizeof(bwtint_t), 1, fp);
 	fread(&fmt->sec_primary, sizeof(bwtint_t), 1, fp);
 	fread(&fmt->sec_bcp, sizeof(uint8_t), 1, fp);
 	fread(&fmt->first_base, sizeof(uint8_t), 1, fp);
 	fread(&fmt->last_base, sizeof(uint8_t), 1, fp);
 	fread(fmt->L2 + 1, sizeof(bwtint_t), 4, fp);
 	fread_fix(fp, fmt->bwt_size << 2, fmt->bwt);
 	fmt->seq_len = fmt->L2[4];
 	fclose(fp);
 	fmt_gen_cnt_table(fmt); // 字节所能表示的各种碱基组合中，各个碱基的累积数量
 	return fmt;
 }
 // 根据interval-bwt创建fmt-index
 FMTIndex *create_fmt_from_bwt(bwt_t *bwt)
@ -108,13 +153,13 @@ FMTIndex *create_fmt_from_bwt(bwt_t *bwt)
 	fmt_gen_cnt_table(fmt);
 	bwtint_t i, j, k, m, n, n_occ, cnt[4], cnt2[4];
-	uint32_t c[4], c2[16] /*保存AA..TT*/;
+	uint32_t c[4], c2[16]; /*c用来保存原来的bwt碱基串的累积值，c2用来保存pre-bwt和bwt碱基对的累计值，如AA..TT*/
-	uint32_t *buf;
+	uint32_t *buf; /* 计算之后变成fmt结构中bwt部分 */
-	fmt->seq_len = bwt->seq_len;
+	fmt->seq_len = bwt->seq_len; // bwt碱基序列的长度，不包含$字符，也就是该长度比bwt matrix长度少1
 	for (i = 0; i < 5; ++i)
-		fmt->L2[i] = bwt->L2[i];
+		fmt->L2[i] = bwt->L2[i]; // 每个碱基的总累积值
-	fmt->primary = bwt->primary;
+	fmt->primary = bwt->primary; // $在末尾的行，在bwt matrix行中的排序位置
 	n_occ = (bwt->seq_len + OCC_INTERVAL - 1) / OCC_INTERVAL + 1; // check point 个数
 	fmt->bwt_size = (fmt->seq_len * 2  + 15) >> 4; // 要保存最后两列碱基
@ -129,146 +174,95 @@ FMTIndex *create_fmt_from_bwt(bwt_t *bwt)
 		bwt_occ4(bwt, before_first_line, cnt);
 		for (j = i * 4, k = 0; k < 4; ++j, ++k)
 			c2[j] = cnt[k];
 		// cout << "start: " << BASE[i] << " line: " << before_first_line << " occ: " << cnt[0] << '\t' << cnt[1] << '\t' << cnt[2] << '\t' << cnt[3] << endl;
 	}
 	// cout << "c2: ";
 	// for (m = 0; m < 16; ++m)
 	// 	cout << c2[m] << ' ';
 	// cout << endl;
 	// k表示buf存储的偏移量
 	for (i = k = 0; i < bwt->seq_len; ++i)
 	{
 		// 记录occ
 		if (i % OCC_INTERVAL == 0)
 		{
-			memcpy(buf + k, c, sizeof(uint32_t) * 4); // 保存occ
+			memcpy(buf + k, c, sizeof(uint32_t) * 4); // bwt str中各个碱基的occ
 			k += 4;
-			memcpy(buf + k, c2, sizeof(uint32_t) * 16); // 二次计算的occ
+			memcpy(buf + k, c2, sizeof(uint32_t) * 16); // pre-bwt:bwt碱基对的occ
 			k += 16;
 		}
-		// 每个32位整数保存8个倒数第二列碱基和8个倒数第一列(bwt)碱基
+		// 每个32位整数保存8个倒数第二列碱基（pre-bwt）和8个倒数第一列(bwt)碱基
 		if (i % 16 == 0) // 每个32位整数可以包含16个碱基，每次需要处理16个碱基，也就是间隔最小可以设置为16
 		{
-			uint32_t bwt_16_seq = bwt->bwt[i / 16];
+			uint32_t pre_bwt_16_seq = 0; // 16个pre-bwt碱基串
-			uint32_t pre_bwt_16_seq = 0;
+			uint32_t *bwt_addr = bwt_occ_intv(bwt, i) + 8; // bwt字符串i对应的基准行，因为原始的bwt-cp（check point）包含由4个uint64_t(8个uint32_t)组成的occ信息
-			uint32_t *bwt_addr = bwt_occ_intv(bwt, i) + 8; // bwt字符串i对应的基准行
+			int offset = (i % OCC_INTERVAL) / 16;		   // 每OCC_INTERVAL个碱基共享同一个基准地址，每16个碱基共用一个uint32整型，因此需要偏移量来获取当前碱基串的首地址
-			int offset = (i % OCC_INTERVAL) / 16;
+			uint32_t bwt_16_seq = *(bwt_addr + offset);    // 待处理的当前16个碱基串的首地址
-			bwt_16_seq = *(bwt_addr + offset);
+			for (j = 0; j < 16; ++j) // 对于bwt碱基串，一个一个碱基分别处理
 			for (j = 0; j < 16; ++j)
 			{
-				bwtint_t cur_line = i + j;
+				bwtint_t cur_str_line = i + j;  // 当前碱基在bwt str中的行排序
-				if (cur_line < bwt->seq_len) // 因为bwt序列里除去了$符号，所以bwt序列个数比原版bwt少1
+				if (cur_str_line < bwt->seq_len) // 当前碱基行不应超出bwt str总碱基长度（bwt str长度比bwt matrix长度少1，因为bwt str不包含$）
 				{
-					uint8_t bwt_base = bwt_B0(bwt, cur_line); // 对应行的bwt的碱基
+					uint8_t bwt_base = bwt_B0(bwt, cur_str_line); // 对应行的bwt的碱基
-					// 先求出该碱基对应在第一列的行
+					// 先求出该碱基对应在第一列的行（对应的bwt matrix行）
-					if (cur_line >= bwt->primary) // 因为bwt序列里除去了$符号，所以，超过$所在行之后，对应的seq位置应该加一，才是真正对应的行
+					bwtint_t cur_mtx_line = cur_str_line;
-						cur_line += 1;
+					if (cur_str_line >= bwt->primary) // 因为bwt序列里除去了$符号，所以，超过$所在行之后，对应的seq位置应该加一，才是真正对应bwt matrix的行
-					bwtint_t origin_base_line = bwt->L2[bwt_base] + 1 + bwt_occ(bwt, cur_line - 1, bwt_base); // bwt矩阵行
+						cur_mtx_line += 1;
-					bwtint_t base_line = origin_base_line;
+					bwt_occ4(bwt, cur_mtx_line, cnt);	  // 获取原来bwt-checkpoint中的occ值	  
-					if (base_line >= bwt->primary) // base_line表示在bwt字符中的位置，所以超出$为最尾所在行之后，要减掉1
+					for (m=0; m<4; ++m)
-						base_line -= 1;			   // bwt碱基序列行（不包含$）
+						c[m] = (uint32_t)cnt[m]; // 碱基m在cur_bwt_mtx_line(包含)之前的累积值，直接拷贝原bwt中的occ即可
-					uint32_t pre_bwt_base = bwt_B0(bwt, base_line); // bwt列碱基对应的前一个碱基
+
-					if (origin_base_line == bwt->primary)
+					cnt[bwt_base] -= 1; // 得到cur_bwt_mtx_line(不包含)之前的累积量，即bwt_occ4(bwt, cur_bwt_mtx_line-1, cnt)
 					bwtint_t bwt_base_mtx_line = bwt->L2[bwt_base] + 1 + cnt[bwt_base]; // bwt_base对应的bwt matrix行（LF变换）
 					bwt_occ4(bwt, bwt_base_mtx_line, cnt2); // 计算bwt_base_mtx_line之前的occ
 					for (n = 0; n < 4; ++n)
 					{
 						int c2_idx = bwt_base << 2 | n; // bwt base放在前边
 						c2[c2_idx] = (uint32_t)cnt2[n]; // pre-bwt:bwt 碱基对的累计值
 					}
 					bwtint_t bwt_base_str_line = bwt_base_mtx_line; // bwt-str中对应的行排序
 					if (bwt_base_str_line >= bwt->primary)			// base_line表示在bwt str中的位置，所以超出$为最尾所在行之后，要减掉1
 						bwt_base_str_line -= 1;						// bwt碱基序列行（不包含$）
 					uint32_t pre_bwt_base = bwt_B0(bwt, bwt_base_str_line); // bwt列碱基对应的前一个碱基pre-bwt
 					// 此时，bwt_base对应的bwt matrix首行，是$排在最尾的行，说明bwt_base就是序列的第一个碱基，
 					// 此时计算出来的pre_bwt_base就是primary前一行的bwt base，以此来代替$字符，在后续的计算过程中需要考虑
 					if (bwt_base_mtx_line == bwt->primary)
 					{
 						// 计算sec_bcp
 						fmt->sec_bcp = pre_bwt_base << 2 | bwt_base; // 因为把$当成A处理了
-						fmt->sec_primary = cur_line;
+						fmt->sec_primary = cur_mtx_line;		 // pre-bwt base为$的行排序（bwt-matrix行）
-						fmt->first_base = bwt_base;
+						fmt->first_base = bwt_base; // 原始序列第一个碱基
-						fmt->last_base = pre_bwt_base;
+						fmt->last_base = pre_bwt_base; // 计算后替代$字符的碱基（应该是primary行上边一行对应的bwt base）
 					}
-					// 暂存
+					// 暂存 pre-bwt碱基序列
-					pre_bwt_16_seq = pre_bwt_16_seq | (pre_bwt_base << (15-j)*2);
+					pre_bwt_16_seq = pre_bwt_16_seq | (pre_bwt_base << (15-j)*2); // 序列靠前的碱基排在uint32_t数据中的高位
 					if (base_line >= bwt->primary)
 						base_line += 1; // bwt矩阵行
 					bwtint_t pre_base_line = bwt->L2[pre_bwt_base] + 1 + bwt_occ(bwt, base_line - 1, pre_bwt_base);
 					// 获取c
 					bwt_occ4(bwt, cur_line, cnt);
 					for (m = 0; m < 4; ++m)
 					{
 						c[m] = (uint32_t)cnt[m]; // 碱基m在cur_line(包含)之前的累积值
 					}
 					// 求出c2
 					cnt[bwt_base] -= 1; // 得到cur_line(不包含)之前的累积量
 					// bwtint_t m_first_line = bwt->L2[bwt_base] + cnt[bwt_base]; // 该bwt_base对应的在bwt矩阵中行的前一行
 					// bwt_occ4(bwt, m_first_line, cnt2);
 					// for (n = 0; n < 4; ++n) // 只计算bwt_base对应的二级occ，其他用之前的值
 					// {
 					// 	int c2_idx = bwt_base << 2 | n;
 					// 	c2[c2_idx] = (uint32_t)cnt2[n];
 					// }
-					for (m = 0; m < 4; ++m)
+					// 输出调试信息
-					{
+					// cout << "mtx line: " << cur_mtx_line << ' ' << c[0] << ' ' << c[1] << ' ' << c[2] << ' ' << c[3] << ' ';
 						bwtint_t m_first_line = -1;
 					  	// if (m == bwt_base || cnt[m] > 0)
 						if (m == bwt_base)
 						{
 							m_first_line = bwt->L2[m] + 1 + cnt[m]; // m是否与bwt_base相同，这里需要想清楚，情况不一样的
 							if (m_first_line >= bwt->seq_len)
 								m_first_line = bwt->seq_len;
 							// cout << cur_line << '\t' << BASE[m] << '\t' << m_first_line << endl;
 							bwt_occ4(bwt, m_first_line, cnt2);
 							for (n = 0; n < 4; ++n)
 							{
 								int c2_idx = m << 2 | n;
 								c2[c2_idx] = (uint32_t)cnt2[n];
 							}
 						}
 					}
 					cnt[bwt_base] += 1; // cur_line(包含)之前
 					// cout << cur_line << '\t'
 					// 	 << base_line << '\t'
 					// 	 << pre_base_line << '\t'
 					// 	 << BASE[pre_bwt_base] << '\t'
 					// 	 << BASE[bwt_base] << '\t'
 					// 	 << cnt[0] << ' ' << cnt[1] << ' ' << cnt[2] << ' ' << cnt[3] << "\t\t";
 					// for (m = 0; m < 16; ++m)
 					// 	cout << c2[m] << ' ';
 					// cout << endl;
 					// for (m = 0; m < 16; ++m)
 					// 	fprintf(fmt_out, "%-4d", c2[m]);
 					// fprintf(fmt_out, "\n");
 				}
 				else
 					break;
 			}
 			//print_base_uint32(pre_bwt_16_seq);
 			//cout << endl;
 			//print_base_uint32(bwt_16_seq);
 			// 保存bwt和pre_bwt
-			uint32_t tmp_seq = 0;
+			uint32_t pre_and_bwt_seq = 0;
-			tmp_seq = (((pre_bwt_16_seq & (3 << 30)) >> 0) | ((bwt_16_seq & (3 << 30)) >> 2)) 
+			for (m = 0; m < 8; ++m)
-				    | (((pre_bwt_16_seq & (3 << 28)) >> 2) | ((bwt_16_seq & (3 << 28)) >> 4))
+			{
-					| (((pre_bwt_16_seq & (3 << 26)) >> 4) | ((bwt_16_seq & (3 << 26)) >> 6))
+				int lshift_bit = 30 - 2 * m;
-					| (((pre_bwt_16_seq & (3 << 24)) >> 6) | ((bwt_16_seq & (3 << 24)) >> 8))
+				pre_and_bwt_seq |= (((pre_bwt_16_seq & (3 << lshift_bit)) >> (m * 2)) | ((bwt_16_seq & (3 << lshift_bit)) >> ((m * 2) + 2)));
-					| (((pre_bwt_16_seq & (3 << 22)) >> 8) | ((bwt_16_seq & (3 << 22)) >> 10)) 
+			}
-				    | (((pre_bwt_16_seq & (3 << 20)) >> 10) | ((bwt_16_seq & (3 << 20)) >> 12))
+			buf[k++] = pre_and_bwt_seq;
 					| (((pre_bwt_16_seq & (3 << 18)) >> 12) | ((bwt_16_seq & (3 << 18)) >> 14))
 					| (((pre_bwt_16_seq & (3 << 16)) >> 14) | ((bwt_16_seq & (3 << 16)) >> 16));
 			buf[k++] = tmp_seq;
 			//cout << i << endl;
 			//print_base_uint32(tmp_seq);
 			if (j > 8)
 			{
-				// cout << "j: " << j << endl;
+				pre_and_bwt_seq = 0;
-				tmp_seq = (((pre_bwt_16_seq & (3 << 14)) << 16) | ((bwt_16_seq & (3 << 14)) << 14)) 
+				for (m = 8; m > 0; --m)
-					    | (((pre_bwt_16_seq & (3 << 12)) << 14) | ((bwt_16_seq & (3 << 12)) << 12))
+				{
-						| (((pre_bwt_16_seq & (3 << 10)) << 12) | ((bwt_16_seq & (3 << 10)) << 10))
+					int lshift_bit = 2 * m - 2;
-						| (((pre_bwt_16_seq & (3 << 8)) << 10) | ((bwt_16_seq & (3 << 8)) << 8))
+					pre_and_bwt_seq |= (((pre_bwt_16_seq & (3 << lshift_bit)) << (m * 2)) | ((bwt_16_seq & (3 << lshift_bit)) << (m * 2 - 2)));
-						| (((pre_bwt_16_seq & (3 << 6)) << 8) | ((bwt_16_seq & (3 << 6)) << 6)) 
+				}
-					    | (((pre_bwt_16_seq & (3 << 4)) << 6) | ((bwt_16_seq & (3 << 4)) << 4))
+				buf[k++] = pre_and_bwt_seq;
 						| (((pre_bwt_16_seq & (3 << 2)) << 4) | ((bwt_16_seq & (3 << 2)) << 2))
 						| (((pre_bwt_16_seq & (3 << 0)) << 2) | ((bwt_16_seq & (3 << 0)) << 0));
 				buf[k++] = tmp_seq;
 				//print_base_uint32(tmp_seq);
 			}
 		}
 	}
 	// the last element
 	// cout << c[0] << '\t' << c[1] << '\t' << c[2] << '\t' << c[3] << endl;
 	memcpy(buf + k, c, sizeof(uint32_t) * 4);
 	k += 4;
 	memcpy(buf + k, c2, sizeof(uint32_t) * 16);
@ -278,103 +272,149 @@ FMTIndex *create_fmt_from_bwt(bwt_t *bwt)
 	xassert(k == fmt->bwt_size, "inconsistent bwt_size");
 	// update fmt
 	fmt->bwt = buf; 
 	return fmt;
 }
-
+// 扩展两个个碱基，计算bwt base为b的pre-bwt str中各个碱基的occ
-
+void fmt_e2_occ4(const FMTIndex *fmt, bwtint_t k, int b, uint32_t cnt1[4], uint32_t cnt2[4])
 #define fmt_set_intv(fmt, c, ik) ((ik).x[0] = (fmt)->L2[(int)(c)] + 1, (ik).x[2] = (fmt)->L2[(int)(c) + 1] - (fmt)->L2[(int)(c)], (ik).x[1] = (fmt)->L2[3 - (c)] + 1, (ik).info = 0)
 #define fmt_occ_intv(b, k) ((b)->bwt + (k) / OCC_INTERVAL * (OCC_INTERVAL / 8 + 20))
 void fmt_occ4(const FMTIndex *fmt, bwtint_t k, int b, uint32_t cnt1[4], uint32_t cnt2[4])
 {
-	bwtint_t x;
+	uint32_t x1, x2;
 	uint32_t *p, tmp, *end;
 	bwtint_t bwt_k_line = k, bwt_k_base_line = k >> OCC_INTV_SHIFT << OCC_INTV_SHIFT;
 	if (k == (bwtint_t)(-1))
 	{
 		p = fmt->bwt + 4 + b * 4;
 		memset(cnt1, 0, 4 * sizeof(uint32_t));
-		memset(cnt2, 0, 4 * sizeof(uint32_t));
+		memcpy(cnt2, p, 4 * sizeof(uint32_t));
 		return;
 	}
 	k -= (k >= fmt->primary); // k由bwt矩阵对应的行转换成bwt字符串对应的行（去掉了$，所以大于$的行，都减掉1）
 	p = fmt_occ_intv(fmt, k);
 	cout << "base: " << BASE[b] << endl;
 	cout << "k: " << k << "; p: " << (uint64_t)p << endl;
 	// cout << "0: " << (uint64_t)fmt_occ_intv(fmt, 0)
 	// 	 << " ;31: " << (uint64_t)fmt_occ_intv(fmt, 31)
 	// 	 << " ;32: " << (uint64_t)fmt_occ_intv(fmt, 32)
 	// 	 << " ;64: " << (uint64_t)fmt_occ_intv(fmt, 64)
 	// 	 << " ;96: " << (uint64_t)fmt_occ_intv(fmt, 96) << endl;
 	memcpy(cnt1, p, 4 * sizeof(uint32_t));
 	memcpy(cnt2, p + 4 + b * 4, 4 * sizeof(uint32_t));
 	cout << "cnt1: " << cnt1[0] << '\t' << cnt1[1] << '\t' << cnt1[2] << '\t' << cnt1[3] << endl;
 	cout << "cnt2: " << cnt2[0] << '\t' << cnt2[1] << '\t' << cnt2[2] << '\t' << cnt2[3] << endl;
 	p += 20;											// 该地址是bwt和pre_bwt字符串数据的首地址
-	end = p + ((k >> 4) - ((k & ~OCC_INTV_MASK) >> 4)); // this is the end point of the following loop
+	end = p + ((k >> 3) - ((k & ~OCC_INTV_MASK) >> 3)); // this is the end point of the following loop
-	// for (x = 0; p < end; ++p)
+
-	// 	x += __occ_aux4(bwt, *p);
+	for (x1 = 0, x2 = 0; p < end; ++p)
-	// tmp = *p & ~((1U << ((~k & 15) << 1)) - 1);
+	{
-	// x += __occ_aux4(bwt, tmp) - (~k & 15);
+		x1 += __fmt_occ_e2_aux4(fmt, 4, *p);
-	// cnt[0] += x & 0xff;
+		x2 += __fmt_occ_e2_aux4(fmt, b, *p);
-	// cnt[1] += x >> 8 & 0xff;
+	}
-	// cnt[2] += x >> 16 & 0xff;
+	tmp = *p & ~((1U << ((~k & 7) << 2)) - 1);
-	// cnt[3] += x >> 24;
+	x1 += __fmt_occ_e2_aux4(fmt, 4, tmp) - (~k & 7);
 	x2 += __fmt_occ_e2_aux4(fmt, b, tmp);
 	if (b == 0)
 		x2 -= ~k & 7;
 	// 如果跨过了second_primary,那么可能需要减掉一次累积值
 	if (b == fmt->first_base && bwt_k_base_line < fmt->sec_primary && bwt_k_line >= fmt->sec_primary)
 	{
 		x2 -= 1 << (fmt->last_base << 3);
 	}
 	cnt1[0] += x1 & 0xff;
 	cnt1[1] += x1 >> 8 & 0xff;
 	cnt1[2] += x1 >> 16 & 0xff;
 	cnt1[3] += x1 >> 24;
 	cnt2[0] += x2 & 0xff;
 	cnt2[1] += x2 >> 8 & 0xff;
 	cnt2[2] += x2 >> 16 & 0xff;
 	cnt2[3] += x2 >> 24;
 	// cout << "fmt-occ: " << k << '\t' << cnt1[0] << '\t' << cnt1[1] << '\t' << cnt1[2] << '\t' << cnt1[3] << endl;
 	// cout << "fmt-occ-2: " << k << '\t' << cnt2[0] << '\t' << cnt2[1] << '\t' << cnt2[2] << '\t' << cnt2[3] << endl;
 	// cout << "bwt_k_base_line: " << bwt_k_base_line << endl;
 	// cout << "bwt_k_line: " << bwt_k_line << endl;
 	// cout << "sec_primary: " << fmt->sec_primary << endl;
 }
-void fmt_2occ4(const FMTIndex *fmt, bwtint_t k, bwtint_t l, int b,
+// 对k行和l行同时计算occ，如果k和l落在同一个interval区间，可以减少一些计算量和访存
-			   uint32_t cntk1[4], uint32_t cntl1[4], uint32_t cntk2[4], uint32_t cntl2[4])
+void fmt_e2_2occ4(const FMTIndex *fmt, bwtint_t k, bwtint_t l, int b,
 			   uint32_t cntk1[4], uint32_t cntk2[4], uint32_t cntl1[4], uint32_t cntl2[4])
 {
 	// fmt_e2_occ4(fmt, k, b, cntk1, cntk2);
 	// fmt_e2_occ4(fmt, l, b, cntl1, cntl2);
 	// return;
 	bwtint_t _k, _l;
 	_k = k - (k >= fmt->primary); // 换算成了seq的行
 	_l = l - (l >= fmt->primary);
-//	if (_l >> OCC_INTV_SHIFT != _k >> OCC_INTV_SHIFT || k == (bwtint_t)(-1) || l == (bwtint_t)(-1))
+	if (_l >> OCC_INTV_SHIFT != _k >> OCC_INTV_SHIFT || k == (bwtint_t)(-1) || l == (bwtint_t)(-1))
-//	{
+	{
-		fmt_occ4(fmt, k, b, cntk1, cntk2);
+		fmt_e2_occ4(fmt, k, b, cntk1, cntk2);
-		fmt_occ4(fmt, l, b, cntl1, cntk1);
+		fmt_e2_occ4(fmt, l, b, cntl1, cntl2);
-//	}
+	}
-//	else
+	else
-//	{
+	{
-//		bwtint_t x, y;
+		uint32_t x1, x2, y1, y2;
-//		uint32_t *p, tmp, *endk, *endl;
+		uint32_t *p, tmp, *ek, *el;
-//		k -= (k >= bwt->primary); // because $ is not in bwt
+		bwtint_t bwt_k_line = k, bwt_l_line = l, bwt_base_line = k >> OCC_INTV_SHIFT << OCC_INTV_SHIFT;
-//		l -= (l >= bwt->primary);
+
-//		p = bwt_occ_intv(bwt, k);
+		k -= (k >= fmt->primary); // because $ is not in bwt
-//		memcpy(cntk, p, 4 * sizeof(bwtint_t));
+		l -= (l >= fmt->primary);
-//		p += sizeof(bwtint_t); // sizeof(bwtint_t) = 4*(sizeof(bwtint_t)/sizeof(uint32_t))
+		p = fmt_occ_intv(fmt, k);
-//		// prepare cntk[]
+		memcpy(cntk1, p, 4 * sizeof(uint32_t));
-//		endk = p + ((k >> 4) - ((k & ~OCC_INTV_MASK) >> 4));
+		memcpy(cntk2, p + 4 + b * 4, 4 * sizeof(uint32_t));
-//		endl = p + ((l >> 4) - ((l & ~OCC_INTV_MASK) >> 4));
+		memcpy(cntl1, cntk1, 4 * sizeof(uint32_t));
-//		for (x = 0; p < endk; ++p)
+		memcpy(cntl2, cntk2, 4 * sizeof(uint32_t));
-//			x += __occ_aux4(bwt, *p);
+		p += 20;
-//		y = x;
+		// prepare cntk[]
-//		tmp = *p & ~((1U << ((~k & 15) << 1)) - 1);
+		ek = p + ((k >> 3) - ((k & ~OCC_INTV_MASK) >> 3));
-//		x += __occ_aux4(bwt, tmp) - (~k & 15);
+		el = p + ((l >> 3) - ((l & ~OCC_INTV_MASK) >> 3));
-//		// calculate cntl[] and finalize cntk[]
+		for (x1 = 0, x2 = 0; p < ek; ++p)
-//		for (; p < endl; ++p)
+		{
-//			y += __occ_aux4(bwt, *p);
+			x1 += __fmt_occ_e2_aux4(fmt, 4, *p);
-//		tmp = *p & ~((1U << ((~l & 15) << 1)) - 1);
+			x2 += __fmt_occ_e2_aux4(fmt, b, *p);
-//		y += __occ_aux4(bwt, tmp) - (~l & 15);
+		}
-//		memcpy(cntl, cntk, 4 * sizeof(bwtint_t));
+		y1 = x1;
-//		cntk[0] += x & 0xff;
+		y2 = x2;
-//		cntk[1] += x >> 8 & 0xff;
+		tmp = *p & ~((1U << ((~k & 7) << 2)) - 1);
-//		cntk[2] += x >> 16 & 0xff;
+		x1 += __fmt_occ_e2_aux4(fmt, 4, tmp) - (~k & 7);
-//		cntk[3] += x >> 24;
+		x2 += __fmt_occ_e2_aux4(fmt, b, tmp);
-//		cntl[0] += y & 0xff;
+		if (b == 0)
-//		cntl[1] += y >> 8 & 0xff;
+			x2 -= ~k & 7;
-//		cntl[2] += y >> 16 & 0xff;
+		for (; p < el; ++p)
-//		cntl[3] += y >> 24;
+		{
-//	}
+			y1 += __fmt_occ_e2_aux4(fmt, 4, *p);
 			y2 += __fmt_occ_e2_aux4(fmt, b, *p);
 		}
 		tmp = *p & ~((1U << ((~l & 7) << 2)) - 1);
 		y1 += __fmt_occ_e2_aux4(fmt, 4, tmp) - (~l & 7);
 		y2 += __fmt_occ_e2_aux4(fmt, b, tmp);
 		if (b == 0)
 			y2 -= ~l & 7;
 		// 如果跨过了second_primary,那么可能需要减掉一次累积值
 		if (b == fmt->first_base && bwt_base_line < fmt->sec_primary)
 		{
 			if (bwt_k_line >= fmt->sec_primary)
 				x2 -= 1 << (fmt->last_base << 3);
 			if (bwt_l_line >= fmt->sec_primary)
 				y2 -= 1 << (fmt->last_base << 3);
 		}
 		cntk1[0] += x1 & 0xff;
 		cntk1[1] += x1 >> 8 & 0xff;
 		cntk1[2] += x1 >> 16 & 0xff;
 		cntk1[3] += x1 >> 24;
 		cntk2[0] += x2 & 0xff;
 		cntk2[1] += x2 >> 8 & 0xff;
 		cntk2[2] += x2 >> 16 & 0xff;
 		cntk2[3] += x2 >> 24;
 		cntl1[0] += y1 & 0xff;
 		cntl1[1] += y1 >> 8 & 0xff;
 		cntl1[2] += y1 >> 16 & 0xff;
 		cntl1[3] += y1 >> 24;
 		cntl2[0] += y2 & 0xff;
 		cntl2[1] += y2 >> 8 & 0xff;
 		cntl2[2] += y2 >> 16 & 0xff;
 		cntl2[3] += y2 >> 24;
 		// cout << "fmt-occ: " << k << '\t' << cntk1[0] << '\t' << cntk1[1] << '\t' << cntk1[2] << '\t' << cntk1[3] << endl;
 		// cout << "fmt-occ-2: " << k << '\t' << cntk2[0] << '\t' << cntk2[1] << '\t' << cntk2[2] << '\t' << cntk2[3] << endl;
 		// cout << "fmt-occ: " << l << '\t' << cntl1[0] << '\t' << cntl1[1] << '\t' << cntl1[2] << '\t' << cntl1[3] << endl;
 		// cout << "fmt-occ-2: " << l << '\t' << cntl2[0] << '\t' << cntl2[1] << '\t' << cntl2[2] << '\t' << cntl2[3] << endl;
 	}
 }
-
+// 扩展一个碱基，计算bwt str中各个碱基的occ
 #define __fmt_occ_e2_aux4(fmt, b, val) \
 	((fmt)->cnt_table[(b)][(val) & 0xff] + (fmt)->cnt_table[b][(val) >> 8 & 0xff] + (fmt)->cnt_table[b][(val) >> 16 & 0xff] + (fmt)->cnt_table[b][(val) >> 24])
 void fmt_e1_occ4(const FMTIndex *fmt, bwtint_t k, uint32_t cnt[4])
 {
 	uint32_t x;
@ -402,69 +442,58 @@ void fmt_e1_occ4(const FMTIndex *fmt, bwtint_t k, uint32_t cnt[4])
 	cnt[3] += x >> 24;
 }
-void fmt_e2_occ4(const FMTIndex *fmt, bwtint_t k, int b, uint32_t cnt1[4], uint32_t cnt2[4])
+// 对k行和l行同时计算bwt str的occ，如果k和l落在同一个interval区间，可以减少一些计算量和访存
 void fmt_e1_2occ4(const FMTIndex *fmt, bwtint_t k, bwtint_t l, uint32_t cntk[4], uint32_t cntl[4])
 {
-	uint32_t x1, x2;
+	bwtint_t _k, _l;
-	uint32_t *p, tmp, *end;
+	_k = k - (k >= fmt->primary); // 换算成了seq的行
-	bwtint_t bwt_k_line = k, bwt_k_base_line = k >> OCC_INTV_SHIFT << OCC_INTV_SHIFT;
+	_l = l - (l >= fmt->primary);
-	if (k == (bwtint_t)(-1))
+	if (_l >> OCC_INTV_SHIFT != _k >> OCC_INTV_SHIFT || k == (bwtint_t)(-1) || l == (bwtint_t)(-1))
 	{
-		p = fmt->bwt + 4 + b * 4;
+		fmt_e1_occ4(fmt, k, cntk);
-		memset(cnt1, 0, 4 * sizeof(uint32_t));
+		fmt_e1_occ4(fmt, l, cntl);
 		memcpy(cnt2, p, 4 * sizeof(uint32_t));
 		return;
 	}
-	k -= (k >= fmt->primary); // k由bwt矩阵对应的行转换成bwt字符串对应的行（去掉了$，所以大于$的行，都减掉1）
+	else
 	{
 		uint32_t x1, y1;
 		uint32_t *p, tmp, *endk, *endl;
 		k -= (k >= fmt->primary); // because $ is not in bwt
 		l -= (l >= fmt->primary);
 		p = fmt_occ_intv(fmt, k);
-	// cout << "base: " << BASE[b] << endl;
+		memcpy(cntk, p, 4 * sizeof(uint32_t));
-	// cout << "k: " << k << "; c 0 cnt: " << p[0] << '\t' << p[1] << '\t' << p[2] << '\t' << p[3] << endl;
+		memcpy(cntl, p, 4 * sizeof(uint32_t));
-	memcpy(cnt1, p, 4 * sizeof(uint32_t));
+		p += 20;
-	memcpy(cnt2, p + 4 + b * 4, 4 * sizeof(uint32_t));
+		// prepare cntk[]
-	// cout << "[start: ] k: " << k << "; k line cnt: " << cnt[0] << '\t' << cnt[1] << '\t' << cnt[2] << '\t' << cnt[3] << endl;
+		endk = p + ((k >> 3) - ((k & ~OCC_INTV_MASK) >> 3));
-
+		endl = p + ((l >> 3) - ((l & ~OCC_INTV_MASK) >> 3));
-	p += 20;											// 该地址是bwt和pre_bwt字符串数据的首地址
+		for (x1 = 0; p < endk; ++p)
 	end = p + ((k >> 3) - ((k & ~OCC_INTV_MASK) >> 3)); // this is the end point of the following loop
 	for (x1 = 0, x2 = 0; p < end; ++p)
 		{
 			x1 += __fmt_occ_e2_aux4(fmt, 4, *p);
 		x2 += __fmt_occ_e2_aux4(fmt, b, *p);
 		}
-	//{
+		y1 = x1;
 	//	x += fmt->cnt_table[b][*p & 0xff]
 	//		 + fmt->cnt_table[b][*p >> 8 & 0xff] 
 	//		 + fmt->cnt_table[b][*p >> 16 & 0xff] 
 	//		 + fmt->cnt_table[b][*p >> 24 & 0xff];
 	//	// cout << "p: " << *p << endl;
 	//	// print_base_uint32(*p);
 	//	// cout << (fmt->cnt_table[b][*p & 0xff] >> 24) << ' ' 
 	//	// 	 << fmt->cnt_table[b][*p >> 24 & 0xff]
 	//	// 	 << endl;
 	//}
 		tmp = *p & ~((1U << ((~k & 7) << 2)) - 1);
 		x1 += __fmt_occ_e2_aux4(fmt, 4, tmp) - (~k & 7);
-	x2 += __fmt_occ_e2_aux4(fmt, b, tmp);
+		for (; p < endl; ++p)
 	if (b == 0)
 		x2 -= ~k & 7;
 	// 如果跨过了second_primary,那么可能需要减掉一次累积值
 	if (b == fmt->first_base && bwt_k_base_line < fmt->sec_primary && bwt_k_line >= fmt->sec_primary)
 		{
-		x2 -= 1 << (fmt->last_base << 3);
+			y1 += __fmt_occ_e2_aux4(fmt, 4, *p);
 		}
 		tmp = *p & ~((1U << ((~k & 7) << 2)) - 1);
 		y1 += __fmt_occ_e2_aux4(fmt, 4, tmp) - (~k & 7);
 		cntk[0] += x1 & 0xff;
 		cntk[1] += x1 >> 8 & 0xff;
 		cntk[2] += x1 >> 16 & 0xff;
 		cntk[3] += x1 >> 24;
 		cntl[0] += y1 & 0xff;
 		cntl[1] += y1 >> 8 & 0xff;
 		cntl[2] += y1 >> 16 & 0xff;
 		cntl[3] += y1 >> 24;
 	}
 	// x += __occ_aux4(bwt, tmp) - (~k & 15);
 	// cout << "x: " << x << " b:" << b << endl;
 	cnt1[0] += x1 & 0xff;
 	cnt1[1] += x1 >> 8 & 0xff;
 	cnt1[2] += x1 >> 16 & 0xff;
 	cnt1[3] += x1 >> 24;
 	cnt2[0] += x2 & 0xff;
 	cnt2[1] += x2 >> 8 & 0xff;
 	cnt2[2] += x2 >> 16 & 0xff;
 	cnt2[3] += x2 >> 24;
 	// cout << "[end  : ]k: " << k << "; k line cnt: " << cnt[0] << '\t' << cnt[1] << '\t' << cnt[2] << '\t' << cnt[3] << endl;
 }
 // 扩展一个碱基
 void fmt_extend1(const FMTIndex *fmt, bwtintv_t *ik, bwtintv_t ok[4], int is_back, int b1)
 {
 	uint32_t tk[4], tl[4];
@ -472,6 +501,7 @@ void fmt_extend1(const FMTIndex *fmt, bwtintv_t *ik, bwtintv_t ok[4], int is_bac
 	fmt_e1_occ4(fmt, ik->x[!is_back] - 1, tk);
 	fmt_e1_occ4(fmt, ik->x[!is_back] - 1 + ik->x[2], tl);
 	fmt_e1_2occ4(fmt, ik->x[!is_back] - 1, ik->x[!is_back] - 1 + ik->x[2], tk, tl);
 	for (i = 0; i != 4; ++i)
 	{
 		ok[i].x[!is_back] = fmt->L2[i] + 1 + tk[i]; // 起始行位置，互补链
@ -483,18 +513,21 @@ void fmt_extend1(const FMTIndex *fmt, bwtintv_t *ik, bwtintv_t ok[4], int is_bac
 	*ik = ok[b1];
 }
 // 扩展两个碱基
 void fmt_extend2(const FMTIndex *fmt, bwtintv_t *ik, bwtintv_t ok[4], int is_back, int b1, int b2)
 {
 	uint32_t tk1[4], tl1[4], tk2[4], tl2[4];
 	int i;
 	// fmt_e2_occ4(fmt, ik->x[!is_back] - 1, b1, tk1, tk2);
 	// fmt_e2_occ4(fmt, ik->x[!is_back] - 1 + ik->x[2], b1, tl1, tl2);
 	// tk表示在k行之前所有各个碱基累积出现次数，tl表示在l行之前的累积
 	fmt_e2_2occ4(fmt, ik->x[!is_back] - 1, ik->x[!is_back] - 1 + ik->x[2], b1, tk1, tk2, tl1, tl2);
-	// fmt_2occ4(fmt, ik->x[!is_back] - 1, ik->x[!is_back] - 1 + ik->x[2], b1, tk1, tl1, tk2, tl2); // tk表示在k行之前所有各个碱基累积出现次数，tl表示在l行之前的累积
+	// cout << "k: " << tk1[0] << '\t' << tk1[1] << '\t' << tk1[2] << '\t' << tk1[3] << endl;
-
+	// cout << "l: " << tl1[0] << '\t' << tl1[1] << '\t' << tl1[2] << '\t' << tl1[3] << endl;
-	fmt_e2_occ4(fmt, ik->x[!is_back] - 1, b1, tk1, tk2);
+	// cout << "k: " << tk2[0] << '\t' << tk2[1] << '\t' << tk2[2] << '\t' << tk2[3] << endl;
-	fmt_e2_occ4(fmt, ik->x[!is_back] - 1 + ik->x[2], b1, tl1, tl2);
+	// cout << "l: " << tl2[0] << '\t' << tl2[1] << '\t' << tl2[2] << '\t' << tl2[3] << endl;
 	// fmt_e2_occ(fmt, -1, 0, tk);
 	// 这里是反向扩展
 	for (i = 0; i != 4; ++i)
@ -505,11 +538,17 @@ void fmt_extend2(const FMTIndex *fmt, bwtintv_t *ik, bwtintv_t ok[4], int is_bac
 	// 因为计算的是互补碱基，所以3对应着0,2对应1，下边是正向扩展
 	ok[3].x[is_back] = ik->x[is_back] + (ik->x[!is_back] <= fmt->primary && ik->x[!is_back] + ik->x[2] - 1 >= fmt->primary);
-	for (i = 2; i >= b1; --i)
+	ok[2].x[is_back] = ok[3].x[is_back] + tl1[3] - tk1[3];
-		ok[i].x[is_back] = ok[i + 1].x[is_back] + tl1[i + 1] - tk1[i + 1];
+	ok[1].x[is_back] = ok[2].x[is_back] + tl1[2] - tk1[2];
 	ok[0].x[is_back] = ok[1].x[is_back] + tl1[1] - tk1[1];
 	cout << "fmt-d: " << BASE[b1] << '\t' << ok[b1].x[is_back] << '\t' << ok[b1].x[2] << endl;
 	ok[3].x[is_back] = ok[b1].x[is_back] + (ok[b1].x[!is_back] <= fmt->primary && ok[b1].x[!is_back] + ok[b1].x[2] - 1 >= fmt->primary);
-	for (i = 2; i >= b2; --i)
+	ok[2].x[is_back] = ok[3].x[is_back] + ok[3].x[2];
-		ok[i].x[is_back] = ok[i + 1].x[is_back] + ok[i + 1].x[2];
+	ok[1].x[is_back] = ok[2].x[is_back] + ok[2].x[2];
 	ok[0].x[is_back] = ok[1].x[is_back] + ok[1].x[2];
 	*ik = ok[b2];
 }
 // 利用fmt搜索seed，完整搜索，只需要单向搜索
@ -557,13 +596,18 @@ int main_fmtidx(int argc, char **argv)
 	//create_bwt_mtx(seq);
 	//cout << seq << endl;
-	bwt_t *bwt = restore_bwt_str(argv[1]); // 读取bwt原始字符串（带ACGT总的累积量）
+	bwt_t *bwt = restore_bwt(argv[1]); // 读取bwt原始字符串（带ACGT总的累积量）
-	create_interval_occ_bwt(bwt); // 根据bwt字符串创建包含interval occ的bwt（128碱基+ACGT累积量）
+	// create_interval_occ_bwt(bwt); // 根据bwt字符串创建包含interval occ的bwt（128碱基+ACGT累积量）
 	cout << "L2: " << bwt->L2[0] << '\t' << bwt->L2[1] << '\t' << bwt->L2[2] << '\t'
 		 << bwt->L2[3] << '\t' << bwt->L2[4] << endl;
 	string s = "AACCCTAA";
 	srand(time(NULL));
 	s = generate_rand_seq(10);
 	cout << "seq: " << s << endl;
 	// s = "TTC";
 	bwt_search(bwt, s);
 	bwt_search2(bwt, s);
@ -573,6 +617,9 @@ int main_fmtidx(int argc, char **argv)
 	// }
 	// TGGGAT
 	FMTIndex *fmt = create_fmt_from_bwt(bwt);
 	dump_fmt("ref.fmt", fmt);
 	// FMTIndex *fmt = restore_fmt("tiny.fmt");
 	fmt_search(fmt, s);
 	// cout << bwt->bwt_size << endl;
 	// cout << bwt->seq_len << endl;
--- a/fmt_index.h
+++ b/fmt_index.h
@ -3,6 +3,14 @@
 #include "bwt.h"
 // 获取碱基c（待查找序列的首个碱基）和对应的互补碱基对应的行，以及间隔
 #define fmt_set_intv(fmt, c, ik) ((ik).x[0] = (fmt)->L2[(int)(c)] + 1, (ik).x[2] = (fmt)->L2[(int)(c) + 1] - (fmt)->L2[(int)(c)], (ik).x[1] = (fmt)->L2[3 - (c)] + 1, (ik).info = 0)
 // k行（bwt str行（不包含$））对应的check point occ数据起始地址（小于k且是OCC_INTERVAL的整数倍）
 #define fmt_occ_intv(b, k) ((b)->bwt + (k) / OCC_INTERVAL * (OCC_INTERVAL / 8 + 20))
 // 字节val中包含bwt base为b的pre-bwt中T G C A（按顺序保存在32位整数里（每个占8bit））的数量
 #define __fmt_occ_e2_aux4(fmt, b, val) \
    ((fmt)->cnt_table[(b)][(val) & 0xff] + (fmt)->cnt_table[b][(val) >> 8 & 0xff] + (fmt)->cnt_table[b][(val) >> 16 & 0xff] + (fmt)->cnt_table[b][(val) >> 24])
 // fm-index, extend twice in one search step (one memory access)
 struct FMTIndex
 {
@ -14,9 +22,9 @@ struct FMTIndex
    uint32_t *bwt;        // BWT
    // occurance array, separated to two parts
    uint32_t cnt_table[5][256]; // 4对应原来的cnt_table，0,1,2,3,分别对应该碱基的扩展
-    int sec_bcp;                // base couple for sec primary line, AA=>0, AC=>1 ... TT=>15
+    uint8_t sec_bcp;                // base couple for sec primary line, AA=>0, AC=>1 ... TT=>15
-    int first_base;             // 序列的第一个碱基2bit的int类型，0,1,2,3
+    uint8_t first_base;             // 序列的第一个碱基2bit的int类型，0,1,2,3
-    int last_base;              // dollar转换成的base
+    uint8_t last_base;              // dollar转换成的base
    // suffix array
    int sa_intv;
    bwtint_t n_sa;
--- a/util.cpp
+++ b/util.cpp
@ -1,7 +1,16 @@
 #define FSYNC_ON_FLUSH
 #include <stdio.h>
 #include <stdlib.h>
 #include <sys/time.h>
-
+#include <string.h>
 #include <errno.h>
 #ifdef FSYNC_ON_FLUSH
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <unistd.h>
 #endif
 #include <stdarg.h>
 #include "util.h"
 // base转成2bit值
@ -39,6 +48,48 @@ void _err_fatal_simple_core(const char *func, const char *msg)
    abort();
 }
 // 打印信息并停止运行
 void _err_fatal_simple(const char *func, const char *msg)
 {
    fprintf(stderr, "[%s] %s\n", func, msg);
    exit(EXIT_FAILURE);
 }
 void err_fatal(const char *header, const char *fmt, ...)
 {
    va_list args;
    va_start(args, fmt);
    fprintf(stderr, "[%s] ", header);
    vfprintf(stderr, fmt, args);
    fprintf(stderr, "\n");
    va_end(args);
    exit(EXIT_FAILURE);
 }
 void err_fatal_core(const char *header, const char *fmt, ...)
 {
    va_list args;
    va_start(args, fmt);
    fprintf(stderr, "[%s] ", header);
    vfprintf(stderr, fmt, args);
    fprintf(stderr, " Abort!\n");
    va_end(args);
    abort();
 }
 // 打开文件流
 FILE *err_xopen_core(const char *func, const char *fn, const char *mode)
 {
    FILE *fp = 0;
    if (strcmp(fn, "-") == 0)
        return (strstr(mode, "r")) ? stdin : stdout;
    if ((fp = fopen(fn, mode)) == 0)
    {
        err_fatal(func, "fail to open file '%s' : %s", fn, strerror(errno));
    }
    return fp;
 }
 // 读取数据
 bwtint_t fread_fix(FILE *fp, bwtint_t size, void *a)
 {                                  /* Mac/Darwin has a bug when reading data longer than 2GB. This function fixes this issue by reading data in small chunks */
@ -54,3 +105,50 @@ bwtint_t fread_fix(FILE *fp, bwtint_t size, void *a)
    }
    return offset;
 }
 // 写二进制文件
 size_t err_fwrite(const void *ptr, size_t size, size_t nmemb, FILE *stream)
 {
    size_t ret = fwrite(ptr, size, nmemb, stream);
    if (ret != nmemb)
        _err_fatal_simple("fwrite", strerror(errno));
    return ret;
 }
 // 刷新文件流
 int err_fflush(FILE *stream)
 {
    int ret = fflush(stream);
    if (ret != 0)
        _err_fatal_simple("fflush", strerror(errno));
 #ifdef FSYNC_ON_FLUSH
    /* Calling fflush() ensures that all the data has made it to the
       kernel buffers, but this may not be sufficient for remote filesystems
       (e.g. NFS, lustre) as an error may still occur while the kernel
       is copying the buffered data to the file server.  To be sure of
       catching these errors, we need to call fsync() on the file
       descriptor, but only if it is a regular file.  */
    {
        struct stat sbuf;
        if (0 != fstat(fileno(stream), &sbuf))
            _err_fatal_simple("fstat", strerror(errno));
        if (S_ISREG(sbuf.st_mode))
        {
            if (0 != fsync(fileno(stream)))
                _err_fatal_simple("fsync", strerror(errno));
        }
    }
 #endif
    return ret;
 }
 // 关闭文件流
 int err_fclose(FILE *stream)
 {
    int ret = fclose(stream);
    if (ret != 0)
        _err_fatal_simple("fclose", strerror(errno));
    return ret;
 }
--- a/util.h
+++ b/util.h
@ -10,7 +10,9 @@ typedef uint64_t bwtint_t;
    if ((cond) == 0)       \
    _err_fatal_simple_core(__func__, msg)
-    double realtime(void);
+#define xopen(fn, mode) err_xopen_core(__func__, fn, mode)
 double realtime(void);
 // 在fm-indexv(或者bwt)查找过程中，记录结果
 struct bwtintv_t
@ -27,5 +29,13 @@ void _err_fatal_simple_core(const char *func, const char *msg);
 int bval(char b);
 // 互补碱基值
 int cbval(char b);
 // 打开文件流
 FILE *err_xopen_core(const char *func, const char *fn, const char *mode);
 // 写二进制文件
 size_t err_fwrite(const void *ptr, size_t size, size_t nmemb, FILE *stream);
 // 刷新文件流
 int err_fflush(FILE *stream);
 // 关闭文件流
 int err_fclose(FILE *stream);
 #endif