结果正确性没有问题,正在测试速度,稍快一些
This commit is contained in:
parent
c69cb901bb
commit
22b75d335b
|
|
@ -0,0 +1,18 @@
|
|||
{
|
||||
// 使用 IntelliSense 了解相关属性。
|
||||
// 悬停以查看现有属性的描述。
|
||||
// 欲了解更多信息,请访问: https://go.microsoft.com/fwlink/?linkid=830387
|
||||
"version": "0.2.0",
|
||||
"configurations": [
|
||||
{
|
||||
"name": "fmtidx",
|
||||
"type": "cppdbg",
|
||||
"request": "launch",
|
||||
"program": "${workspaceRoot}/fmtidx",
|
||||
"args": [
|
||||
"/home/zzh/data/reference/human_g1k_v37_decoy.fasta.bwt"
|
||||
],
|
||||
"cwd": "${workspaceFolder}", // 当前工作路径:当前文件所在的工作空间
|
||||
}
|
||||
]
|
||||
}
|
||||
3
Makefile
3
Makefile
|
|
@ -1,6 +1,7 @@
|
|||
CC= g++
|
||||
NOWARN= -Wno-unused-result -Wno-unused-function
|
||||
CFLAGS= -g -Wall $(NOWARN) -O2
|
||||
CFLAGS= #-g -Wall $(NOWARN) #-O2
|
||||
CPPFLAGS= -g -Wall $(NOWARN) -O2
|
||||
WRAP_MALLOC=-DUSE_MALLOC_WRAPPERS
|
||||
SHOW_PERF= -DSHOW_PERF
|
||||
AR= ar
|
||||
|
|
|
|||
23
bwt.cpp
23
bwt.cpp
|
|
@ -10,7 +10,7 @@
|
|||
|
||||
using namespace std;
|
||||
|
||||
void bwt_dump_bwt(const char *fn, const bwt_t *bwt)
|
||||
void dump_bwt(const char *fn, const bwt_t *bwt)
|
||||
{
|
||||
FILE *fp;
|
||||
fp = xopen(fn, "wb");
|
||||
|
|
@ -166,6 +166,7 @@ void bwt_occ4(const bwt_t *bwt, bwtint_t k, bwtint_t cnt[4])
|
|||
// an analogy to bwt_occ4() but more efficient, requiring k <= l
|
||||
void bwt_2occ4(const bwt_t *bwt, bwtint_t k, bwtint_t l, bwtint_t cntk[4], bwtint_t cntl[4])
|
||||
{
|
||||
// double tm_t = realtime();
|
||||
bwtint_t _k, _l;
|
||||
_k = k - (k >= bwt->primary);
|
||||
_l = l - (l >= bwt->primary);
|
||||
|
|
@ -206,6 +207,8 @@ void bwt_2occ4(const bwt_t *bwt, bwtint_t k, bwtint_t l, bwtint_t cntk[4], bwtin
|
|||
cntl[2] += y >> 16 & 0xff;
|
||||
cntl[3] += y >> 24;
|
||||
}
|
||||
// t5 += realtime() - tm_t;
|
||||
// f1 += 1;
|
||||
}
|
||||
|
||||
void bwt_extend(const bwt_t *bwt, const bwtintv_t *ik, bwtintv_t ok[4], int is_back)
|
||||
|
|
@ -236,7 +239,7 @@ void bwt_search(bwt_t *bwt, const string &q)
|
|||
|
||||
bwt_set_intv(bwt, bval(q[x]), ik);
|
||||
ik.info = x + 1;
|
||||
cout << ik.x[0] << '\t' << ik.x[1] << '\t' << ik.x[2] << endl;
|
||||
// cout << ik.x[0] << '\t' << ik.x[1] << '\t' << ik.x[2] << endl;
|
||||
|
||||
for (i = x + 1; i < q.size(); ++i)
|
||||
{
|
||||
|
|
@ -246,7 +249,7 @@ void bwt_search(bwt_t *bwt, const string &q)
|
|||
bwt_extend(bwt, &ik, ok, 0);
|
||||
ik = ok[c];
|
||||
ik.info = i + 1;
|
||||
cout << "bwt-1: " << ik.x[0] << '\t' << ik.x[1] << '\t' << ik.x[2] << endl;
|
||||
// cout << "bwt-1: " << ik.x[0] << '\t' << ik.x[1] << '\t' << ik.x[2] << endl;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -284,14 +287,14 @@ void bwt_extend2(const bwt_t *bwt, bwtintv_t *ik, bwtintv_t ok[4], int is_back,
|
|||
}
|
||||
|
||||
// 每次扩展两步
|
||||
void bwt_search2(bwt_t *bwt, const string &q)
|
||||
bwtintv_t bwt_search2(bwt_t *bwt, const string &q)
|
||||
{
|
||||
bwtintv_t ik, ok[4];
|
||||
int i, j, c1, c2, x = 0;
|
||||
|
||||
bwt_set_intv(bwt, bval(q[x]), ik);
|
||||
ik.info = x + 1;
|
||||
cout << ik.x[0] << '\t' << ik.x[1] << '\t' << ik.x[2] << endl;
|
||||
// cout << ik.x[0] << '\t' << ik.x[1] << '\t' << ik.x[2] << endl;
|
||||
|
||||
for (i = x + 1; i + 1 < q.size(); i += 2)
|
||||
{
|
||||
|
|
@ -301,11 +304,13 @@ void bwt_search2(bwt_t *bwt, const string &q)
|
|||
c1 = cbval(q[i]);
|
||||
c2 = cbval(q[i + 1]);
|
||||
|
||||
// double tm_t = realtime();
|
||||
bwt_extend2(bwt, &ik, ok, 0, c1);
|
||||
// t7 += realtime() - tm_t;
|
||||
|
||||
ik = ok[c2];
|
||||
ik.info = i + 1;
|
||||
cout << "bwt-2: " << ik.x[0] << '\t' << ik.x[1] << '\t' << ik.x[2] << endl;
|
||||
// cout << "bwt-2: " << ik.x[0] << '\t' << ik.x[1] << '\t' << ik.x[2] << endl;
|
||||
}
|
||||
else
|
||||
{
|
||||
|
|
@ -315,9 +320,13 @@ void bwt_search2(bwt_t *bwt, const string &q)
|
|||
if (i < q.size() && bval(q[i]) < 4)
|
||||
{ // 最后一次扩展
|
||||
c1 = cbval(q[i]);
|
||||
// double tm_t = realtime();
|
||||
bwt_extend(bwt, &ik, ok, 0);
|
||||
// t10 += realtime() - tm_t;
|
||||
ik = ok[c1];
|
||||
ik.info = i + 1;
|
||||
cout << "bwt-2: " << ik.x[0] << '\t' << ik.x[1] << '\t' << ik.x[2] << endl;
|
||||
// cout << "bwt-2: " << ik.x[0] << '\t' << ik.x[1] << '\t' << ik.x[2] << endl;
|
||||
}
|
||||
// cout << ik.x[0] << '\t' << ik.x[1] << '\t' << ik.x[2] << endl;
|
||||
return ik;
|
||||
}
|
||||
4
bwt.h
4
bwt.h
|
|
@ -7,7 +7,7 @@
|
|||
using std::string;
|
||||
|
||||
// occ间隔对应的右移位数,5表示间隔32行保存一次
|
||||
#define OCC_INTV_SHIFT 5
|
||||
#define OCC_INTV_SHIFT 7
|
||||
#define OCC_INTERVAL (1LL << OCC_INTV_SHIFT)
|
||||
#define OCC_INTV_MASK (OCC_INTERVAL - 1)
|
||||
|
||||
|
|
@ -63,6 +63,6 @@ void create_interval_occ_bwt(bwt_t *bwt);
|
|||
// 利用bwt搜索seed,完整搜索,只需要单向搜索
|
||||
void bwt_search(bwt_t *bwt, const string &q);
|
||||
// 每次扩展两步
|
||||
void bwt_search2(bwt_t *bwt, const string &q);
|
||||
bwtintv_t bwt_search2(bwt_t *bwt, const string &q);
|
||||
|
||||
#endif
|
||||
137
fmt_index.cpp
137
fmt_index.cpp
|
|
@ -12,13 +12,18 @@
|
|||
#include "fmt_index.h"
|
||||
using namespace std;
|
||||
|
||||
double t1 = 0, t2 = 0, t3 = 0, t4 = 0, t5 = 0, t6 = 0, t7 = 0, t8 = 0, t9 = 0, t10 = 0,
|
||||
t11 = 0, t12 = 0, t13 = 0, t14 = 0;
|
||||
|
||||
long f1 = 0, f2 = 0, f3 = 0, f4 = 0, f5 = 0;
|
||||
|
||||
const char BASE[4] = {'A', 'C', 'G', 'T'};
|
||||
|
||||
// 求反向互补序列
|
||||
string calc_reverse_seq(string &seq)
|
||||
{
|
||||
string rseq(seq.size(), '0');
|
||||
for (int i = 0; i < seq.size(); ++i)
|
||||
for (size_t i = 0; i < seq.size(); ++i)
|
||||
{
|
||||
if (seq[i] == 'A')
|
||||
rseq[i] = 'T';
|
||||
|
|
@ -60,7 +65,7 @@ void create_bwt_mtx(string &seq)
|
|||
bwtint_t seq_len = seq.size() + 1;
|
||||
string sarr[seq_len];
|
||||
sarr[0] = seq + '$';
|
||||
for (int i = 1; i < seq_len; ++i)
|
||||
for (bwtint_t i = 1; i < seq_len; ++i)
|
||||
{
|
||||
sarr[i] = sarr[0].substr(i) + sarr[0].substr(0, i);
|
||||
}
|
||||
|
|
@ -148,7 +153,7 @@ FMTIndex *restore_fmt(const char *fn)
|
|||
// 根据interval-bwt创建fmt-index
|
||||
FMTIndex *create_fmt_from_bwt(bwt_t *bwt)
|
||||
{
|
||||
FILE *fmt_out = fopen("fmt.txt", "w");
|
||||
// FILE *fmt_out = fopen("fmt.txt", "w");
|
||||
FMTIndex *fmt = (FMTIndex *)calloc(1, sizeof(FMTIndex));
|
||||
fmt_gen_cnt_table(fmt);
|
||||
|
||||
|
|
@ -334,6 +339,7 @@ void fmt_e2_2occ4(const FMTIndex *fmt, bwtint_t k, bwtint_t l, int b,
|
|||
// fmt_e2_occ4(fmt, k, b, cntk1, cntk2);
|
||||
// fmt_e2_occ4(fmt, l, b, cntl1, cntl2);
|
||||
// return;
|
||||
//double tm_t = realtime();
|
||||
bwtint_t _k, _l;
|
||||
_k = k - (k >= fmt->primary); // 换算成了seq的行
|
||||
_l = l - (l >= fmt->primary);
|
||||
|
|
@ -412,6 +418,8 @@ void fmt_e2_2occ4(const FMTIndex *fmt, bwtint_t k, bwtint_t l, int b,
|
|||
// cout << "fmt-occ: " << l << '\t' << cntl1[0] << '\t' << cntl1[1] << '\t' << cntl1[2] << '\t' << cntl1[3] << endl;
|
||||
// cout << "fmt-occ-2: " << l << '\t' << cntl2[0] << '\t' << cntl2[1] << '\t' << cntl2[2] << '\t' << cntl2[3] << endl;
|
||||
}
|
||||
//t6 += realtime() - tm_t;
|
||||
// f2 += 1;
|
||||
}
|
||||
|
||||
// 扩展一个碱基,计算bwt str中各个碱基的occ
|
||||
|
|
@ -445,6 +453,7 @@ void fmt_e1_occ4(const FMTIndex *fmt, bwtint_t k, uint32_t cnt[4])
|
|||
// 对k行和l行同时计算bwt str的occ,如果k和l落在同一个interval区间,可以减少一些计算量和访存
|
||||
void fmt_e1_2occ4(const FMTIndex *fmt, bwtint_t k, bwtint_t l, uint32_t cntk[4], uint32_t cntl[4])
|
||||
{
|
||||
//double tm_t = realtime();
|
||||
bwtint_t _k, _l;
|
||||
_k = k - (k >= fmt->primary); // 换算成了seq的行
|
||||
_l = l - (l >= fmt->primary);
|
||||
|
|
@ -456,7 +465,7 @@ void fmt_e1_2occ4(const FMTIndex *fmt, bwtint_t k, bwtint_t l, uint32_t cntk[4],
|
|||
else
|
||||
{
|
||||
uint32_t x1, y1;
|
||||
uint32_t *p, tmp, *endk, *endl;
|
||||
uint32_t *p, tmp, *ek, *el;
|
||||
|
||||
k -= (k >= fmt->primary); // because $ is not in bwt
|
||||
l -= (l >= fmt->primary);
|
||||
|
|
@ -465,21 +474,21 @@ void fmt_e1_2occ4(const FMTIndex *fmt, bwtint_t k, bwtint_t l, uint32_t cntk[4],
|
|||
memcpy(cntl, p, 4 * sizeof(uint32_t));
|
||||
p += 20;
|
||||
// prepare cntk[]
|
||||
endk = p + ((k >> 3) - ((k & ~OCC_INTV_MASK) >> 3));
|
||||
endl = p + ((l >> 3) - ((l & ~OCC_INTV_MASK) >> 3));
|
||||
for (x1 = 0; p < endk; ++p)
|
||||
ek = p + ((k >> 3) - ((k & ~OCC_INTV_MASK) >> 3));
|
||||
el = p + ((l >> 3) - ((l & ~OCC_INTV_MASK) >> 3));
|
||||
for (x1 = 0; p < ek; ++p)
|
||||
{
|
||||
x1 += __fmt_occ_e2_aux4(fmt, 4, *p);
|
||||
}
|
||||
y1 = x1;
|
||||
tmp = *p & ~((1U << ((~k & 7) << 2)) - 1);
|
||||
x1 += __fmt_occ_e2_aux4(fmt, 4, tmp) - (~k & 7);
|
||||
for (; p < endl; ++p)
|
||||
for (; p < el; ++p)
|
||||
{
|
||||
y1 += __fmt_occ_e2_aux4(fmt, 4, *p);
|
||||
}
|
||||
tmp = *p & ~((1U << ((~k & 7) << 2)) - 1);
|
||||
y1 += __fmt_occ_e2_aux4(fmt, 4, tmp) - (~k & 7);
|
||||
tmp = *p & ~((1U << ((~l & 7) << 2)) - 1);
|
||||
y1 += __fmt_occ_e2_aux4(fmt, 4, tmp) - (~l & 7);
|
||||
|
||||
cntk[0] += x1 & 0xff;
|
||||
cntk[1] += x1 >> 8 & 0xff;
|
||||
|
|
@ -491,6 +500,7 @@ void fmt_e1_2occ4(const FMTIndex *fmt, bwtint_t k, bwtint_t l, uint32_t cntk[4],
|
|||
cntl[2] += y1 >> 16 & 0xff;
|
||||
cntl[3] += y1 >> 24;
|
||||
}
|
||||
//t11 += realtime() - tm_t;
|
||||
}
|
||||
|
||||
// 扩展一个碱基
|
||||
|
|
@ -499,8 +509,8 @@ void fmt_extend1(const FMTIndex *fmt, bwtintv_t *ik, bwtintv_t ok[4], int is_bac
|
|||
uint32_t tk[4], tl[4];
|
||||
int i;
|
||||
|
||||
fmt_e1_occ4(fmt, ik->x[!is_back] - 1, tk);
|
||||
fmt_e1_occ4(fmt, ik->x[!is_back] - 1 + ik->x[2], tl);
|
||||
// fmt_e1_occ4(fmt, ik->x[!is_back] - 1, tk);
|
||||
// fmt_e1_occ4(fmt, ik->x[!is_back] - 1 + ik->x[2], tl);
|
||||
fmt_e1_2occ4(fmt, ik->x[!is_back] - 1, ik->x[!is_back] - 1 + ik->x[2], tk, tl);
|
||||
for (i = 0; i != 4; ++i)
|
||||
{
|
||||
|
|
@ -508,8 +518,9 @@ void fmt_extend1(const FMTIndex *fmt, bwtintv_t *ik, bwtintv_t ok[4], int is_bac
|
|||
ok[i].x[2] = tl[i] - tk[i]; // 间隔
|
||||
}
|
||||
ok[3].x[is_back] = ik->x[is_back] + (ik->x[!is_back] <= fmt->primary && ik->x[!is_back] + ik->x[2] - 1 >= fmt->primary);
|
||||
for (i = 2; i >= b1; --i)
|
||||
ok[i].x[is_back] = ok[i + 1].x[is_back] + ok[i + 1].x[2];
|
||||
ok[2].x[is_back] = ok[3].x[is_back] + ok[3].x[2];
|
||||
ok[1].x[is_back] = ok[2].x[is_back] + ok[2].x[2];
|
||||
ok[0].x[is_back] = ok[1].x[is_back] + ok[1].x[2];
|
||||
*ik = ok[b1];
|
||||
}
|
||||
|
||||
|
|
@ -519,22 +530,18 @@ void fmt_extend2(const FMTIndex *fmt, bwtintv_t *ik, bwtintv_t ok[4], int is_bac
|
|||
uint32_t tk1[4], tl1[4], tk2[4], tl2[4];
|
||||
int i;
|
||||
|
||||
// fmt_e2_occ4(fmt, ik->x[!is_back] - 1, b1, tk1, tk2);
|
||||
// fmt_e2_occ4(fmt, ik->x[!is_back] - 1 + ik->x[2], b1, tl1, tl2);
|
||||
// tk表示在k行之前所有各个碱基累积出现次数,tl表示在l行之前的累积
|
||||
fmt_e2_2occ4(fmt, ik->x[!is_back] - 1, ik->x[!is_back] - 1 + ik->x[2], b1, tk1, tk2, tl1, tl2);
|
||||
|
||||
// cout << "k: " << tk1[0] << '\t' << tk1[1] << '\t' << tk1[2] << '\t' << tk1[3] << endl;
|
||||
// cout << "l: " << tl1[0] << '\t' << tl1[1] << '\t' << tl1[2] << '\t' << tl1[3] << endl;
|
||||
// cout << "k: " << tk2[0] << '\t' << tk2[1] << '\t' << tk2[2] << '\t' << tk2[3] << endl;
|
||||
// cout << "l: " << tl2[0] << '\t' << tl2[1] << '\t' << tl2[2] << '\t' << tl2[3] << endl;
|
||||
|
||||
// 这里是反向扩展
|
||||
for (i = 0; i != 4; ++i)
|
||||
{
|
||||
ok[i].x[!is_back] = fmt->L2[i] + 1 + tk2[i]; // 起始行位置,互补链
|
||||
ok[i].x[2] = tl2[i] - tk2[i]; // 间隔
|
||||
}
|
||||
ok[b2].x[!is_back] = fmt->L2[b2] + 1 + tk2[b2];
|
||||
ok[b2].x[2] = tl2[b2] - tk2[b2];
|
||||
|
||||
// 因为计算的是互补碱基,所以3对应着0,2对应1,下边是正向扩展
|
||||
|
||||
ok[3].x[is_back] = ik->x[is_back] + (ik->x[!is_back] <= fmt->primary && ik->x[!is_back] + ik->x[2] - 1 >= fmt->primary);
|
||||
|
|
@ -542,7 +549,7 @@ void fmt_extend2(const FMTIndex *fmt, bwtintv_t *ik, bwtintv_t ok[4], int is_bac
|
|||
ok[1].x[is_back] = ok[2].x[is_back] + tl1[2] - tk1[2];
|
||||
ok[0].x[is_back] = ok[1].x[is_back] + tl1[1] - tk1[1];
|
||||
|
||||
cout << "fmt-d: " << BASE[b1] << '\t' << ok[b1].x[is_back] << '\t' << ok[b1].x[2] << endl;
|
||||
// cout << "fmt-d: " << BASE[b1] << '\t' << ok[b1].x[is_back] << '\t' << ok[b1].x[2] << endl;
|
||||
|
||||
ok[3].x[is_back] = ok[b1].x[is_back] + (ok[b1].x[!is_back] <= fmt->primary && ok[b1].x[!is_back] + ok[b1].x[2] - 1 >= fmt->primary);
|
||||
ok[2].x[is_back] = ok[3].x[is_back] + ok[3].x[2];
|
||||
|
|
@ -552,39 +559,45 @@ void fmt_extend2(const FMTIndex *fmt, bwtintv_t *ik, bwtintv_t ok[4], int is_bac
|
|||
*ik = ok[b2];
|
||||
}
|
||||
// 利用fmt搜索seed,完整搜索,只需要单向搜索
|
||||
void fmt_search(FMTIndex *fmt, const string &q)
|
||||
bwtintv_t fmt_search(FMTIndex *fmt, const string &q)
|
||||
{
|
||||
bwtintv_t ik, ok[4];
|
||||
int i, j, c1, c2, x = 0;
|
||||
int i, c1, c2, x = 0;
|
||||
int qlen = (int)q.size();
|
||||
|
||||
fmt_set_intv(fmt, bval(q[x]), ik);
|
||||
ik.info = x + 1;
|
||||
cout << ik.x[0] << '\t' << ik.x[1] << '\t' << ik.x[2] << endl;
|
||||
// cout << ik.x[0] << '\t' << ik.x[1] << '\t' << ik.x[2] << endl;
|
||||
|
||||
for (i = x + 1; i + 1 < q.size(); i += 2)
|
||||
for (i = x + 1; i + 1 < qlen; i += 2)
|
||||
{
|
||||
if (bval(q[i]) < 4 && bval(q[i + 1]) < 4)
|
||||
{
|
||||
|
||||
c1 = cbval(q[i]);
|
||||
c2 = cbval(q[i + 1]);
|
||||
|
||||
//double tm_t = realtime();
|
||||
fmt_extend2(fmt, &ik, ok, 0, c1, c2);
|
||||
//t8 += realtime() - tm_t;
|
||||
ik.info = i + 1;
|
||||
cout << "fmt : " << ik.x[0] << '\t' << ik.x[1] << '\t' << ik.x[2] << endl;
|
||||
//cout << "fmt : " << ik.x[0] << '\t' << ik.x[1] << '\t' << ik.x[2] << endl;
|
||||
}
|
||||
else
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (i < q.size() && bval(q[i]) < 4)
|
||||
if (i < qlen && bval(q[i]) < 4)
|
||||
{ // 最后一次扩展
|
||||
c1 = cbval(q[i]);
|
||||
//double tm_t = realtime();
|
||||
fmt_extend1(fmt, &ik, ok, 0, c1);
|
||||
//t9 += realtime() - tm_t;
|
||||
ik.info = i + 1;
|
||||
cout << "fmt : " << ik.x[0] << '\t' << ik.x[1] << '\t' << ik.x[2] << endl;
|
||||
// cout << "fmt : " << ik.x[0] << '\t' << ik.x[1] << '\t' << ik.x[2] << endl;
|
||||
}
|
||||
// cout << ik.x[0] << '\t' << ik.x[1] << '\t' << ik.x[2] << endl;
|
||||
return ik;
|
||||
}
|
||||
|
||||
int main_fmtidx(int argc, char **argv)
|
||||
|
|
@ -596,37 +609,67 @@ int main_fmtidx(int argc, char **argv)
|
|||
//create_bwt_mtx(seq);
|
||||
//cout << seq << endl;
|
||||
|
||||
bwt_t *bwt = restore_bwt(argv[1]); // 读取bwt原始字符串(带ACGT总的累积量)
|
||||
// create_interval_occ_bwt(bwt); // 根据bwt字符串创建包含interval occ的bwt(128碱基+ACGT累积量)
|
||||
cout << "L2: " << bwt->L2[0] << '\t' << bwt->L2[1] << '\t' << bwt->L2[2] << '\t'
|
||||
<< bwt->L2[3] << '\t' << bwt->L2[4] << endl;
|
||||
string bwt_idx = string(argv[1]) + ".bwt";
|
||||
string fmt_idx = string(argv[1]) + ".fmt";
|
||||
|
||||
string s = "AACCCTAA";
|
||||
bwt_t *bwt = restore_bwt(bwt_idx.c_str());
|
||||
FMTIndex *fmt = restore_fmt(fmt_idx.c_str());
|
||||
|
||||
vector<string> seed_arr(10000000);
|
||||
srand(time(NULL));
|
||||
s = generate_rand_seq(10);
|
||||
cout << "seq: " << s << endl;
|
||||
// s = "TTC";
|
||||
|
||||
bwt_search(bwt, s);
|
||||
bwt_search2(bwt, s);
|
||||
t1 = realtime();
|
||||
for (int i = 0; i < (int)seed_arr.size(); ++i)
|
||||
seed_arr[i] = generate_rand_seq(13);
|
||||
t1 = realtime() - t1;
|
||||
cout << "[time gen seed:] " << t1 << "s" << endl;
|
||||
|
||||
// t2 = realtime();
|
||||
// for (int i = 0; i < seed_arr.size(); ++i)
|
||||
// bwt_search(bwt, seed_arr[i]);
|
||||
// t2 = realtime() - t2;
|
||||
// cout << "[time bwt search:] " << t2 << "s" << endl;
|
||||
|
||||
t4 = realtime();
|
||||
for (int i = 0; i < (int)seed_arr.size(); ++i)
|
||||
{
|
||||
// bwtintv_t p1 =
|
||||
bwt_search2(bwt, seed_arr[i]);
|
||||
// bwtintv_t p2 = fmt_search(fmt, seed_arr[i]);
|
||||
// if (p1.x[1] != p2.x[1] || p1.x[2] != p2.x[2] || p1.x[3] != p2.x[3])
|
||||
// cout << seed_arr[i] << endl
|
||||
// << p1.x[1] << ' ' << p1.x[2] << ' ' << p1.x[3] << endl
|
||||
// << p2.x[1] << ' ' << p2.x[2] << ' ' << p2.x[3] << endl;
|
||||
}
|
||||
t4 = realtime() - t4;
|
||||
cout << "[time bwt search 2:] " << t4 << "s" << endl;
|
||||
|
||||
t3 = realtime();
|
||||
for (int i = 0; i < (int)seed_arr.size(); ++i)
|
||||
fmt_search(fmt, seed_arr[i]);
|
||||
t3 = realtime() - t3;
|
||||
cout << "[time fmt search:] " << t3 << "s" << endl;
|
||||
|
||||
cout << "bwt occ: " << t5 << "s; " << t7 << '\t' << t10 << endl;
|
||||
cout << "fmt occ: " << t6 << "s; " << t11 << '\t' << t8 << '\t' << t9 << endl;
|
||||
// bwt_search(bwt, s);
|
||||
// bwt_search2(bwt, s);
|
||||
// for (int i = 0; i < 120; ++i)
|
||||
// {
|
||||
// cout << i << '\t' << bwt_B0(bwt, i) << endl;
|
||||
// }
|
||||
// TGGGAT
|
||||
FMTIndex *fmt = create_fmt_from_bwt(bwt);
|
||||
dump_fmt("ref.fmt", fmt);
|
||||
// FMTIndex *fmt = create_fmt_from_bwt(bwt);
|
||||
// dump_fmt("ref.fmt", fmt);
|
||||
// FMTIndex *fmt = restore_fmt("tiny.fmt");
|
||||
|
||||
fmt_search(fmt, s);
|
||||
//fmt_search(fmt, s);
|
||||
// cout << bwt->bwt_size << endl;
|
||||
// cout << bwt->seq_len << endl;
|
||||
cout << "sec_: " << fmt->sec_bcp << '\t' << fmt->sec_primary << endl;
|
||||
//cout << "sec_: " << fmt->sec_bcp << '\t' << fmt->sec_primary << endl;
|
||||
|
||||
uint8_t b8 = 2 << 4 | 2;
|
||||
cout << "AGAG: " << fmt->cnt_table[2][b8] << endl;
|
||||
cout << (((b8 >> 6) == 0 && (b8 >> 4 & 3) == 2) + ((b8 >> 2 & 3) == 0 && (b8 & 3) == 2)) << endl;
|
||||
//uint8_t b8 = 2 << 4 | 2;
|
||||
//cout << "AGAG: " << fmt->cnt_table[2][b8] << endl;
|
||||
//cout << (((b8 >> 6) == 0 && (b8 >> 4 & 3) == 2) + ((b8 >> 2 & 3) == 0 && (b8 & 3) == 2)) << endl;
|
||||
return 0;
|
||||
}
|
||||
Loading…
Reference in New Issue