diff --git a/bwt.cpp b/bwt.cpp index 942d73f..939d53d 100644 --- a/bwt.cpp +++ b/bwt.cpp @@ -251,9 +251,25 @@ bwtintv_t bwt_search(bwt_t *bwt, const string &q) bwtintv_t ik, ok[4]; int i, c, x = 0; + //double tt = realtime(); + bwt_set_intv(bwt, bval(q[x]), ik); ik.info = x + 1; + for (i = x + 1; i < 12; ++i) + { + if (bval(q[i]) < 4) + { + c = cbval(q[i]); + bwt_extend(bwt, &ik, ok, 0); + ik = ok[c]; + ik.info = i + 1; + } + } + //t5 += realtime() - tt; + x = 11; + + //tt = realtime(); for (i = x + 1; i < (int)q.size(); ++i) { if (bval(q[i]) < 4) @@ -264,6 +280,7 @@ bwtintv_t bwt_search(bwt_t *bwt, const string &q) ik.info = i + 1; } } + //t6 += realtime() - tt; return ik; } diff --git a/fmt_index.cpp b/fmt_index.cpp index c90ae37..2606d65 100644 --- a/fmt_index.cpp +++ b/fmt_index.cpp @@ -514,9 +514,28 @@ bwtintv_t fmt_search(FMTIndex *fmt, const string &q) int i, c1, c2, x = 0; int qlen = (int)q.size(); + //double tt = realtime(); fmt_set_intv(fmt, bval(q[x]), ik); ik.info = x + 1; + for (i = x + 1; i + 1 < 12; i += 2) + { + if (bval(q[i]) < 4 && bval(q[i + 1]) < 4) + { + c1 = cbval(q[i]); + c2 = cbval(q[i + 1]); + fmt_extend2(fmt, &ik, &ok, 0, c1, c2); + ik = ok; + ik.info = i + 1; + } + else + { + break; + } + } + x = 11; + //t3 += realtime() - tt; + //tt = realtime(); // 每次扩展两个碱基 for (i = x + 1; i + 1 < qlen; i += 2) { @@ -540,6 +559,7 @@ bwtintv_t fmt_search(FMTIndex *fmt, const string &q) ik = ok; ik.info = i + 1; } + //t4 += realtime() - tt; return ik; } @@ -638,15 +658,15 @@ bwtintv_t fmt_search_use_kmer(FMTIndex *fmt, const string &q) // 先利用kmer进行索引查询 uint32_t qbit = 0; x = min(KMER_LEN, qlen); - double tt = realtime(); + //double tt = realtime(); for (i = 0; i < x; ++i) { qbit |= bval(q[i]) << ((KMER_LEN - 1 - i) << 1); } kmer_getval_at(&fmt->kmer_entry[qbit], &ik, x - 1); - t1 += realtime() - tt; + //t1 += realtime() - tt; - tt = realtime(); + //tt = realtime(); // 每次扩展两个碱基 for (i = x; i + 1 < qlen; i += 2) { @@ -670,7 +690,7 @@ bwtintv_t fmt_search_use_kmer(FMTIndex *fmt, const string &q) ik = ok; ik.info = i + 1; } - t2 += realtime() - tt; + //t2 += realtime() - tt; return ik; } @@ -778,7 +798,7 @@ int main_fmtidx(int argc, char **argv) // 生成随机序列进行测试 int seq_size = 10000000; // int seq_size = 1; - int seq_len = 19; + int seq_len = 23; vector seq_arr(seq_size); // seq_arr[0] = "GCGATACTAAGA"; // seq_arr[0] = "GAGAGCTGTTCCCGTGTTTTCCATGGTTT"; @@ -812,6 +832,8 @@ int main_fmtidx(int argc, char **argv) cout << "[time fmt kmer search:] " << time_fmt_kmer_search << "s" << endl; cout << "t1: " << t1 << "s; t2: " << t2 << "s" << endl; + cout << "t3: " << t3 << "s; t4: " << t4 << "s" << endl; + cout << "t5: " << t5 << "s; t4: " << t6 << "s" << endl; #endif diff --git a/fmt_index.h b/fmt_index.h index 68cfceb..07114c7 100644 --- a/fmt_index.h +++ b/fmt_index.h @@ -5,7 +5,7 @@ #define FMT_OCC_INTERVAL (1LL << FMT_OCC_INTV_SHIFT) #define FMT_OCC_INTV_MASK (FMT_OCC_INTERVAL - 1) -#define FMT_MID_INTV_SHIFT 4 +#define FMT_MID_INTV_SHIFT 5 #define FMT_MID_INTERVAL (1LL << FMT_MID_INTV_SHIFT) #define FMT_MID_INTV_MASK (FMT_MID_INTERVAL - 1) @@ -38,7 +38,7 @@ #define __fmt_mid_sum(x) \ ((x) >> 24 & 0xff) + ((x) >> 16 & 0xff) + ((x) >> 8 & 0xff) + ((x) & 0xff) -#define KMER_LEN 10 +#define KMER_LEN 12 #define KMER_ARR_SIZE ((1 << (KMER_LEN << 1))) // 用来保存kmer对应的fmt的位置信息 struct KmerEntry