bwa_perf/fmt_index.h

34 lines
1.7 KiB
C
Raw Blame History

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

#ifndef FMT_INDEX_H_
#define FMT_INDEX_H_
#include "bwt.h"
// 获取碱基c待查找序列的首个碱基和对应的互补碱基对应的行以及间隔
#define fmt_set_intv(fmt, c, ik) ((ik).x[0] = (fmt)->L2[(int)(c)] + 1, (ik).x[2] = (fmt)->L2[(int)(c) + 1] - (fmt)->L2[(int)(c)], (ik).x[1] = (fmt)->L2[3 - (c)] + 1, (ik).info = 0)
// k行bwt str行不包含$对应的check point occ数据起始地址小于k且是OCC_INTERVAL的整数倍
#define fmt_occ_intv(b, k) ((b)->bwt + (k) / OCC_INTERVAL * (OCC_INTERVAL / 8 + 20))
// 字节val中包含bwt base为b的pre-bwt中T G C A按顺序保存在32位整数里每个占8bit的数量
#define __fmt_occ_e2_aux4(fmt, b, val) \
((fmt)->cnt_table[(b)][(val) & 0xff] + (fmt)->cnt_table[b][(val) >> 8 & 0xff] + (fmt)->cnt_table[b][(val) >> 16 & 0xff] + (fmt)->cnt_table[b][(val) >> 24])
// fm-index, extend twice in one search step (one memory access)
struct FMTIndex
{
bwtint_t primary; // S^{-1}(0), or the primary index of BWT
bwtint_t sec_primary; // second primary line
bwtint_t L2[5]; // C(), cumulative count
bwtint_t seq_len; // sequence length
bwtint_t bwt_size; // size of bwt, about seq_len/4
uint32_t *bwt; // BWT
// occurance array, separated to two parts
uint32_t cnt_table[5][256]; // 4对应原来的cnt_table0,1,2,3,分别对应该碱基的扩展
uint8_t sec_bcp; // base couple for sec primary line, AA=>0, AC=>1 ... TT=>15
uint8_t first_base; // 序列的第一个碱基2bit的int类型0,1,2,3
uint8_t last_base; // dollar转换成的base
// suffix array
int sa_intv;
bwtint_t n_sa;
uint8_t *sa;
};
#endif