hyb-align/hyb_utils.c

818 lines
49 KiB
C
Raw Normal View History

2025-11-16 01:37:21 +08:00
#include <assert.h>
#include <emmintrin.h>
#include <sys/stat.h>
#include "hyb_idx.h"
#include "share_mem.h"
#include "utils.h"
/////////////////////////////////////////////////////
// 使用hybrid-index的工具函数
// 加载hybrid index
HybridIndex* load_hybrid_idx(const char* prefix) {
HybridIndex* hyb = NULL;
hyb = (HybridIndex*)calloc(1, sizeof(HybridIndex));
// return hyb;
int prefix_len = strlen(prefix);
char* fn = (char*)malloc(prefix_len + 30);
FILE* fp = NULL;
struct stat st;
#define __load_hybrid_idx_code(suffix, data) \
sprintf(fn, "%s%s", prefix, suffix); \
err_check_true(stat(fn, &st), 0); \
fp = xopen(fn, "r"); \
data = (uint8_t*)malloc(st.st_size); \
err_fread_noeof(data, 1, st.st_size, fp); \
err_fclose(fp);
// load ref-len
sprintf(fn, "%s.ref-len", prefix);
// fprintf(stderr, "fn: %s\n", fn);
fp = xopen(fn, "r");
err_check_false(fscanf(fp, "%ld", &hyb->ref_len), EOF);
err_fclose(fp);
// fprintf(stderr, "ref-len: %ld\n", hyb->ref_len);
char* kmer_suffix = ".hybrid.kmer";
char* data_suffix = ".hybrid.data";
// char *kmer_suffix = ".hybrid.learned.kmer";
// char *data_suffix = ".hybrid.learned.data";
#if 0
// shm_clear_hyb();
// load 2-bit ref
__load_hybrid_idx_code(".hybrid.pac", hyb->ref_bits);
// load hyb byte-sa
__load_hybrid_idx_code(".hybrid.sa", hyb->sa);
// load hyb kmer data
__load_hybrid_idx_code(kmer_suffix, hyb->kmer_data);
// load hyb index data
__load_hybrid_idx_code(data_suffix, hyb->index_data);
#else
shm_keep_hyb(prefix);
// load 2-bit ref
hyb->ref_bits = (uint8_t*)shm_get_index(strcat(strcpy(fn, prefix), ".hybrid.pac"));
// load hyb byte-sa
hyb->sa = (uint8_t*)shm_get_index(strcat(strcpy(fn, prefix), ".hybrid.sa"));
// load hyb kmer data
hyb->kmer_data = (uint8_t*)shm_get_index(strcat(strcpy(fn, prefix), kmer_suffix));
// load hyb index data
hyb->index_data = (uint8_t*)shm_get_index(strcat(strcpy(fn, prefix), data_suffix));
#endif
// fprintf(stderr, "文件大小为: %ld 字节, %.2f GB\n", st.st_size, (double)st.st_size / (1024 * 1024 * 1024));
return hyb;
}
// 创建正向反向互补bits
void create_seq_fb_bits(uint8_t* bs, int len, uint8_t* fs, uint8_t* rs) {
if (len > 0) {
uint8_t fbp = 0, rbp = 0;
int i = 0, j = 0, idxf = 0, idxr = 0;
for (; i + 3 < len; i += 4) {
fbp = (bs[i] & 3) | (bs[i + 1] & 3) << 2 | (bs[i + 2] & 3) << 4 | (bs[i + 3] & 3) << 6;
rbp = (3 - (bs[len - i - 1] & 3)) | (3 - (bs[len - i - 2] & 3)) << 2 | (3 - (bs[len - i - 3] & 3)) << 4 |
(3 - (bs[len - i - 4] & 3)) << 6;
fs[idxf++] = fbp;
rs[idxr++] = rbp;
}
fbp = 0;
rbp = 0;
for (; i < len; ++i, ++j) {
fbp |= (bs[i] & 3) << j * 2;
rbp |= (3 - (bs[len - i - 1] & 3)) << j * 2;
}
fs[idxf++] = fbp;
rs[idxr++] = rbp;
}
}
// 将seq和ref正向比对看最多有多少匹配的bpseq和ref都是2-bit编码的
inline int forward_match_len(uint8_t* seq, int64_t seq_pos, int64_t seq_end, uint8_t* ref, int64_t ref_pos, int64_t ref_len) {
if (seq_pos >= seq_end)
return 0;
int64_t max_match_len = MIN(ref_len - ref_pos, seq_end - seq_pos);
int ref_odd = ref_pos & 3;
int seq_odd = seq_pos & 3;
int64_t i = seq_pos;
int64_t j = ref_pos;
int match_len = 0;
/////////////
#define __forward_match_code(first_len, first_ref, first_seq, ref_bits, seq_bits) \
uint64_t bp32ref = first_ref; \
uint64_t bp32seq = first_seq; \
uint64_t cmp = bp32ref ^ bp32seq; \
if (cmp > 0) \
return MIN(__builtin_ctzll(cmp) >> 1, max_match_len); \
int first_cmp_len = first_len; \
match_len = MIN(first_cmp_len, max_match_len); \
i += first_cmp_len; \
j += first_cmp_len; \
seq_odd = i & 3; \
ref_odd = j & 3; \
for (; i + 31 < seq_end; i += 32, j += 32, match_len += 32) { \
bp32ref = ref_bits; \
bp32seq = seq_bits; \
cmp = bp32ref ^ bp32seq; \
if (cmp > 0) \
return MIN(match_len + (__builtin_ctzll(cmp) >> 1), max_match_len); \
} \
if (i < seq_end) { \
bp32ref = ref_bits; \
bp32seq = seq_bits; \
cmp = bp32ref ^ bp32seq; \
if (cmp > 0) \
return MIN(match_len + MIN(__builtin_ctzll(cmp) >> 1, seq_end - i), max_match_len); \
match_len = max_match_len; /*match_len += seq_end - i;*/ \
}
/////////
if (seq_odd < ref_odd) { // 调整到ref的整数字节
__forward_match_code(32 - ref_odd, (*(uint64_t*)&ref[j >> 2]) >> (ref_odd << 1),
(*(uint64_t*)&seq[i >> 2]) << ((ref_odd - seq_odd) << 1) >> (ref_odd << 1),
(*(uint64_t*)&ref[j >> 2]),
seq[i >> 2] >> (seq_odd << 1) | (*(uint64_t*)&seq[(i >> 2) + 1]) << ((4 - seq_odd) << 1));
} else if (seq_odd > ref_odd) { // 调整到seq的整数字节
__forward_match_code(32 - seq_odd, (*(uint64_t*)&ref[j >> 2]) << ((seq_odd - ref_odd) << 1) >> (seq_odd << 1),
(*(uint64_t*)&seq[i >> 2]) >> (seq_odd << 1),
ref[j >> 2] >> (ref_odd << 1) | (*(uint64_t*)&ref[(j >> 2) + 1]) << ((4 - ref_odd) << 1),
(*(uint64_t*)&seq[i >> 2]));
} else { // 可以调整到相同的64位地址进行比较了
__forward_match_code(32 - seq_odd, (*(uint64_t*)&ref[j >> 2]) >> (seq_odd << 1),
(*(uint64_t*)&seq[i >> 2]) >> (seq_odd << 1), (*(uint64_t*)&ref[j >> 2]),
(*(uint64_t*)&seq[i >> 2]));
}
return MIN(match_len, max_match_len);
}
// 将seq和ref反向比对看最多有多少匹配的bp
inline int backward_match_len(uint8_t* seq, int64_t seq_pos, int64_t seq_start, uint8_t* ref, int64_t ref_pos) {
if (seq_pos < seq_start)
return 0;
int64_t max_match_len = MIN(ref_pos + 1, seq_pos - seq_start + 1);
int64_t i = seq_pos;
int64_t j = ref_pos;
int seq_odd = 3 - (i & 3);
int ref_odd = 3 - (j & 3);
int match_len = 0;
/////////////
#define __backward_tail_code(last_code) \
int ext_bp = (7 - (i >> 2)) << 2; \
uint64_t bp32ref = *(uint64_t*)(ref + (j >> 2) - 8) >> ((4 - ref_odd) << 1) | (uint64_t)ref[j >> 2] \
<< ((ref_odd + 28) << 1); \
uint64_t bp32seq = (*(uint64_t*)seq) << ((seq_odd + ext_bp) << 1); \
uint64_t cmp = bp32ref ^ bp32seq; \
if (cmp > 0) \
return MIN(match_len + MIN(__builtin_clzll(cmp) >> 1, (int)i + 1 - seq_start), max_match_len); \
last_code
#define __backward_match_code(first_len, first_ref, first_seq, ref_bits, seq_bits) \
uint64_t bp32ref = first_ref; \
uint64_t bp32seq = first_seq; \
uint64_t cmp = bp32ref ^ bp32seq; \
if (cmp > 0) \
return MIN(MIN(__builtin_clzll(cmp) >> 1, (int)i + 1 - seq_start), max_match_len); \
int first_cmp_len = first_len; \
match_len = MIN(first_cmp_len, max_match_len); \
i -= first_cmp_len; \
j -= first_cmp_len; \
seq_odd = 3 - (i & 3); \
ref_odd = 3 - (j & 3); \
for (; i - 31 >= 0; i -= 32, j -= 32, match_len += 32) { \
bp32ref = ref_bits; \
bp32seq = seq_bits; \
cmp = bp32ref ^ bp32seq; \
if (cmp > 0) \
return MIN(match_len + (__builtin_clzll(cmp) >> 1), max_match_len); \
} \
if (i >= seq_start) { \
__backward_tail_code(match_len = max_match_len); \
}
////////////
if (i < 32) { // 只需要一次比较
__backward_tail_code(return max_match_len);
}
if (seq_odd < ref_odd) { // 调整到ref的整数字节
__backward_match_code(
32 - ref_odd, (*(uint64_t*)&ref[(j >> 2) - 7]) << (ref_odd << 1),
(*(uint64_t*)&seq[(i >> 2) - 7]) >> ((ref_odd - seq_odd) << 1) << (ref_odd << 1), (*(uint64_t*)&ref[(j >> 2) - 7]),
(*(uint64_t*)&seq[(i >> 2) - 8] >> ((4 - seq_odd) << 1)) | ((uint64_t)seq[(i >> 2)] << ((seq_odd + 28) << 1)));
} else if (seq_odd > ref_odd) { // 调整到seq的整数字节
__backward_match_code(
32 - seq_odd, (*(uint64_t*)&ref[(j >> 2) - 7]) >> ((seq_odd - ref_odd) << 1) << (seq_odd << 1),
(*(uint64_t*)&seq[(i >> 2) - 7]) << (seq_odd << 1),
(*(uint64_t*)&ref[(j >> 2) - 8] >> ((4 - ref_odd) << 1)) | ((uint64_t)ref[(j >> 2)] << ((ref_odd + 28) << 1)),
(*(uint64_t*)&seq[(i >> 2) - 7]));
} else { // 可以调整到相同的64位地址进行比较了
__backward_match_code(32 - seq_odd, (*(uint64_t*)&ref[(j >> 2) - 7]) << (seq_odd << 1),
(*(uint64_t*)&seq[(i >> 2) - 7]) << (seq_odd << 1), (*(uint64_t*)&ref[(j >> 2) - 7]),
(*(uint64_t*)&seq[(i >> 2) - 7]));
}
return MIN(match_len, max_match_len);
}
// 根据sa的行获取对应的ref position小端模式
uint64_t hyb_sa_to_ref_pos(uint8_t* sa_arr, uint64_t row) {
const uint64_t start_byte = ((row << 5) + row) >> 3; // 存储这个sa数据的起始字节
uint64_t val = *(uint64_t*)(sa_arr + start_byte);
val = (val >> (row & 7)) & 8589934591ULL; // 33-bits mask
return val;
}
#define __parse_node_start_no_addr(idata) \
*cmp_ref = 1; \
uint32_t seq_pos = *seq_pos_p; \
uint8_t header = *idata; \
idata += 1; \
uint8_t node_type = (header >> 6) & 3; \
uint8_t hits_neq = header >> 5 & 1; \
uint32_t hits_bytes = ((header >> 3) & 3) + 1; \
uint32_t off_bytes = header & 7; \
uint32_t child_ptr_bytes = hits_bytes + off_bytes;
// 解析hyb node初始化变量信息
#define __parse_node_start_code(idata) \
uint8_t* addr = NULL; \
__parse_node_start_no_addr(idata)
// 解析单一碱基序列节点
#define __parse_path_node_code(path_len) \
uint32_t path_len = (header & 1); \
path_len = path_len << 8 | *idata; \
idata += 1; \
int match_len = forward_match_len(seq_bits, seq_pos, seq_end, idata, 0, path_len); \
*seq_pos_p = seq_pos + match_len; \
if (match_len == (int)path_len) { \
addr = idata + (((path_len << 1) + 7) >> 3); \
if (hits_neq) { \
*sa_start_p += 1; \
*hits_p -= 1; \
} \
} else \
*cmp_ref = 0;
// 解析正常hyb节点
#define __parse_child_node_code(kmer_len, mark_bytes, int_type, kmer_code, bits_count, one) \
uint8_t kmer = kmer_code; \
int_type mark = *(int_type*)idata; \
int_type child_num = mark & (one << kmer); \
if (child_num) { \
*seq_pos_p += kmer_len; \
uint32_t nth_child = bits_count(mark & ((one << kmer) - 1)); \
uint8_t has_next_child = bits_count(mark >> kmer >> 1); \
if (*seq_pos_p >= HYB_MAX_SEQ_LEN) { \
*cmp_ref = 0; \
} \
if (off_bytes == HYB_LEAF_NODE) { \
*hits_p -= nth_child + hits_neq + has_next_child; \
*sa_start_p += nth_child + hits_neq; \
} else { \
if (nth_child == 0) { \
idata += mark_bytes; \
uint32_t hits_start = *(uint32_t*)idata & ga_hybHitsMask[hits_bytes]; \
addr = idata + has_next_child * child_ptr_bytes; \
*hits_p = hits_start - hits_neq; \
*sa_start_p += hits_neq; \
} else { \
idata += mark_bytes + (nth_child - 1) * child_ptr_bytes; \
uint32_t hits_start = *(uint32_t*)idata & ga_hybHitsMask[hits_bytes]; \
uint32_t child_offset = *(uint32_t*)(idata + hits_bytes) & ga_hybOffMask[off_bytes]; \
addr = idata + child_offset + (has_next_child + 1) * child_ptr_bytes; \
if (has_next_child) { \
*hits_p = (*(uint32_t*)(idata + child_ptr_bytes) & ga_hybHitsMask[hits_bytes]) - hits_start; \
} else { \
*hits_p -= hits_start + hits_neq; \
} \
*sa_start_p += hits_start; \
} \
} \
} else { \
*cmp_ref = 0; \
}
// 当节点不能完全匹配时候,检查是否能匹配该节点包含的部分碱基序列
#define __parse_part_node_code(kmer_len, mark_bytes, int_type, kmer_base_code, bits_range, bits_count, one) \
uint8_t kmer_base = kmer_base_code; \
int_type mark = *(int_type*)idata; \
int_type kmer_mask = ((one << bits_range) - 1) << kmer_base; \
int_type child_num = mark & kmer_mask; \
if (child_num) { \
*seq_pos_p += kmer_len; \
int_type kmer_pre_mask = (one << kmer_base) - 1; \
uint32_t nth_child = bits_count(mark & kmer_pre_mask); \
uint8_t has_next_child = bits_count(mark >> kmer_base >> bits_range); \
if (off_bytes == HYB_LEAF_NODE) { \
*hits_p -= nth_child + hits_neq + has_next_child; \
*sa_start_p += nth_child + hits_neq; \
} else { \
if (nth_child == 0) { \
idata += mark_bytes; \
uint32_t hits_start = hits_neq; \
if (has_next_child) { \
child_num = bits_count(child_num); \
*hits_p = \
(*(uint32_t*)(idata + (child_num - 1) * child_ptr_bytes) & ga_hybHitsMask[hits_bytes]) - hits_start; \
} else { \
*hits_p -= hits_start; \
} \
*sa_start_p += hits_start; \
} else { \
idata += mark_bytes + (nth_child - 1) * child_ptr_bytes; \
uint32_t hits_start = *(uint32_t*)idata & ga_hybHitsMask[hits_bytes]; \
*sa_start_p += hits_start; \
if (has_next_child) { \
child_num = bits_count(child_num); \
*hits_p = (*(uint32_t*)(idata + child_num * child_ptr_bytes) & ga_hybHitsMask[hits_bytes]) - hits_start; \
} else { \
*hits_p -= hits_start + hits_neq; \
} \
} \
} \
} \
*cmp_ref = 0;
// 解析节点主要代码
#define __parse_hyb_node_code(return_code) \
if (node_type == HYB_BP_PATH) { \
__parse_path_node_code(path_len); \
} else if (node_type == HYB_BP_1) { \
__parse_child_node_code(1, 1, uint8_t, seq_bp[seq_pos], __builtin_popcount, 1); \
} else if (node_type == HYB_BP_2) { \
if (seq_pos + 1 < seq_end) { \
__parse_child_node_code(2, 2, uint16_t, seq_bp[seq_pos] << 2 | seq_bp[seq_pos + 1], __builtin_popcount, 1); \
if (!child_num) { \
__parse_part_node_code(1, 2, uint16_t, seq_bp[seq_pos] << 2, 4, __builtin_popcount, 1); \
} \
} else { \
__parse_part_node_code(1, 2, uint16_t, seq_bp[seq_pos] << 2, 4, __builtin_popcount, 1); \
} \
} else { \
if (seq_pos + 2 < seq_end) { \
__parse_child_node_code(3, 8, uint64_t, seq_bp[seq_pos] << 4 | seq_bp[seq_pos + 1] << 2 | seq_bp[seq_pos + 2], \
__builtin_popcountll, 1ULL); \
if (!child_num) { \
__parse_part_node_code(2, 8, uint64_t, seq_bp[seq_pos] << 4 | seq_bp[seq_pos + 1] << 2, 4, \
__builtin_popcountll, 1ULL); \
if (!child_num) { \
__parse_part_node_code(1, 8, uint64_t, seq_bp[seq_pos] << 4, 16, __builtin_popcountll, 1ULL); \
} \
} \
} else if (seq_pos + 1 < seq_end) { \
__parse_part_node_code(2, 8, uint64_t, seq_bp[seq_pos] << 4 | seq_bp[seq_pos + 1] << 2, 4, __builtin_popcountll, \
1ULL); \
if (!child_num) { \
__parse_part_node_code(1, 8, uint64_t, seq_bp[seq_pos] << 4, 16, __builtin_popcountll, 1ULL); \
} \
} else { \
__parse_part_node_code(1, 8, uint64_t, seq_bp[seq_pos] << 4, 16, __builtin_popcountll, 1ULL); \
} \
} \
/*__builtin_prefetch(addr, 0, 3); */ \
return_code
/////////
// 解析第一个节点, 返回后续对应的节点地址
uint8_t* parse_first_hyb_node(uint8_t* idata, uint8_t* seq_bits, uint8_t* seq_bp, uint32_t seq_end, uint32_t* seq_pos_p,
uint64_t* sa_start_p, uint32_t* hits_p, uint8_t* cmp_ref, int tid) {
if (*seq_pos_p == seq_end)
return NULL;
__parse_node_start_code(idata);
*sa_start_p = (*(uint64_t*)idata) & HYB_NODE_SA_MASK;
idata += 5;
if (*hits_p > HYB_HIT_THRESH) { // 更新hits
*hits_p = *((uint32_t*)idata) & ga_hybHitsMask[hits_bytes]; // hits数量
idata += hits_bytes;
}
// __parse_hyb_node_code(return addr);
if (node_type == HYB_BP_PATH) {
__parse_path_node_code(path_len);
} else if (node_type == HYB_BP_1) {
__parse_child_node_code(1, 1, uint8_t, seq_bp[seq_pos], __builtin_popcount, 1);
} else if (node_type == HYB_BP_2) {
if (seq_pos + 1 < seq_end) {
__parse_child_node_code(2, 2, uint16_t, seq_bp[seq_pos] << 2 | seq_bp[seq_pos + 1], __builtin_popcount, 1);
if (!child_num) {
__parse_part_node_code(1, 2, uint16_t, seq_bp[seq_pos] << 2, 4, __builtin_popcount, 1);
}
} else {
__parse_part_node_code(1, 2, uint16_t, seq_bp[seq_pos] << 2, 4, __builtin_popcount, 1);
}
} else {
if (seq_pos + 2 < seq_end) {
//__parse_child_node_code(3, 8, uint64_t, seq_bp[seq_pos] << 4 | seq_bp[seq_pos + 1] << 2 | seq_bp[seq_pos + 2],
// __builtin_popcountll, 1ULL);
uint8_t kmer = seq_bp[seq_pos] << 4 | seq_bp[seq_pos + 1] << 2 | seq_bp[seq_pos + 2];
uint64_t mark = *(uint64_t*)idata;
uint64_t child_num = mark & (1ULL << kmer);
if (child_num) {
*seq_pos_p += 3;
uint32_t nth_child = __builtin_popcountll(mark & ((1ULL << kmer) - 1));
uint8_t has_next_child = __builtin_popcountll(mark >> kmer >> 1);
if (*seq_pos_p >= HYB_MAX_SEQ_LEN) {
*cmp_ref = 0;
}
if (off_bytes == HYB_LEAF_NODE) {
*hits_p -= nth_child + hits_neq + has_next_child;
*sa_start_p += nth_child + hits_neq;
} else {
if (nth_child == 0) {
idata += 8;
uint32_t hits_start = *(uint32_t*)idata & ga_hybHitsMask[hits_bytes];
addr = idata + has_next_child * child_ptr_bytes;
*hits_p = hits_start - hits_neq;
*sa_start_p += hits_neq;
} else {
idata += 8 + (nth_child - 1) * child_ptr_bytes;
uint32_t hits_start = *(uint32_t*)idata & ga_hybHitsMask[hits_bytes];
uint32_t child_offset = *(uint32_t*)(idata + hits_bytes) & ga_hybOffMask[off_bytes];
addr = idata + child_offset + (has_next_child + 1) * child_ptr_bytes;
if (has_next_child) {
*hits_p = (*(uint32_t*)(idata + child_ptr_bytes) & ga_hybHitsMask[hits_bytes]) - hits_start;
} else {
*hits_p -= hits_start + hits_neq;
}
*sa_start_p += hits_start;
}
}
} else {
*cmp_ref = 0;
}
if (!child_num) {
//__parse_part_node_code(2, 8, uint64_t, seq_bp[seq_pos] << 4 | seq_bp[seq_pos + 1] << 2, 4,
//__builtin_popcountll,
// 1ULL);
uint8_t kmer_base = seq_bp[seq_pos] << 4 | seq_bp[seq_pos + 1] << 2;
uint64_t mark = *(uint64_t*)idata;
uint64_t kmer_mask = ((1ULL << 4) - 1) << kmer_base;
uint64_t child_num = mark & kmer_mask;
if (child_num) {
*seq_pos_p += 2;
uint64_t kmer_pre_mask = (1ULL << kmer_base) - 1;
uint32_t nth_child = __builtin_popcountll(mark & kmer_pre_mask);
uint8_t has_next_child = __builtin_popcountll(mark >> kmer_base >> 4);
if (off_bytes == HYB_LEAF_NODE) {
*hits_p -= nth_child + hits_neq + has_next_child;
*sa_start_p += nth_child + hits_neq;
} else {
if (nth_child == 0) {
idata += 8;
uint32_t hits_start = hits_neq;
if (has_next_child) {
child_num = __builtin_popcountll(child_num);
*hits_p =
(*(uint32_t*)(idata + (child_num - 1) * child_ptr_bytes) & ga_hybHitsMask[hits_bytes]) -
hits_start;
} else {
*hits_p -= hits_start;
}
*sa_start_p += hits_start;
} else {
idata += 8 + (nth_child - 1) * child_ptr_bytes;
uint32_t hits_start = *(uint32_t*)idata & ga_hybHitsMask[hits_bytes];
*sa_start_p += hits_start;
if (has_next_child) {
child_num = __builtin_popcountll(child_num);
*hits_p = (*(uint32_t*)(idata + child_num * child_ptr_bytes) & ga_hybHitsMask[hits_bytes]) -
hits_start;
} else {
*hits_p -= hits_start + hits_neq;
}
}
}
}
*cmp_ref = 0;
if (!child_num) {
// __parse_part_node_code(1, 8, uint64_t, seq_bp[seq_pos] << 4, 16, __builtin_popcountll, 1ULL);
uint8_t kmer_base = seq_bp[seq_pos] << 4;
uint64_t mark = *(uint64_t*)idata;
uint64_t kmer_mask = ((1ULL << 16) - 1) << kmer_base;
uint64_t child_num = mark & kmer_mask;
if (child_num) {
*seq_pos_p += 1;
uint64_t kmer_pre_mask = (1ULL << kmer_base) - 1;
uint32_t nth_child = __builtin_popcountll(mark & kmer_pre_mask);
uint8_t has_next_child = __builtin_popcountll(mark >> kmer_base >> 16);
if (off_bytes == HYB_LEAF_NODE) {
*hits_p -= nth_child + hits_neq + has_next_child;
*sa_start_p += nth_child + hits_neq;
} else {
if (nth_child == 0) {
idata += 8;
uint32_t hits_start = hits_neq;
if (has_next_child) {
child_num = __builtin_popcountll(child_num);
*hits_p =
(*(uint32_t*)(idata + (child_num - 1) * child_ptr_bytes) & ga_hybHitsMask[hits_bytes]) -
hits_start;
} else {
*hits_p -= hits_start;
}
*sa_start_p += hits_start;
} else {
idata += 8 + (nth_child - 1) * child_ptr_bytes;
uint32_t hits_start = *(uint32_t*)idata & ga_hybHitsMask[hits_bytes];
*sa_start_p += hits_start;
if (has_next_child) {
child_num = __builtin_popcountll(child_num);
*hits_p = (*(uint32_t*)(idata + child_num * child_ptr_bytes) & ga_hybHitsMask[hits_bytes]) -
hits_start;
} else {
*hits_p -= hits_start + hits_neq;
}
}
}
}
*cmp_ref = 0;
}
}
} else if (seq_pos + 1 < seq_end) {
__parse_part_node_code(2, 8, uint64_t, seq_bp[seq_pos] << 4 | seq_bp[seq_pos + 1] << 2, 4, __builtin_popcountll,
1ULL);
if (!child_num) {
__parse_part_node_code(1, 8, uint64_t, seq_bp[seq_pos] << 4, 16, __builtin_popcountll, 1ULL);
}
} else {
__parse_part_node_code(1, 8, uint64_t, seq_bp[seq_pos] << 4, 16, __builtin_popcountll, 1ULL);
}
}
return addr;
}
// 解析后续的正常节点
uint8_t* parse_one_hyb_node(uint8_t* idata, uint8_t* seq_bits, uint8_t* seq_bp, uint32_t seq_end, uint32_t* seq_pos_p,
uint64_t* sa_start_p, uint32_t* hits_p, uint8_t* cmp_ref, int tid) {
if (*seq_pos_p == seq_end)
return NULL;
__parse_node_start_code(idata);
// __parse_hyb_node_code(return addr);
if (node_type == HYB_BP_PATH) {
__parse_path_node_code(path_len);
} else if (node_type == HYB_BP_1) {
__parse_child_node_code(1, 1, uint8_t, seq_bp[seq_pos], __builtin_popcount, 1);
} else if (node_type == HYB_BP_2) {
if (seq_pos + 1 < seq_end) {
__parse_child_node_code(2, 2, uint16_t, seq_bp[seq_pos] << 2 | seq_bp[seq_pos + 1], __builtin_popcount, 1);
if (!child_num) {
__parse_part_node_code(1, 2, uint16_t, seq_bp[seq_pos] << 2, 4, __builtin_popcount, 1);
}
} else {
__parse_part_node_code(1, 2, uint16_t, seq_bp[seq_pos] << 2, 4, __builtin_popcount, 1);
}
} else {
if (seq_pos + 2 < seq_end) {
//__parse_child_node_code(3, 8, uint64_t, seq_bp[seq_pos] << 4 | seq_bp[seq_pos + 1] << 2 | seq_bp[seq_pos + 2],
// __builtin_popcountll, 1ULL);
uint8_t kmer = seq_bp[seq_pos] << 4 | seq_bp[seq_pos + 1] << 2 | seq_bp[seq_pos + 2];
uint64_t mark = *(uint64_t*)idata;
uint64_t child_num = mark & (1ULL << kmer);
if (child_num) {
*seq_pos_p += 3;
uint32_t nth_child = __builtin_popcountll(mark & ((1ULL << kmer) - 1));
uint8_t has_next_child = __builtin_popcountll(mark >> kmer >> 1);
if (*seq_pos_p >= HYB_MAX_SEQ_LEN) {
*cmp_ref = 0;
}
if (off_bytes == HYB_LEAF_NODE) {
*hits_p -= nth_child + hits_neq + has_next_child;
*sa_start_p += nth_child + hits_neq;
} else {
if (nth_child == 0) {
idata += 8;
uint32_t hits_start = *(uint32_t*)idata & ga_hybHitsMask[hits_bytes];
addr = idata + has_next_child * child_ptr_bytes;
*hits_p = hits_start - hits_neq;
*sa_start_p += hits_neq;
} else {
idata += 8 + (nth_child - 1) * child_ptr_bytes;
uint32_t hits_start = *(uint32_t*)idata & ga_hybHitsMask[hits_bytes];
uint32_t child_offset = *(uint32_t*)(idata + hits_bytes) & ga_hybOffMask[off_bytes];
addr = idata + child_offset + (has_next_child + 1) * child_ptr_bytes;
if (has_next_child) {
*hits_p = (*(uint32_t*)(idata + child_ptr_bytes) & ga_hybHitsMask[hits_bytes]) - hits_start;
} else {
*hits_p -= hits_start + hits_neq;
}
*sa_start_p += hits_start;
}
}
} else {
*cmp_ref = 0;
}
if (!child_num) {
__parse_part_node_code(2, 8, uint64_t, seq_bp[seq_pos] << 4 | seq_bp[seq_pos + 1] << 2, 4, __builtin_popcountll,
1ULL);
if (!child_num) {
__parse_part_node_code(1, 8, uint64_t, seq_bp[seq_pos] << 4, 16, __builtin_popcountll, 1ULL);
}
}
} else if (seq_pos + 1 < seq_end) {
__parse_part_node_code(2, 8, uint64_t, seq_bp[seq_pos] << 4 | seq_bp[seq_pos + 1] << 2, 4, __builtin_popcountll,
1ULL);
if (!child_num) {
__parse_part_node_code(1, 8, uint64_t, seq_bp[seq_pos] << 4, 16, __builtin_popcountll, 1ULL);
}
} else {
__parse_part_node_code(1, 8, uint64_t, seq_bp[seq_pos] << 4, 16, __builtin_popcountll, 1ULL);
}
}
return addr;
}
void parse_one_hyb_node_min_hits(uint8_t* idata, uint8_t* seq_bits, uint8_t* seq_bp, uint32_t seq_end, int min_hits,
int is_head, uint32_t* seq_pos_p, uint64_t* sa_start_p, uint32_t* hits_p, int tid) {
if (*seq_pos_p == seq_end)
return;
uint8_t cmp_ref_val = 0;
uint8_t* cmp_ref = &cmp_ref_val;
__parse_node_start_no_addr(idata);
if (is_head) {
*sa_start_p = (*(uint64_t*)idata) & HYB_NODE_SA_MASK;
idata += 5;
if (*hits_p > HYB_HIT_THRESH) { // 更新hits
*hits_p = *((uint32_t*)idata) & ga_hybHitsMask[hits_bytes]; // hits数量
idata += hits_bytes;
}
}
uint8_t* prev_idata = idata;
uint32_t prev_seq_pos = *seq_pos_p;
uint32_t prev_hits = *hits_p;
uint64_t prev_sa_start = *sa_start_p;
if (node_type == HYB_BP_2) {
__parse_part_node_code(1, 2, uint16_t, seq_bp[seq_pos] << 2, 4, __builtin_popcount, 1);
if (*hits_p < min_hits) {
*seq_pos_p = prev_seq_pos;
*hits_p = prev_hits;
*sa_start_p = prev_sa_start;
}
} else if (node_type == HYB_BP_3) {
__parse_part_node_code(1, 8, uint64_t, seq_bp[seq_pos] << 4, 16, __builtin_popcountll, 1ULL);
if (*hits_p < min_hits) {
*seq_pos_p = prev_seq_pos;
*hits_p = prev_hits;
*sa_start_p = prev_sa_start;
} else if (seq_pos + 1 < seq_end) {
uint32_t pp_seq_pos = prev_seq_pos;
uint32_t pp_hits = prev_hits;
uint64_t pp_sa_start = prev_sa_start;
prev_seq_pos = *seq_pos_p;
prev_hits = *hits_p;
prev_sa_start = *sa_start_p;
*seq_pos_p = pp_seq_pos;
*hits_p = pp_hits;
*sa_start_p = pp_sa_start;
idata = prev_idata; // 恢复到上一个节点
__parse_part_node_code(2, 8, uint64_t, seq_bp[seq_pos] << 4 | seq_bp[seq_pos + 1] << 2, 4, __builtin_popcountll,
1ULL);
if (*hits_p < min_hits) {
*seq_pos_p = prev_seq_pos;
*hits_p = prev_hits;
*sa_start_p = prev_sa_start;
}
}
}
}
void parse_one_hyb_node_max_hits(uint8_t* idata, uint8_t* seq_bits, uint8_t* seq_bp, uint32_t seq_end, int max_hits, int min_bp,
int is_head, uint32_t* seq_pos_p, uint64_t* sa_start_p, uint32_t* hits_p, int tid) {
if (*seq_pos_p == seq_end)
return;
uint8_t cmp_ref_val = 0;
uint8_t* cmp_ref = &cmp_ref_val;
__parse_node_start_no_addr(idata);
if (is_head) {
*sa_start_p = (*(uint64_t*)idata) & HYB_NODE_SA_MASK;
idata += 5;
if (*hits_p > HYB_HIT_THRESH) { // 更新hits
*hits_p = *((uint32_t*)idata) & ga_hybHitsMask[hits_bytes]; // hits数量
idata += hits_bytes;
}
}
uint8_t* prev_idata = idata;
uint32_t prev_seq_pos = *seq_pos_p;
uint32_t prev_hits = *hits_p;
uint64_t prev_sa_start = *sa_start_p;
if (node_type == HYB_BP_2) {
__parse_part_node_code(1, 2, uint16_t, seq_bp[seq_pos] << 2, 4, __builtin_popcount, 1);
} else if (node_type == HYB_BP_3) {
if (min_bp == 2) {
__parse_part_node_code(2, 8, uint64_t, seq_bp[seq_pos] << 4 | seq_bp[seq_pos + 1] << 2, 4, __builtin_popcountll,
1ULL);
} else {
__parse_part_node_code(1, 8, uint64_t, seq_bp[seq_pos] << 4, 16, __builtin_popcountll, 1ULL);
if (*hits_p >= max_hits) {
*seq_pos_p = prev_seq_pos;
*hits_p = prev_hits;
*sa_start_p = prev_sa_start;
idata = prev_idata; // 恢复到上一个节点
__parse_part_node_code(2, 8, uint64_t, seq_bp[seq_pos] << 4 | seq_bp[seq_pos + 1] << 2, 4, __builtin_popcountll,
1ULL);
}
}
} else { // path node
if (min_bp > 0)
*seq_pos_p += min_bp;
}
}
// 需要给定初始化的hits和seq_pos
#define CALC_STAT 0
void get_leaf_node(uint8_t* idata, uint8_t* seq_bits, uint8_t* seq_bp, uint32_t seq_end, uint32_t* seq_pos_p, uint32_t* hits_p,
uint64_t* sa_start_p, uint8_t* cmp_ref, int tid) {
uint8_t* next_addr = parse_first_hyb_node(idata, seq_bits, seq_bp, seq_end, seq_pos_p, sa_start_p, hits_p, cmp_ref, tid);
#if CALC_STAT
uint8_t* prev_addr = idata;
if (next_addr != NULL) {
// fprintf(stderr, "addr dist: %ld\n", next_addr - prev_addr);
uint64_t dist = next_addr - prev_addr;
if (dist < 32)
gdat[0]++;
else if (dist < 64)
gdat[1]++;
else if (dist < 128)
gdat[2]++;
else
gdat[3]++;
}
#endif
while (next_addr != NULL && *hits_p > 1) {
#if CALC_STAT
prev_addr = next_addr;
#endif
next_addr = parse_one_hyb_node(next_addr, seq_bits, seq_bp, seq_end, seq_pos_p, sa_start_p, hits_p, cmp_ref, tid);
#if CALC_STAT
if (next_addr != NULL) {
// fprintf(stderr, "addr dist: %ld\n", next_addr - prev_addr);
uint64_t dist = next_addr - prev_addr;
if (dist < 32)
gdat[0]++;
else if (dist < 64)
gdat[1]++;
else if (dist < 128)
gdat[2]++;
else
gdat[3]++;
}
#endif
}
}
void get_kmer_data(const HybridIndex* hyb, uint8_t* seq_bits, int kmer_pos, uint8_t* type_hits, uint64_t* offset) {
uint64_t kmer = _kmer_from_pos(seq_bits, kmer_pos);
uint8_t* kmer_data_addr = hyb->kmer_data + kmer * HYB_KMER_DATA_BYTES;
*type_hits = *kmer_data_addr & HYB_KMER_DATA_TYPE_MASK;
*offset = (*(uint64_t*)kmer_data_addr & HYB_KMER_DATA_MASK) >> HYB_KMER_DATA_TYPE_BITS;
}
void right_end_match(const HybridIndex* hyb, const int seq_len, const Range* read_range, uint8_t* for_bits, uint8_t* back_bits,
int kmer_start, int init_match_len, uint64_t ref_pos, int* right_match) {
if (ref_pos < hyb->ref_len) {
*right_match = forward_match_len(for_bits, kmer_start + init_match_len, read_range->end, hyb->ref_bits,
ref_pos + init_match_len, hyb->ref_len);
} else {
ref_pos = (hyb->ref_len << 1) - 1 - ref_pos;
*right_match = backward_match_len(back_bits, seq_len - kmer_start - init_match_len - 1, seq_len - read_range->end,
hyb->ref_bits, ref_pos - init_match_len);
}
*right_match += init_match_len; // 包括kmer的长度
}
void left_end_match(const HybridIndex* hyb, const int seq_len, const Range* read_range, uint8_t* for_bits, uint8_t* back_bits,
int kmer_start, int init_match_len, uint64_t ref_pos, int* left_match) {
if (ref_pos < hyb->ref_len) {
*left_match = backward_match_len(for_bits, kmer_start - 1, read_range->start, hyb->ref_bits, ref_pos - 1);
} else {
ref_pos = (hyb->ref_len << 1) - 1 - ref_pos;
*left_match = forward_match_len(back_bits, seq_len - kmer_start, seq_len - read_range->start, hyb->ref_bits,
ref_pos + 1, hyb->ref_len);
}
}
void both_end_match(const HybridIndex* hyb, const int seq_len, const Range* read_range, uint8_t* for_bits, uint8_t* back_bits,
int kmer_start, int init_match_len, uint64_t ref_pos, int* left_match, int* right_match) {
if (ref_pos < hyb->ref_len) {
*right_match = forward_match_len(for_bits, kmer_start + init_match_len, read_range->end, hyb->ref_bits,
ref_pos + init_match_len, hyb->ref_len);
*left_match = backward_match_len(for_bits, kmer_start - 1, read_range->start, hyb->ref_bits, ref_pos - 1);
} else {
ref_pos = (hyb->ref_len << 1) - 1 - ref_pos;
*right_match = backward_match_len(back_bits, seq_len - kmer_start - init_match_len - 1, seq_len - read_range->end,
hyb->ref_bits, ref_pos - init_match_len);
*left_match = forward_match_len(back_bits, seq_len - kmer_start, seq_len - read_range->start, hyb->ref_bits,
ref_pos + 1, hyb->ref_len);
}
*right_match += init_match_len; // 包括kmer的长度
}