#include #include #include #include "hyb_idx.h" #include "share_mem.h" #include "utils.h" ///////////////////////////////////////////////////// // 使用hybrid-index的工具函数 /* // 加载hybrid index HybridIndex* load_hybrid_idx(const char* prefix) { HybridIndex* hyb = NULL; hyb = (HybridIndex*)calloc(1, sizeof(HybridIndex)); // return hyb; int prefix_len = strlen(prefix); char* fn = (char*)malloc(prefix_len + 30); FILE* fp = NULL; struct stat st; #define __load_hybrid_idx_code(suffix, data) \ sprintf(fn, "%s%s", prefix, suffix); \ err_check_true(stat(fn, &st), 0, fn); \ fp = xopen(fn, "r"); \ data = (uint8_t*)malloc(st.st_size); \ err_fread_noeof(data, 1, st.st_size, fp); \ err_fclose(fp); // load ref-len sprintf(fn, "%s.ref-len", prefix); // fprintf(stderr, "fn: %s\n", fn); fp = xopen(fn, "r"); err_check_false(fscanf(fp, "%ld", &hyb->ref_len), EOF); err_fclose(fp); // fprintf(stderr, "ref-len: %ld\n", hyb->ref_len); char* kmer_suffix = ".hybrid.kmer"; char* data_suffix = ".hybrid.data"; // char *kmer_suffix = ".hybrid.learned.kmer"; // char *data_suffix = ".hybrid.learned.data"; #if 0 // shm_clear_hyb(); // load 2-bit ref __load_hybrid_idx_code(".hybrid.pac", hyb->ref_bits); // load hyb byte-sa __load_hybrid_idx_code(".hybrid.sa", hyb->sa); // load hyb kmer data __load_hybrid_idx_code(kmer_suffix, hyb->kmer_data); // load hyb index data __load_hybrid_idx_code(data_suffix, hyb->index_data); #else shm_keep_hyb(prefix); // load 2-bit ref hyb->ref_bits = (uint8_t*)shm_get_index(strcat(strcpy(fn, prefix), ".hybrid.pac")); // load hyb byte-sa hyb->sa = (uint8_t*)shm_get_index(strcat(strcpy(fn, prefix), ".hybrid.sa")); // load hyb kmer data hyb->kmer_data = (uint8_t*)shm_get_index(strcat(strcpy(fn, prefix), kmer_suffix)); // load hyb index data hyb->index_data = (uint8_t*)shm_get_index(strcat(strcpy(fn, prefix), data_suffix)); #endif // fprintf(stderr, "文件大小为: %ld 字节, %.2f GB\n", st.st_size, (double)st.st_size / (1024 * 1024 * 1024)); return hyb; } */ // 创建正向反向互补bits void create_seq_fb_bits(uint8_t* bs, int len, uint8_t* fs, uint8_t* rs) { if (len > 0) { uint8_t fbp = 0, rbp = 0; int i = 0, j = 0, idxf = 0, idxr = 0; for (; i + 3 < len; i += 4) { fbp = (bs[i] & 3) | (bs[i + 1] & 3) << 2 | (bs[i + 2] & 3) << 4 | (bs[i + 3] & 3) << 6; rbp = (3 - (bs[len - i - 1] & 3)) | (3 - (bs[len - i - 2] & 3)) << 2 | (3 - (bs[len - i - 3] & 3)) << 4 | (3 - (bs[len - i - 4] & 3)) << 6; fs[idxf++] = fbp; rs[idxr++] = rbp; } fbp = 0; rbp = 0; for (; i < len; ++i, ++j) { fbp |= (bs[i] & 3) << j * 2; rbp |= (3 - (bs[len - i - 1] & 3)) << j * 2; } fs[idxf++] = fbp; rs[idxr++] = rbp; } } // 将seq和ref正向比对,看最多有多少匹配的bp,seq和ref都是2-bit编码的 inline int forward_match_len(uint8_t* seq, int64_t seq_pos, int64_t seq_end, uint8_t* ref, int64_t ref_pos, int64_t ref_len) { if (seq_pos >= seq_end) return 0; int64_t max_match_len = MIN(ref_len - ref_pos, seq_end - seq_pos); int ref_odd = ref_pos & 3; int seq_odd = seq_pos & 3; int64_t i = seq_pos; int64_t j = ref_pos; int match_len = 0; ///////////// #define __forward_match_code(first_len, first_ref, first_seq, ref_bits, seq_bits) \ uint64_t bp32ref = first_ref; \ uint64_t bp32seq = first_seq; \ uint64_t cmp = bp32ref ^ bp32seq; \ if (cmp > 0) \ return MIN(__builtin_ctzll(cmp) >> 1, max_match_len); \ int first_cmp_len = first_len; \ match_len = MIN(first_cmp_len, max_match_len); \ i += first_cmp_len; \ j += first_cmp_len; \ seq_odd = i & 3; \ ref_odd = j & 3; \ for (; i + 31 < seq_end; i += 32, j += 32, match_len += 32) { \ bp32ref = ref_bits; \ bp32seq = seq_bits; \ cmp = bp32ref ^ bp32seq; \ if (cmp > 0) \ return MIN(match_len + (__builtin_ctzll(cmp) >> 1), max_match_len); \ } \ if (i < seq_end) { \ bp32ref = ref_bits; \ bp32seq = seq_bits; \ cmp = bp32ref ^ bp32seq; \ if (cmp > 0) \ return MIN(match_len + MIN(__builtin_ctzll(cmp) >> 1, seq_end - i), max_match_len); \ match_len = max_match_len; /*match_len += seq_end - i;*/ \ } ///////// if (seq_odd < ref_odd) { // 调整到ref的整数字节 __forward_match_code(32 - ref_odd, (*(uint64_t*)&ref[j >> 2]) >> (ref_odd << 1), (*(uint64_t*)&seq[i >> 2]) << ((ref_odd - seq_odd) << 1) >> (ref_odd << 1), (*(uint64_t*)&ref[j >> 2]), seq[i >> 2] >> (seq_odd << 1) | (*(uint64_t*)&seq[(i >> 2) + 1]) << ((4 - seq_odd) << 1)); } else if (seq_odd > ref_odd) { // 调整到seq的整数字节 __forward_match_code(32 - seq_odd, (*(uint64_t*)&ref[j >> 2]) << ((seq_odd - ref_odd) << 1) >> (seq_odd << 1), (*(uint64_t*)&seq[i >> 2]) >> (seq_odd << 1), ref[j >> 2] >> (ref_odd << 1) | (*(uint64_t*)&ref[(j >> 2) + 1]) << ((4 - ref_odd) << 1), (*(uint64_t*)&seq[i >> 2])); } else { // 可以调整到相同的64位地址进行比较了 __forward_match_code(32 - seq_odd, (*(uint64_t*)&ref[j >> 2]) >> (seq_odd << 1), (*(uint64_t*)&seq[i >> 2]) >> (seq_odd << 1), (*(uint64_t*)&ref[j >> 2]), (*(uint64_t*)&seq[i >> 2])); } return MIN(match_len, max_match_len); } // 将seq和ref反向比对,看最多有多少匹配的bp inline int backward_match_len(uint8_t* seq, int64_t seq_pos, int64_t seq_start, uint8_t* ref, int64_t ref_pos) { if (seq_pos < seq_start) return 0; int64_t max_match_len = MIN(ref_pos + 1, seq_pos - seq_start + 1); int64_t i = seq_pos; int64_t j = ref_pos; int seq_odd = 3 - (i & 3); int ref_odd = 3 - (j & 3); int match_len = 0; ///////////// #define __backward_tail_code(last_code) \ int ext_bp = (7 - (i >> 2)) << 2; \ uint64_t bp32ref = *(uint64_t*)(ref + (j >> 2) - 8) >> ((4 - ref_odd) << 1) | (uint64_t)ref[j >> 2] \ << ((ref_odd + 28) << 1); \ uint64_t bp32seq = (*(uint64_t*)seq) << ((seq_odd + ext_bp) << 1); \ uint64_t cmp = bp32ref ^ bp32seq; \ if (cmp > 0) \ return MIN(match_len + MIN(__builtin_clzll(cmp) >> 1, (int)i + 1 - seq_start), max_match_len); \ last_code #define __backward_match_code(first_len, first_ref, first_seq, ref_bits, seq_bits) \ uint64_t bp32ref = first_ref; \ uint64_t bp32seq = first_seq; \ uint64_t cmp = bp32ref ^ bp32seq; \ if (cmp > 0) \ return MIN(MIN(__builtin_clzll(cmp) >> 1, (int)i + 1 - seq_start), max_match_len); \ int first_cmp_len = first_len; \ match_len = MIN(first_cmp_len, max_match_len); \ i -= first_cmp_len; \ j -= first_cmp_len; \ seq_odd = 3 - (i & 3); \ ref_odd = 3 - (j & 3); \ for (; i - 31 >= 0; i -= 32, j -= 32, match_len += 32) { \ bp32ref = ref_bits; \ bp32seq = seq_bits; \ cmp = bp32ref ^ bp32seq; \ if (cmp > 0) \ return MIN(match_len + (__builtin_clzll(cmp) >> 1), max_match_len); \ } \ if (i >= seq_start) { \ __backward_tail_code(match_len = max_match_len); \ } //////////// if (i < 32) { // 只需要一次比较 __backward_tail_code(return max_match_len); } if (seq_odd < ref_odd) { // 调整到ref的整数字节 __backward_match_code( 32 - ref_odd, (*(uint64_t*)&ref[(j >> 2) - 7]) << (ref_odd << 1), (*(uint64_t*)&seq[(i >> 2) - 7]) >> ((ref_odd - seq_odd) << 1) << (ref_odd << 1), (*(uint64_t*)&ref[(j >> 2) - 7]), (*(uint64_t*)&seq[(i >> 2) - 8] >> ((4 - seq_odd) << 1)) | ((uint64_t)seq[(i >> 2)] << ((seq_odd + 28) << 1))); } else if (seq_odd > ref_odd) { // 调整到seq的整数字节 __backward_match_code( 32 - seq_odd, (*(uint64_t*)&ref[(j >> 2) - 7]) >> ((seq_odd - ref_odd) << 1) << (seq_odd << 1), (*(uint64_t*)&seq[(i >> 2) - 7]) << (seq_odd << 1), (*(uint64_t*)&ref[(j >> 2) - 8] >> ((4 - ref_odd) << 1)) | ((uint64_t)ref[(j >> 2)] << ((ref_odd + 28) << 1)), (*(uint64_t*)&seq[(i >> 2) - 7])); } else { // 可以调整到相同的64位地址进行比较了 __backward_match_code(32 - seq_odd, (*(uint64_t*)&ref[(j >> 2) - 7]) << (seq_odd << 1), (*(uint64_t*)&seq[(i >> 2) - 7]) << (seq_odd << 1), (*(uint64_t*)&ref[(j >> 2) - 7]), (*(uint64_t*)&seq[(i >> 2) - 7])); } return MIN(match_len, max_match_len); } // 根据sa的行获取对应的ref position(小端模式) uint64_t hyb_sa_to_ref_pos(uint8_t* sa_arr, uint64_t row) { const uint64_t start_byte = ((row << 5) + row) >> 3; // 存储这个sa数据的起始字节 uint64_t val = *(uint64_t*)(sa_arr + start_byte); val = (val >> (row & 7)) & 8589934591ULL; // 33-bits mask return val; } #define __parse_node_start_no_addr(idata) \ *cmp_ref = 1; \ uint32_t seq_pos = *seq_pos_p; \ uint8_t header = *idata; \ idata += 1; \ uint8_t node_type = (header >> 6) & 3; \ uint8_t hits_neq = header >> 5 & 1; \ uint32_t hits_bytes = ((header >> 3) & 3) + 1; \ uint32_t off_bytes = header & 7; \ uint32_t child_ptr_bytes = hits_bytes + off_bytes; // 解析hyb node初始化变量信息 #define __parse_node_start_code(idata) \ uint8_t* addr = NULL; \ __parse_node_start_no_addr(idata) // 解析单一碱基序列节点 #define __parse_path_node_code(path_len) \ uint32_t path_len = (header & 1); \ path_len = path_len << 8 | *idata; \ idata += 1; \ int match_len = forward_match_len(seq_bits, seq_pos, seq_end, idata, 0, path_len); \ *seq_pos_p = seq_pos + match_len; \ if (match_len == (int)path_len) { \ addr = idata + (((path_len << 1) + 7) >> 3); \ if (hits_neq) { \ *sa_start_p += 1; \ *hits_p -= 1; \ } \ } else \ *cmp_ref = 0; // 解析正常hyb节点 #define __parse_child_node_code(kmer_len, mark_bytes, int_type, kmer_code, bits_count, one) \ uint8_t kmer = kmer_code; \ int_type mark = *(int_type*)idata; \ int_type child_num = mark & (one << kmer); \ if (child_num) { \ *seq_pos_p += kmer_len; \ uint32_t nth_child = bits_count(mark & ((one << kmer) - 1)); \ uint8_t has_next_child = bits_count(mark >> kmer >> 1); \ if (*seq_pos_p >= HYB_MAX_SEQ_LEN) { \ *cmp_ref = 0; \ } \ if (off_bytes == HYB_LEAF_NODE) { \ *hits_p -= nth_child + hits_neq + has_next_child; \ *sa_start_p += nth_child + hits_neq; \ } else { \ if (nth_child == 0) { \ idata += mark_bytes; \ uint32_t hits_start = *(uint32_t*)idata & ga_hybHitsMask[hits_bytes]; \ addr = idata + has_next_child * child_ptr_bytes; \ *hits_p = hits_start - hits_neq; \ *sa_start_p += hits_neq; \ } else { \ idata += mark_bytes + (nth_child - 1) * child_ptr_bytes; \ uint32_t hits_start = *(uint32_t*)idata & ga_hybHitsMask[hits_bytes]; \ uint32_t child_offset = *(uint32_t*)(idata + hits_bytes) & ga_hybOffMask[off_bytes]; \ addr = idata + child_offset + (has_next_child + 1) * child_ptr_bytes; \ if (has_next_child) { \ *hits_p = (*(uint32_t*)(idata + child_ptr_bytes) & ga_hybHitsMask[hits_bytes]) - hits_start; \ } else { \ *hits_p -= hits_start + hits_neq; \ } \ *sa_start_p += hits_start; \ } \ } \ } else { \ *cmp_ref = 0; \ } // 当节点不能完全匹配时候,检查是否能匹配该节点包含的部分碱基序列 #define __parse_part_node_code(kmer_len, mark_bytes, int_type, kmer_base_code, bits_range, bits_count, one) \ uint8_t kmer_base = kmer_base_code; \ int_type mark = *(int_type*)idata; \ int_type kmer_mask = ((one << bits_range) - 1) << kmer_base; \ int_type child_num = mark & kmer_mask; \ if (child_num) { \ *seq_pos_p += kmer_len; \ int_type kmer_pre_mask = (one << kmer_base) - 1; \ uint32_t nth_child = bits_count(mark & kmer_pre_mask); \ uint8_t has_next_child = bits_count(mark >> kmer_base >> bits_range); \ if (off_bytes == HYB_LEAF_NODE) { \ *hits_p -= nth_child + hits_neq + has_next_child; \ *sa_start_p += nth_child + hits_neq; \ } else { \ if (nth_child == 0) { \ idata += mark_bytes; \ uint32_t hits_start = hits_neq; \ if (has_next_child) { \ child_num = bits_count(child_num); \ *hits_p = \ (*(uint32_t*)(idata + (child_num - 1) * child_ptr_bytes) & ga_hybHitsMask[hits_bytes]) - hits_start; \ } else { \ *hits_p -= hits_start; \ } \ *sa_start_p += hits_start; \ } else { \ idata += mark_bytes + (nth_child - 1) * child_ptr_bytes; \ uint32_t hits_start = *(uint32_t*)idata & ga_hybHitsMask[hits_bytes]; \ *sa_start_p += hits_start; \ if (has_next_child) { \ child_num = bits_count(child_num); \ *hits_p = (*(uint32_t*)(idata + child_num * child_ptr_bytes) & ga_hybHitsMask[hits_bytes]) - hits_start; \ } else { \ *hits_p -= hits_start + hits_neq; \ } \ } \ } \ } \ *cmp_ref = 0; // 解析节点主要代码 #define __parse_hyb_node_code(return_code) \ if (node_type == HYB_BP_PATH) { \ __parse_path_node_code(path_len); \ } else if (node_type == HYB_BP_1) { \ __parse_child_node_code(1, 1, uint8_t, seq_bp[seq_pos], __builtin_popcount, 1); \ } else if (node_type == HYB_BP_2) { \ if (seq_pos + 1 < seq_end) { \ __parse_child_node_code(2, 2, uint16_t, seq_bp[seq_pos] << 2 | seq_bp[seq_pos + 1], __builtin_popcount, 1); \ if (!child_num) { \ __parse_part_node_code(1, 2, uint16_t, seq_bp[seq_pos] << 2, 4, __builtin_popcount, 1); \ } \ } else { \ __parse_part_node_code(1, 2, uint16_t, seq_bp[seq_pos] << 2, 4, __builtin_popcount, 1); \ } \ } else { \ if (seq_pos + 2 < seq_end) { \ __parse_child_node_code(3, 8, uint64_t, seq_bp[seq_pos] << 4 | seq_bp[seq_pos + 1] << 2 | seq_bp[seq_pos + 2], \ __builtin_popcountll, 1ULL); \ if (!child_num) { \ __parse_part_node_code(2, 8, uint64_t, seq_bp[seq_pos] << 4 | seq_bp[seq_pos + 1] << 2, 4, \ __builtin_popcountll, 1ULL); \ if (!child_num) { \ __parse_part_node_code(1, 8, uint64_t, seq_bp[seq_pos] << 4, 16, __builtin_popcountll, 1ULL); \ } \ } \ } else if (seq_pos + 1 < seq_end) { \ __parse_part_node_code(2, 8, uint64_t, seq_bp[seq_pos] << 4 | seq_bp[seq_pos + 1] << 2, 4, __builtin_popcountll, \ 1ULL); \ if (!child_num) { \ __parse_part_node_code(1, 8, uint64_t, seq_bp[seq_pos] << 4, 16, __builtin_popcountll, 1ULL); \ } \ } else { \ __parse_part_node_code(1, 8, uint64_t, seq_bp[seq_pos] << 4, 16, __builtin_popcountll, 1ULL); \ } \ } \ /*__builtin_prefetch(addr, 0, 3); */ \ return_code ///////// // 解析第一个节点, 返回后续对应的节点地址 uint8_t* parse_first_hyb_node(uint8_t* idata, uint8_t* seq_bits, uint8_t* seq_bp, uint32_t seq_end, uint32_t* seq_pos_p, uint64_t* sa_start_p, uint32_t* hits_p, uint8_t* cmp_ref, int tid) { if (*seq_pos_p == seq_end) return NULL; __parse_node_start_code(idata); *sa_start_p = (*(uint64_t*)idata) & HYB_NODE_SA_MASK; idata += 5; if (*hits_p > HYB_HIT_THRESH) { // 更新hits *hits_p = *((uint32_t*)idata) & ga_hybHitsMask[hits_bytes]; // hits数量 idata += hits_bytes; } // __parse_hyb_node_code(return addr); if (node_type == HYB_BP_PATH) { __parse_path_node_code(path_len); } else if (node_type == HYB_BP_1) { __parse_child_node_code(1, 1, uint8_t, seq_bp[seq_pos], __builtin_popcount, 1); } else if (node_type == HYB_BP_2) { if (seq_pos + 1 < seq_end) { __parse_child_node_code(2, 2, uint16_t, seq_bp[seq_pos] << 2 | seq_bp[seq_pos + 1], __builtin_popcount, 1); if (!child_num) { __parse_part_node_code(1, 2, uint16_t, seq_bp[seq_pos] << 2, 4, __builtin_popcount, 1); } } else { __parse_part_node_code(1, 2, uint16_t, seq_bp[seq_pos] << 2, 4, __builtin_popcount, 1); } } else { if (seq_pos + 2 < seq_end) { //__parse_child_node_code(3, 8, uint64_t, seq_bp[seq_pos] << 4 | seq_bp[seq_pos + 1] << 2 | seq_bp[seq_pos + 2], // __builtin_popcountll, 1ULL); uint8_t kmer = seq_bp[seq_pos] << 4 | seq_bp[seq_pos + 1] << 2 | seq_bp[seq_pos + 2]; uint64_t mark = *(uint64_t*)idata; uint64_t child_num = mark & (1ULL << kmer); if (child_num) { *seq_pos_p += 3; uint32_t nth_child = __builtin_popcountll(mark & ((1ULL << kmer) - 1)); uint8_t has_next_child = __builtin_popcountll(mark >> kmer >> 1); if (*seq_pos_p >= HYB_MAX_SEQ_LEN) { *cmp_ref = 0; } if (off_bytes == HYB_LEAF_NODE) { *hits_p -= nth_child + hits_neq + has_next_child; *sa_start_p += nth_child + hits_neq; } else { if (nth_child == 0) { idata += 8; uint32_t hits_start = *(uint32_t*)idata & ga_hybHitsMask[hits_bytes]; addr = idata + has_next_child * child_ptr_bytes; *hits_p = hits_start - hits_neq; *sa_start_p += hits_neq; } else { idata += 8 + (nth_child - 1) * child_ptr_bytes; uint32_t hits_start = *(uint32_t*)idata & ga_hybHitsMask[hits_bytes]; uint32_t child_offset = *(uint32_t*)(idata + hits_bytes) & ga_hybOffMask[off_bytes]; addr = idata + child_offset + (has_next_child + 1) * child_ptr_bytes; if (has_next_child) { *hits_p = (*(uint32_t*)(idata + child_ptr_bytes) & ga_hybHitsMask[hits_bytes]) - hits_start; } else { *hits_p -= hits_start + hits_neq; } *sa_start_p += hits_start; } } } else { *cmp_ref = 0; } if (!child_num) { //__parse_part_node_code(2, 8, uint64_t, seq_bp[seq_pos] << 4 | seq_bp[seq_pos + 1] << 2, 4, //__builtin_popcountll, // 1ULL); uint8_t kmer_base = seq_bp[seq_pos] << 4 | seq_bp[seq_pos + 1] << 2; uint64_t mark = *(uint64_t*)idata; uint64_t kmer_mask = ((1ULL << 4) - 1) << kmer_base; uint64_t child_num = mark & kmer_mask; if (child_num) { *seq_pos_p += 2; uint64_t kmer_pre_mask = (1ULL << kmer_base) - 1; uint32_t nth_child = __builtin_popcountll(mark & kmer_pre_mask); uint8_t has_next_child = __builtin_popcountll(mark >> kmer_base >> 4); if (off_bytes == HYB_LEAF_NODE) { *hits_p -= nth_child + hits_neq + has_next_child; *sa_start_p += nth_child + hits_neq; } else { if (nth_child == 0) { idata += 8; uint32_t hits_start = hits_neq; if (has_next_child) { child_num = __builtin_popcountll(child_num); *hits_p = (*(uint32_t*)(idata + (child_num - 1) * child_ptr_bytes) & ga_hybHitsMask[hits_bytes]) - hits_start; } else { *hits_p -= hits_start; } *sa_start_p += hits_start; } else { idata += 8 + (nth_child - 1) * child_ptr_bytes; uint32_t hits_start = *(uint32_t*)idata & ga_hybHitsMask[hits_bytes]; *sa_start_p += hits_start; if (has_next_child) { child_num = __builtin_popcountll(child_num); *hits_p = (*(uint32_t*)(idata + child_num * child_ptr_bytes) & ga_hybHitsMask[hits_bytes]) - hits_start; } else { *hits_p -= hits_start + hits_neq; } } } } *cmp_ref = 0; if (!child_num) { // __parse_part_node_code(1, 8, uint64_t, seq_bp[seq_pos] << 4, 16, __builtin_popcountll, 1ULL); uint8_t kmer_base = seq_bp[seq_pos] << 4; uint64_t mark = *(uint64_t*)idata; uint64_t kmer_mask = ((1ULL << 16) - 1) << kmer_base; uint64_t child_num = mark & kmer_mask; if (child_num) { *seq_pos_p += 1; uint64_t kmer_pre_mask = (1ULL << kmer_base) - 1; uint32_t nth_child = __builtin_popcountll(mark & kmer_pre_mask); uint8_t has_next_child = __builtin_popcountll(mark >> kmer_base >> 16); if (off_bytes == HYB_LEAF_NODE) { *hits_p -= nth_child + hits_neq + has_next_child; *sa_start_p += nth_child + hits_neq; } else { if (nth_child == 0) { idata += 8; uint32_t hits_start = hits_neq; if (has_next_child) { child_num = __builtin_popcountll(child_num); *hits_p = (*(uint32_t*)(idata + (child_num - 1) * child_ptr_bytes) & ga_hybHitsMask[hits_bytes]) - hits_start; } else { *hits_p -= hits_start; } *sa_start_p += hits_start; } else { idata += 8 + (nth_child - 1) * child_ptr_bytes; uint32_t hits_start = *(uint32_t*)idata & ga_hybHitsMask[hits_bytes]; *sa_start_p += hits_start; if (has_next_child) { child_num = __builtin_popcountll(child_num); *hits_p = (*(uint32_t*)(idata + child_num * child_ptr_bytes) & ga_hybHitsMask[hits_bytes]) - hits_start; } else { *hits_p -= hits_start + hits_neq; } } } } *cmp_ref = 0; } } } else if (seq_pos + 1 < seq_end) { __parse_part_node_code(2, 8, uint64_t, seq_bp[seq_pos] << 4 | seq_bp[seq_pos + 1] << 2, 4, __builtin_popcountll, 1ULL); if (!child_num) { __parse_part_node_code(1, 8, uint64_t, seq_bp[seq_pos] << 4, 16, __builtin_popcountll, 1ULL); } } else { __parse_part_node_code(1, 8, uint64_t, seq_bp[seq_pos] << 4, 16, __builtin_popcountll, 1ULL); } } return addr; } // 解析后续的正常节点 uint8_t* parse_one_hyb_node(uint8_t* idata, uint8_t* seq_bits, uint8_t* seq_bp, uint32_t seq_end, uint32_t* seq_pos_p, uint64_t* sa_start_p, uint32_t* hits_p, uint8_t* cmp_ref, int tid) { if (*seq_pos_p == seq_end) return NULL; __parse_node_start_code(idata); // __parse_hyb_node_code(return addr); if (node_type == HYB_BP_PATH) { __parse_path_node_code(path_len); } else if (node_type == HYB_BP_1) { __parse_child_node_code(1, 1, uint8_t, seq_bp[seq_pos], __builtin_popcount, 1); } else if (node_type == HYB_BP_2) { if (seq_pos + 1 < seq_end) { __parse_child_node_code(2, 2, uint16_t, seq_bp[seq_pos] << 2 | seq_bp[seq_pos + 1], __builtin_popcount, 1); if (!child_num) { __parse_part_node_code(1, 2, uint16_t, seq_bp[seq_pos] << 2, 4, __builtin_popcount, 1); } } else { __parse_part_node_code(1, 2, uint16_t, seq_bp[seq_pos] << 2, 4, __builtin_popcount, 1); } } else { if (seq_pos + 2 < seq_end) { //__parse_child_node_code(3, 8, uint64_t, seq_bp[seq_pos] << 4 | seq_bp[seq_pos + 1] << 2 | seq_bp[seq_pos + 2], // __builtin_popcountll, 1ULL); uint8_t kmer = seq_bp[seq_pos] << 4 | seq_bp[seq_pos + 1] << 2 | seq_bp[seq_pos + 2]; uint64_t mark = *(uint64_t*)idata; uint64_t child_num = mark & (1ULL << kmer); if (child_num) { *seq_pos_p += 3; uint32_t nth_child = __builtin_popcountll(mark & ((1ULL << kmer) - 1)); uint8_t has_next_child = __builtin_popcountll(mark >> kmer >> 1); if (*seq_pos_p >= HYB_MAX_SEQ_LEN) { *cmp_ref = 0; } if (off_bytes == HYB_LEAF_NODE) { *hits_p -= nth_child + hits_neq + has_next_child; *sa_start_p += nth_child + hits_neq; } else { if (nth_child == 0) { idata += 8; uint32_t hits_start = *(uint32_t*)idata & ga_hybHitsMask[hits_bytes]; addr = idata + has_next_child * child_ptr_bytes; *hits_p = hits_start - hits_neq; *sa_start_p += hits_neq; } else { idata += 8 + (nth_child - 1) * child_ptr_bytes; uint32_t hits_start = *(uint32_t*)idata & ga_hybHitsMask[hits_bytes]; uint32_t child_offset = *(uint32_t*)(idata + hits_bytes) & ga_hybOffMask[off_bytes]; addr = idata + child_offset + (has_next_child + 1) * child_ptr_bytes; if (has_next_child) { *hits_p = (*(uint32_t*)(idata + child_ptr_bytes) & ga_hybHitsMask[hits_bytes]) - hits_start; } else { *hits_p -= hits_start + hits_neq; } *sa_start_p += hits_start; } } } else { *cmp_ref = 0; } if (!child_num) { __parse_part_node_code(2, 8, uint64_t, seq_bp[seq_pos] << 4 | seq_bp[seq_pos + 1] << 2, 4, __builtin_popcountll, 1ULL); if (!child_num) { __parse_part_node_code(1, 8, uint64_t, seq_bp[seq_pos] << 4, 16, __builtin_popcountll, 1ULL); } } } else if (seq_pos + 1 < seq_end) { __parse_part_node_code(2, 8, uint64_t, seq_bp[seq_pos] << 4 | seq_bp[seq_pos + 1] << 2, 4, __builtin_popcountll, 1ULL); if (!child_num) { __parse_part_node_code(1, 8, uint64_t, seq_bp[seq_pos] << 4, 16, __builtin_popcountll, 1ULL); } } else { __parse_part_node_code(1, 8, uint64_t, seq_bp[seq_pos] << 4, 16, __builtin_popcountll, 1ULL); } } return addr; } void parse_one_hyb_node_min_hits(uint8_t* idata, uint8_t* seq_bits, uint8_t* seq_bp, uint32_t seq_end, int min_hits, int is_head, uint32_t* seq_pos_p, uint64_t* sa_start_p, uint32_t* hits_p, int tid) { if (*seq_pos_p == seq_end) return; uint8_t cmp_ref_val = 0; uint8_t* cmp_ref = &cmp_ref_val; __parse_node_start_no_addr(idata); if (is_head) { *sa_start_p = (*(uint64_t*)idata) & HYB_NODE_SA_MASK; idata += 5; if (*hits_p > HYB_HIT_THRESH) { // 更新hits *hits_p = *((uint32_t*)idata) & ga_hybHitsMask[hits_bytes]; // hits数量 idata += hits_bytes; } } uint8_t* prev_idata = idata; uint32_t prev_seq_pos = *seq_pos_p; uint32_t prev_hits = *hits_p; uint64_t prev_sa_start = *sa_start_p; if (node_type == HYB_BP_2) { __parse_part_node_code(1, 2, uint16_t, seq_bp[seq_pos] << 2, 4, __builtin_popcount, 1); if (*hits_p < min_hits) { *seq_pos_p = prev_seq_pos; *hits_p = prev_hits; *sa_start_p = prev_sa_start; } } else if (node_type == HYB_BP_3) { __parse_part_node_code(1, 8, uint64_t, seq_bp[seq_pos] << 4, 16, __builtin_popcountll, 1ULL); if (*hits_p < min_hits) { *seq_pos_p = prev_seq_pos; *hits_p = prev_hits; *sa_start_p = prev_sa_start; } else if (seq_pos + 1 < seq_end) { uint32_t pp_seq_pos = prev_seq_pos; uint32_t pp_hits = prev_hits; uint64_t pp_sa_start = prev_sa_start; prev_seq_pos = *seq_pos_p; prev_hits = *hits_p; prev_sa_start = *sa_start_p; *seq_pos_p = pp_seq_pos; *hits_p = pp_hits; *sa_start_p = pp_sa_start; idata = prev_idata; // 恢复到上一个节点 __parse_part_node_code(2, 8, uint64_t, seq_bp[seq_pos] << 4 | seq_bp[seq_pos + 1] << 2, 4, __builtin_popcountll, 1ULL); if (*hits_p < min_hits) { *seq_pos_p = prev_seq_pos; *hits_p = prev_hits; *sa_start_p = prev_sa_start; } } } } void parse_one_hyb_node_max_hits(uint8_t* idata, uint8_t* seq_bits, uint8_t* seq_bp, uint32_t seq_end, int max_hits, int min_bp, int is_head, uint32_t* seq_pos_p, uint64_t* sa_start_p, uint32_t* hits_p, int tid) { if (*seq_pos_p == seq_end) return; uint8_t cmp_ref_val = 0; uint8_t* cmp_ref = &cmp_ref_val; __parse_node_start_no_addr(idata); if (is_head) { *sa_start_p = (*(uint64_t*)idata) & HYB_NODE_SA_MASK; idata += 5; if (*hits_p > HYB_HIT_THRESH) { // 更新hits *hits_p = *((uint32_t*)idata) & ga_hybHitsMask[hits_bytes]; // hits数量 idata += hits_bytes; } } uint8_t* prev_idata = idata; uint32_t prev_seq_pos = *seq_pos_p; uint32_t prev_hits = *hits_p; uint64_t prev_sa_start = *sa_start_p; if (node_type == HYB_BP_2) { __parse_part_node_code(1, 2, uint16_t, seq_bp[seq_pos] << 2, 4, __builtin_popcount, 1); } else if (node_type == HYB_BP_3) { if (min_bp == 2) { __parse_part_node_code(2, 8, uint64_t, seq_bp[seq_pos] << 4 | seq_bp[seq_pos + 1] << 2, 4, __builtin_popcountll, 1ULL); } else { __parse_part_node_code(1, 8, uint64_t, seq_bp[seq_pos] << 4, 16, __builtin_popcountll, 1ULL); if (*hits_p >= max_hits) { *seq_pos_p = prev_seq_pos; *hits_p = prev_hits; *sa_start_p = prev_sa_start; idata = prev_idata; // 恢复到上一个节点 __parse_part_node_code(2, 8, uint64_t, seq_bp[seq_pos] << 4 | seq_bp[seq_pos + 1] << 2, 4, __builtin_popcountll, 1ULL); } } } else { // path node if (min_bp > 0) *seq_pos_p += min_bp; } } // 需要给定初始化的hits和seq_pos #define CALC_STAT 0 void get_leaf_node(uint8_t* idata, uint8_t* seq_bits, uint8_t* seq_bp, uint32_t seq_end, uint32_t* seq_pos_p, uint32_t* hits_p, uint64_t* sa_start_p, uint8_t* cmp_ref, int tid) { uint8_t* next_addr = parse_first_hyb_node(idata, seq_bits, seq_bp, seq_end, seq_pos_p, sa_start_p, hits_p, cmp_ref, tid); #if CALC_STAT uint8_t* prev_addr = idata; if (next_addr != NULL) { // fprintf(stderr, "addr dist: %ld\n", next_addr - prev_addr); uint64_t dist = next_addr - prev_addr; if (dist < 32) gdat[0]++; else if (dist < 64) gdat[1]++; else if (dist < 128) gdat[2]++; else gdat[3]++; } #endif while (next_addr != NULL && *hits_p > 1) { #if CALC_STAT prev_addr = next_addr; #endif next_addr = parse_one_hyb_node(next_addr, seq_bits, seq_bp, seq_end, seq_pos_p, sa_start_p, hits_p, cmp_ref, tid); #if CALC_STAT if (next_addr != NULL) { // fprintf(stderr, "addr dist: %ld\n", next_addr - prev_addr); uint64_t dist = next_addr - prev_addr; if (dist < 32) gdat[0]++; else if (dist < 64) gdat[1]++; else if (dist < 128) gdat[2]++; else gdat[3]++; } #endif } } void get_kmer_data(const HybridIndex* hyb, uint8_t* seq_bits, int kmer_pos, uint8_t* type_hits, uint64_t* offset) { uint64_t kmer = _kmer_from_pos(seq_bits, kmer_pos); uint8_t* kmer_data_addr = hyb->kmer_data + kmer * HYB_KMER_DATA_BYTES; *type_hits = *kmer_data_addr & HYB_KMER_DATA_TYPE_MASK; *offset = (*(uint64_t*)kmer_data_addr & HYB_KMER_DATA_MASK) >> HYB_KMER_DATA_TYPE_BITS; } void right_end_match(const HybridIndex* hyb, const int seq_len, const Range* read_range, uint8_t* for_bits, uint8_t* back_bits, int kmer_start, int init_match_len, uint64_t ref_pos, int* right_match) { if (ref_pos < hyb->ref_len) { *right_match = forward_match_len(for_bits, kmer_start + init_match_len, read_range->end, hyb->ref_bits, ref_pos + init_match_len, hyb->ref_len); } else { ref_pos = (hyb->ref_len << 1) - 1 - ref_pos; *right_match = backward_match_len(back_bits, seq_len - kmer_start - init_match_len - 1, seq_len - read_range->end, hyb->ref_bits, ref_pos - init_match_len); } *right_match += init_match_len; // 包括kmer的长度 } void left_end_match(const HybridIndex* hyb, const int seq_len, const Range* read_range, uint8_t* for_bits, uint8_t* back_bits, int kmer_start, int init_match_len, uint64_t ref_pos, int* left_match) { if (ref_pos < hyb->ref_len) { *left_match = backward_match_len(for_bits, kmer_start - 1, read_range->start, hyb->ref_bits, ref_pos - 1); } else { ref_pos = (hyb->ref_len << 1) - 1 - ref_pos; *left_match = forward_match_len(back_bits, seq_len - kmer_start, seq_len - read_range->start, hyb->ref_bits, ref_pos + 1, hyb->ref_len); } } void both_end_match(const HybridIndex* hyb, const int seq_len, const Range* read_range, uint8_t* for_bits, uint8_t* back_bits, int kmer_start, int init_match_len, uint64_t ref_pos, int* left_match, int* right_match) { if (ref_pos < hyb->ref_len) { *right_match = forward_match_len(for_bits, kmer_start + init_match_len, read_range->end, hyb->ref_bits, ref_pos + init_match_len, hyb->ref_len); *left_match = backward_match_len(for_bits, kmer_start - 1, read_range->start, hyb->ref_bits, ref_pos - 1); } else { ref_pos = (hyb->ref_len << 1) - 1 - ref_pos; *right_match = backward_match_len(back_bits, seq_len - kmer_start - init_match_len - 1, seq_len - read_range->end, hyb->ref_bits, ref_pos - init_match_len); *left_match = forward_match_len(back_bits, seq_len - kmer_start, seq_len - read_range->start, hyb->ref_bits, ref_pos + 1, hyb->ref_len); } *right_match += init_match_len; // 包括kmer的长度 }