From 74b464ee4aec1112a511bb8e6c9d69c1aad5a2be Mon Sep 17 00:00:00 2001 From: zzh Date: Fri, 7 Nov 2025 21:31:11 +0800 Subject: [PATCH] =?UTF-8?q?=E6=B7=BB=E5=8A=A0=E4=BA=86=E5=88=9B=E5=BB=BAby?= =?UTF-8?q?te=5Fsa=E5=92=8Ckmer=20index=E7=9A=84=E4=BB=A3=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .gitignore | 8 +- Makefile | 8 +- bwa.c | 2 +- bwt.c | 213 +++++++++++++++++++++++++++ bwt.h | 39 ++++- bwtindex.c | 10 ++ debug.c | 66 +++++++++ debug.h | 35 +++++ hyb_bwa.c | 265 ++++++++++++++++++++++++++++++++++ bwa_hyb.c => hyb_create_idx.c | 0 hyb_idx.h | 11 ++ hyb_seeding_1.c | 0 hyb_seeding_2.c | 0 hyb_seeding_3.c | 0 hyb_utils.c | 0 main.c | 47 +++--- profiling.c | 221 ++++++++++++++++++++++++++++ profiling.h | 130 +++++++++++++++++ rle.h | 4 + utils.h | 67 +++++++++ 20 files changed, 1101 insertions(+), 25 deletions(-) create mode 100644 debug.c create mode 100644 debug.h create mode 100644 hyb_bwa.c rename bwa_hyb.c => hyb_create_idx.c (100%) create mode 100644 hyb_idx.h create mode 100644 hyb_seeding_1.c create mode 100644 hyb_seeding_2.c create mode 100644 hyb_seeding_3.c create mode 100644 hyb_utils.c create mode 100644 profiling.c create mode 100644 profiling.h diff --git a/.gitignore b/.gitignore index 7a2305d..6f553bd 100644 --- a/.gitignore +++ b/.gitignore @@ -5,7 +5,13 @@ test64 .*.swp Makefile.bak bwamem-lite - +test_index/ +index/ +orig_index/ +run.sh +debug.sh +hybalign +*.sam # ---> C # Prerequisites *.d diff --git a/Makefile b/Makefile index d9dbbc0..03bd762 100644 --- a/Makefile +++ b/Makefile @@ -4,13 +4,15 @@ CFLAGS= -g -Wall -Wno-unused-function -O3 WRAP_MALLOC=-DUSE_MALLOC_WRAPPERS AR= ar DFLAGS= -DHAVE_PTHREAD $(WRAP_MALLOC) +HYBOBJS= hyb_bwa.o hyb_utils.o hyb_seeding_1.o hyb_seeding_2.o hyb_seeding_3.o hyb_create_idx.o debug.o profiling.o LOBJS= utils.o kthread.o kstring.o ksw.o bwt.o bntseq.o bwa.o bwamem.o bwamem_pair.o bwamem_extra.o malloc_wrap.o \ QSufSort.o bwt_gen.o rope.o rle.o is.o bwtindex.o AOBJS= bwashm.o bwase.o bwaseqio.o bwtgap.o bwtaln.o bamlite.o \ bwape.o kopen.o pemerge.o maxk.o \ bwtsw2_core.o bwtsw2_main.o bwtsw2_aux.o bwt_lite.o \ - bwtsw2_chain.o fastmap.o bwtsw2_pair.o -PROG= bwa + bwtsw2_chain.o fastmap.o bwtsw2_pair.o \ + $(HYBOBJS) +PROG= hybalign INCLUDES= LIBS= -lm -lz -lpthread SUBDIRS= . @@ -34,7 +36,7 @@ endif all:$(PROG) -bwa:libbwa.a $(AOBJS) main.o +$(PROG):libbwa.a $(AOBJS) main.o $(CC) $(CFLAGS) $(LDFLAGS) $(AOBJS) main.o -o $@ -L. -lbwa $(LIBS) bwamem-lite:libbwa.a example.o diff --git a/bwa.c b/bwa.c index 9f05523..e7b571c 100644 --- a/bwa.c +++ b/bwa.c @@ -280,7 +280,7 @@ bwt_t *bwa_idx_load_bwt(const char *hint) tmp = calloc(strlen(prefix) + 5, 1); strcat(strcpy(tmp, prefix), ".bwt"); // FM-index bwt = bwt_restore_bwt(tmp); - strcat(strcpy(tmp, prefix), ".sa"); // partial suffix array (SA) + strcat(strcpy(tmp, prefix), ".32.sa"); // partial suffix array (SA) bwt_restore_sa(tmp, bwt); free(tmp); free(prefix); return bwt; diff --git a/bwt.c b/bwt.c index 9083654..e89b2e4 100644 --- a/bwt.c +++ b/bwt.c @@ -467,3 +467,216 @@ void bwt_destroy(bwt_t *bwt) free(bwt->sa); free(bwt->bwt); free(bwt); } + +////////////////////////////////// new funcs for hyb-align //////////////////////////////// + +// 设置某一行的排序值,sa的索引有效值从1开始,(0设置为-1, 小端模式) +void bwt_set_sa(uint8_t* sa_arr, bwtint_t k, bwtint_t val) { + // const bwtint_t block_idx = (k >> 3) * 33; + const bwtint_t block_idx = ((k >> 3) << 5) + (k >> 3); // 8个数为一组,共享33个字节 + const int val_idx_in_block = k & 7; + const bwtint_t start_byte_idx = block_idx + (val_idx_in_block << 2); + bwtint_t* sa_addr = (bwtint_t*)(sa_arr + start_byte_idx); + // *sa_addr &= (1 << val_idx_in_block) - 1; // + // 如果开辟内存的时候清零了,这一步可以省略,会清除后面的数据,只适合按递增顺序赋值 + *sa_addr |= (val & ((1L << 33) - 1)) << val_idx_in_block; +} + +// 获取某一行的排序值(小端模式) +bwtint_t bwt_get_sa(uint8_t* sa_arr, bwtint_t k) { + // const bwtint_t block_idx = (k >> 3) * 33; + const bwtint_t block_idx = ((k >> 3) << 5) + (k >> 3); // 8个数为一组,共享33个字节 + const int val_idx_in_block = k & 7; + const bwtint_t start_byte_idx = block_idx + (val_idx_in_block << 2); + bwtint_t val = *(bwtint_t*)(sa_arr + start_byte_idx); + val = (val >> val_idx_in_block) & 8589934591; + return val; +} + +/* + * bwtint_t bwt_get_sa(uint8_t* sa_arr, bwtint_t k) { + * const bwtint_t start_byte = ((k << 5) + k) >> 3; // 存储这个sa数据的起始字节 + * bwtint_t val = *(bwtint_t*)(sa_arr + start_byte); + * val = (val >> (k & 7)) & 8589934591; + * return val; + * } + */ + +// 第一次创建索引时,创建不采样的SA(24G)和采样间隔为4的SA(6G) +// bwt->bwt and bwt->occ must be precalculated +void bwt_cal_byte_sa(bwt_t* bwt) { + bwtint_t isa, sa, i, block_size; // S(isa) = sa isa是后缀数组的索引,sa表示位置 + double tmp_time, elapsed_time; + int intv = 1; + int intv_round = intv; // 间隔多少来保存一个位置信息 + + kv_roundup32(intv_round); + xassert(intv_round == intv, "SA sample interval is not a power of 2."); + xassert(bwt->bwt, "bwt_t::bwt is not initialized."); + + if (bwt->byte_sa) + free(bwt->byte_sa); + bwt->sa_intv = intv; + bwt->n_sa = (bwt->seq_len + intv) / intv; + bwt->byte_sa = (uint8_t*)calloc(SA_BYTES(bwt->n_sa), 1); // 用33位表示位置 + + fprintf(stderr, "bytes: %ld, sa size: %ld\n", SA_BYTES(bwt->n_sa), bwt->n_sa); + // calculate SA value + isa = 0; + sa = bwt->seq_len; + block_size = bwt->seq_len / 100; + tmp_time = realtime(); + for (i = 0; i < bwt->seq_len; ++i) { + if (i % block_size == 0) { + elapsed_time = realtime() - tmp_time; + fprintf(stderr, "%ld%% percent complished. %f s elapsed.\n", i / block_size, elapsed_time); + } + if (isa % intv == 0) { + bwt_set_sa(bwt->byte_sa, isa / intv, sa); // 第一个位置是$,所以位置就是序列长度 + if (i % (block_size / 2) == 0) { + fprintf(stderr, "%ld %ld\n", sa, bwt_get_sa(bwt->byte_sa, isa / intv)); // 验证结果正确性 + } + } + --sa; // 从后往前,一个位置一个位置的找对应的后缀数组,isa就是与sa对应的后缀数组排序后在sa数组中的相对位置 + isa = bwt_invPsi(bwt, isa); // 下一个后缀数组的相对位置 + } + if (isa % intv == 0) + bwt_set_sa(bwt->byte_sa, isa / intv, sa); + // bwt_set_sa(bwt->byte_sa, 0, (bwtint_t)-1); // 赋值成-1也没问题,set_sa那里已经修正了 + bwt_set_sa(bwt->byte_sa, 0, 8589934591); // 2的33次方-1 before this line, bwt->sa[0] = bwt->seq_len +} + +// 保存byte_sa +void bwt_dump_byte_sa(const char* fn, const bwt_t* bwt) { + FILE* fp; + fp = xopen(fn, "wb"); + err_fwrite(&bwt->primary, sizeof(bwtint_t), 1, fp); + err_fwrite(bwt->L2 + 1, sizeof(bwtint_t), 4, fp); + err_fwrite(&bwt->sa_intv, sizeof(bwtint_t), 1, fp); + err_fwrite(&bwt->seq_len, sizeof(bwtint_t), 1, fp); + err_fwrite(bwt->byte_sa, 1, SA_BYTES(bwt->n_sa), fp); + err_fflush(fp); + err_fclose(fp); +} + +// 设置kmer第pos个碱基对应的fmt匹配信息 +void kmer_setval_at(uint8_t* mem_addr, bwtintv_t ik, int pos) { + int byte_idx = pos * 14; + uint8_t* arr = mem_addr + byte_idx; + arr[0] = (uint8_t)(ik.x[0] >> 32); + *((uint32_t*)(arr + 1)) = (uint32_t)ik.x[0]; + arr += 5; + arr[0] = (uint8_t)(ik.x[1] >> 32); + *((uint32_t*)(arr + 1)) = (uint32_t)ik.x[1]; + arr += 5; + *((uint32_t*)arr) = (uint32_t)ik.x[2]; +} + +// 获取kmer的bwt匹配信息 +void kmer_getval_at(uint8_t* mem_addr, bwtintv_t* ok, int pos) { + bwtint_t x0, x1, x2; + int byte_idx = pos * 14; + uint8_t* arr = mem_addr + byte_idx; + x0 = *arr; + x0 = (x0 << 32) | *((uint32_t*)(arr + 1)); + arr += 5; + x1 = *arr; + x1 = (x1 << 32) | *((uint32_t*)(arr + 1)); + arr += 5; + x2 = *((uint32_t*)arr); + ok->x[0] = x0; + ok->x[1] = x1; + ok->x[2] = x2; +} + +// 获取kmer对应的fmt匹配信息, pos should be [0, 13], qbit一定是14个碱基对应的编码, pos是指在14个碱基中的位置 +void bwt_kmer_get(const KmerHash* kmer_hash, bwtintv_t* ok, uint32_t qbit, int pos) { +#if HASH_KMER_LEN == 14 + if (pos == 13) + kmer_getval_at(kmer_hash->ke14[qbit].intv_arr, ok, 0); + else if (pos == 12) + kmer_getval_at(kmer_hash->ke13[qbit >> 2].intv_arr, ok, 0); + else if (pos == 11) + kmer_getval_at(kmer_hash->ke12[qbit >> 4].intv_arr, ok, 0); + else if (pos == 10) // 其实对应kmer长度为10 + kmer_getval_at(kmer_hash->ke11[qbit >> 6].intv_arr, ok, 0); + else + kmer_getval_at(kmer_hash->ke10[qbit >> 8].intv_arr, ok, pos); +#elif HASH_KMER_LEN == 13 + if (pos == 12) + kmer_getval_at(kmer_hash->ke13[qbit].intv_arr, ok, 0); + else if (pos == 11) + kmer_getval_at(kmer_hash->ke12[qbit >> 2].intv_arr, ok, 0); + else if (pos == 10) + kmer_getval_at(kmer_hash->ke11[qbit >> 4].intv_arr, ok, 0); + else + kmer_getval_at(kmer_hash->ke10[qbit >> 6].intv_arr, ok, pos); +#else + if (pos == 11) + kmer_getval_at(kmer_hash->ke12[qbit].intv_arr, ok, 0); + else if (pos == 10) + kmer_getval_at(kmer_hash->ke11[qbit >> 2].intv_arr, ok, 0); + else + kmer_getval_at(kmer_hash->ke10[qbit >> 4].intv_arr, ok, pos); +#endif +} + +// 只是用来计算kmer-index,所以q中只包含A/C/G/T,不包含N +bwtintv_t bwt_forward_search(bwt_t* bwt, const uint8_t* q, int qlen) { + bwtintv_t ik, ok[4]; + int i, c1; + bwt_set_intv(bwt, q[0], ik); + // 每次扩展一个碱基 + for (i = 1; i < qlen; ++i) { + c1 = 3 - q[i]; + bwt_extend(bwt, &ik, ok, 0); + ik = ok[c1]; + } + ik.info = qlen; + return ik; +} + +// 将kmer hash数据写入到文件 +void bwt_dump_kmer_idx(const char* fn, const KmerHash* kh) { + FILE* fp; + fp = xopen(fn, "wb"); + err_fwrite(kh->ke10, 1, (1 << (10 << 1)) * sizeof(KmerEntryArr), fp); + err_fwrite(kh->ke11, 1, (1 << (11 << 1)) * sizeof(KmerEntry), fp); + err_fwrite(kh->ke12, 1, (1 << (12 << 1)) * sizeof(KmerEntry), fp); +#if HASH_KMER_LEN > 12 + err_fwrite(kh->ke13, 1, (1 << (13 << 1)) * sizeof(KmerEntry), fp); +#endif +#if HASH_KMER_LEN > 13 + err_fwrite(kh->ke14, 1, (1 << (14 << 1)) * sizeof(KmerEntry), fp); +#endif + err_fflush(fp); + err_fclose(fp); +} + +// 从文件中读取kmer hash信息 +KmerHash bwt_restore_kmer_idx(const char* fn) { + FILE* fp = xopen(fn, "rb"); + KmerHash khash; + KmerHash* kh = &khash; + +#define LOAD_KMER_HASH(kmer_len) \ + do { \ + len = 1 << (kmer_len << 1); \ + kh->ke##kmer_len = (KmerEntry*)malloc(len * sizeof(KmerEntry)); \ + fread_fix(fp, len * sizeof(KmerEntry), kh->ke##kmer_len); \ + } while (0) + + int len = 1 << (10 << 1); + kh->ke10 = (KmerEntryArr*)malloc(len * sizeof(KmerEntryArr)); + fread_fix(fp, len * sizeof(KmerEntryArr), kh->ke10); + LOAD_KMER_HASH(11); + LOAD_KMER_HASH(12); +#if HASH_KMER_LEN > 12 + LOAD_KMER_HASH(13); +#endif +#if HASH_KMER_LEN > 13 + LOAD_KMER_HASH(14); +#endif + err_fclose(fp); + return khash; +} \ No newline at end of file diff --git a/bwt.h b/bwt.h index daba87f..36af98e 100644 --- a/bwt.h +++ b/bwt.h @@ -45,6 +45,28 @@ typedef unsigned char ubyte_t; typedef uint64_t bwtint_t; +#define SA_BYTES(n_sa) ((((33 * (n_sa) + 7) / 8) & (~7L)) + 8) /* 8字节对齐*/ +#define HASH_KMER_LEN 14 + +// 用来保存kmer对应的bwt的位置信息 +typedef struct { + // 40+40+32 14个byte,这样好处理 + uint8_t intv_arr[14]; // 保存kmer中每扩展一个碱基对应的bwtintv_t数据 +} KmerEntry; + +typedef struct { + uint8_t intv_arr[140]; // 保存长度为10的kmer,每个碱基对应的bwt匹配信息 +} KmerEntryArr; + +// 保存各个位置对应的bwt匹配信息 +typedef struct { + KmerEntryArr* ke10; + KmerEntry* ke11; + KmerEntry* ke12; + KmerEntry* ke13; + KmerEntry* ke14; +} KmerHash; + typedef struct { bwtint_t primary; // S^{-1}(0), or the primary index of BWT bwtint_t L2[5]; // C(), cumulative count @@ -57,6 +79,8 @@ typedef struct { int sa_intv; bwtint_t n_sa; bwtint_t *sa; + uint8_t* byte_sa; // byte-based Suffix-Array + KmerHash kmer_hash; // 保存kmer对应的bwt位置信息 } bwt_t; typedef struct { @@ -125,8 +149,21 @@ extern "C" { int bwt_seed_strategy1(const bwt_t *bwt, int len, const uint8_t *q, int x, int min_len, int max_intv, bwtintv_t *mem); + + // new funcs for hyb-align + void bwt_cal_byte_sa(bwt_t* bwt); + void bwt_set_sa(uint8_t* sa_arr, bwtint_t k, bwtint_t val); + bwtint_t bwt_get_sa(uint8_t* sa_arr, bwtint_t k); + void kmer_getval_at(uint8_t* mem_addr, bwtintv_t* ok, int pos); + void kmer_setval_at(uint8_t* mem_addr, bwtintv_t ik, int pos); + void bwt_kmer_get(const KmerHash* kmer_hash, bwtintv_t* ok, uint32_t qbit, int pos); + void bwt_dump_byte_sa(const char *fn, const bwt_t *bwt); + bwtintv_t bwt_forward_search(bwt_t* bwt, const uint8_t* q, int qlen); + void bwt_dump_kmer_idx(const char* fn, const KmerHash* kh); + KmerHash bwt_restore_kmer_idx(const char* fn); + #ifdef __cplusplus -} + } #endif #endif diff --git a/bwtindex.c b/bwtindex.c index 9da66e1..bdf1a73 100644 --- a/bwtindex.c +++ b/bwtindex.c @@ -269,6 +269,16 @@ int bwa_idx_build(const char *fa, const char *prefix, int algo_type, int block_s t = clock(); if (bwa_verbose >= 3) fprintf(stderr, "[bwa_index] Pack FASTA... "); l_pac = bns_fasta2bntseq(fp, prefix, 0); + uint64_t seq_len = l_pac >> 1; + strcpy(str, prefix); strcat(str, ".ref-len"); + FILE* f_len = fopen(str, "w"); + fprintf(f_len, "%lu\n", seq_len); + fclose(f_len); + memset(str, 0, strlen(prefix) + 10); + + //fprintf(stderr, "len: %ld, %ld\n", l_pac, seq_len); + //exit(0); + if (bwa_verbose >= 3) fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC); err_gzclose(fp); } diff --git a/debug.c b/debug.c new file mode 100644 index 0000000..95b9479 --- /dev/null +++ b/debug.c @@ -0,0 +1,66 @@ +/********************************************************************************************* + Description: data and files for debugging + Copyright : All right reserved by NCIC.ICT + + Author : Zhang Zhonghai + Date : 2024/04/09 +***********************************************************************************************/ +#include +#include "debug.h" + +FILE *gf[4] = {0}, + *gfq[4] = {0}, + *gft[4] = {0}, + *gfi[4] = {0}; + +uint64_t debug_num = 0; + +uint64_t all_match_len = 0; +uint64_t all_seq_num = 0; +uint64_t all_type_hits = 0; +double seed_time = 0; + +int open_qti_files() +{ + char fn[1024] = {0}; + int i = 0; + for (; i < 4; ++i) + { + sprintf(fn, "./output/q%d.txt", i); + gfq[i] = fopen(fn, "w"); + sprintf(fn, "./output/t%d.txt", i); + gft[i] = fopen(fn, "w"); + sprintf(fn, "./output/i%d.txt", i); + gfi[i] = fopen(fn, "w"); + } + return 0; +} + +int open_debug_files() +{ + char fn[1024] = {0}; + int i = 0; + for (; i < 4; ++i) + { + sprintf(fn, "./output/fp%d.txt", i); + gf[i] = fopen(fn, "w"); + } + return 0; +} + +int close_files() +{ + int i = 0; + for (; i < 4; ++i) + { + if (gf[i] != 0) + fclose(gf[i]); + if (gfq[i] != 0) + fclose(gfq[i]); + if (gft[i] != 0) + fclose(gft[i]); + if (gfi[i] != 0) + fclose(gfi[i]); + } + return 0; +} \ No newline at end of file diff --git a/debug.h b/debug.h new file mode 100644 index 0000000..68e8a3b --- /dev/null +++ b/debug.h @@ -0,0 +1,35 @@ +/********************************************************************************************* + Description: data and files for debugging + Copyright : All right reserved by NCIC.ICT + + Author : Zhang Zhonghai + Date : 2024/04/09 +***********************************************************************************************/ +#include +#include + +////////////////// for debug and test ////////////////////////// + +#define DEBUG_FILE_OUTPUT // 打开gfp1-4文件,并记录debug信息 +// #define COUNT_SEED_LENGTH // 记录seed匹配数量降低到1时的长度,以及最终扩展的长度 +// #define GET_FULL_MATCH_READ // 获取完全匹配的reads +// #define COUNT_CALC_NUM // 统计BSW的剪枝后的计算量和未剪枝前的计算量 +// #define GET_DIFFERENT_EXTENSION_LENGTH // 获取不同长度extension的query,target,和其他用于计算的数据 +// #define GET_KSW_ALIGN_SEQ +// #define DEBUG_SW_EXTEND // 将bsw的分值输入到debug文件里 + +//////////////////////////////////////////////////////////////// + +extern FILE *gf[4], *gfq[4], *gft[4], *gfi[4]; + +extern uint64_t debug_num; +extern uint64_t all_match_len; +extern uint64_t all_seq_num; +extern uint64_t all_type_hits; +extern double seed_time; + +int open_qti_files(); + +int open_debug_files(); + +int close_files(); \ No newline at end of file diff --git a/hyb_bwa.c b/hyb_bwa.c new file mode 100644 index 0000000..028933f --- /dev/null +++ b/hyb_bwa.c @@ -0,0 +1,265 @@ +/* The MIT License + + Copyright (c) 2025- Zhonghai Zhang (ICT) + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be + included in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE. +*/ + +#include +#include +#include +#include +#include +#include + +#include "bntseq.h" +#include "bwa.h" +#include "bwt.h" +#include "rle.h" +#include "rope.h" +#include "utils.h" +#include "kvec.h" +#include "hyb_idx.h" + + +#ifdef _DIVBWT +#include "divsufsort.h" +#endif + +#ifdef USE_MALLOC_WRAPPERS +#include "malloc_wrap.h" +#endif + +int bwa_bwt2fullbytesa(int argc, char* argv[]) // the "bwt2bytesa" command +{ + bwt_t* bwt; + if (optind + 2 > argc) { + fprintf(stderr, "Usage: %s bwt2bytesa \n", PROG_NAME); + return 1; + } + bwt = bwt_restore_bwt(argv[optind]); + bwt_cal_byte_sa(bwt); + _store_data_to_file(argv[optind + 1], bwt->byte_sa, sizeof(bwtint_t), SA_BYTES(bwt->n_sa) >> 3); + return 0; +} + +// extract suffix array from non-sampled suffix array +int bwa_extract_sa(int argc, char* argv[]) { + bwt_t* bwt; + int c, sa_intv = 32; + while ((c = getopt(argc, argv, "i:")) >= 0) { + switch (c) { + case 'i': + sa_intv = atoi(optarg); + break; + default: + return 1; + } + } + if (optind + 3 > argc) { + fprintf(stderr, "Usage: bwa extractsa [-i %d] \n", sa_intv); + return 1; + } + bwt = bwt_restore_bwt(argv[optind]); + + fprintf(stderr, "bwt seq_len %ld, intv: %d\n", bwt->seq_len, sa_intv); + //exit(0); + + _load_file_to_data(argv[optind + 1], bwt->byte_sa); + + if (bwt->sa) free(bwt->sa); + bwt->sa_intv = sa_intv; + bwt->n_sa = (bwt->seq_len + sa_intv) / sa_intv; + bwt->sa = (bwtint_t*)calloc(bwt->n_sa, sizeof(bwtint_t)); + uint64_t i; + for (i = 0; i < bwt->seq_len; ++i) { + if (i % sa_intv == 0) + bwt->sa[i / sa_intv] = bwt_get_sa(bwt->byte_sa, i); + } + if (i % sa_intv == 0) + bwt->sa[i / sa_intv] = bwt_get_sa(bwt->byte_sa, i); + bwt->sa[0] = (bwtint_t)-1; // before this line, bwt->sa[0] = bwt->seq_len + // bwt_cal_sa(bwt, sa_intv); + bwt_dump_sa(argv[optind + 2], bwt); + // bwt_destroy(bwt); + return 0; +} + +// extract byte suffix array from non-sampled suffix array +int bwa_extract_byte_sa(int argc, char* argv[]) { + bwt_t* bwt; + int c, sa_intv = 32; + while ((c = getopt(argc, argv, "i:")) >= 0) { + switch (c) { + case 'i': + sa_intv = atoi(optarg); + break; + default: + return 1; + } + } + if (optind + 3 > argc) { + fprintf(stderr, "Usage: bwa extractbytesa [-i %d] \n", sa_intv); + return 1; + } + bwt = bwt_restore_bwt(argv[optind]); + + // fprintf(stderr, "bwt seq_len %ld, intv: %d\n", bwt->seq_len, sa_intv); + // exit(0); + + uint8_t *full_byte_sa = NULL; + _load_file_to_data(argv[optind + 1], full_byte_sa); + + bwt->sa_intv = sa_intv; + bwt->n_sa = (bwt->seq_len + sa_intv) / sa_intv; + bwt->byte_sa = (uint8_t*)calloc(SA_BYTES(bwt->n_sa), 1); + uint64_t i; + for (i = 0; i < bwt->seq_len; ++i) { + if (i % sa_intv == 0) + bwt_set_sa(bwt->byte_sa, i / sa_intv, bwt_get_sa(full_byte_sa, i)); + } + if (i % sa_intv == 0) + bwt_set_sa(bwt->byte_sa, i / sa_intv, bwt_get_sa(full_byte_sa, i)); + bwt_set_sa(bwt->byte_sa, 0, 8589934591); // before this line, bwt->sa[0] = bwt->seq_len + bwt_dump_byte_sa(argv[optind + 2], bwt); + // bwt_destroy(bwt); + return 0; +} + +// 生成所有KMER_LEN长度的序列,字节表示(0A,1C,2G,3T) +void gen_kmer_bits(uint8_t** seq_arr, uint64_t* kmer_arr_size, int kmer_len) { + uint64_t i; + uint64_t seq_up_val = (1L << (kmer_len << 1)); + *kmer_arr_size = seq_up_val; + *seq_arr = (uint8_t*)realloc(*seq_arr, seq_up_val * (uint64_t)kmer_len); + for (i = 0; i < seq_up_val; ++i) { + const uint64_t base_idx = i * kmer_len; + for (int j = kmer_len - 1; j >= 0; --j) { + (*seq_arr)[base_idx + kmer_len - 1 - j] = (i >> (j << 1)) & 3; + } + } +} + +void bwt_create_kmer_idx(bwt_t* bwt) { + uint64_t kmer_arr_size = 0; + uint8_t* seq_arr = 0; + bwtintv_t ik, ok[4]; + uint64_t j; + int i, c1; + int qlen = 10; + KmerHash* kh = &bwt->kmer_hash; + + kh->ke10 = (KmerEntryArr*)malloc((1 << (10 << 1)) * sizeof(KmerEntryArr)); + kh->ke11 = (KmerEntry*)malloc((1 << (11 << 1)) * sizeof(KmerEntry)); + kh->ke12 = (KmerEntry*)malloc((1 << (12 << 1)) * sizeof(KmerEntry)); + kh->ke13 = (KmerEntry*)malloc((1 << (13 << 1)) * sizeof(KmerEntry)); + kh->ke14 = (KmerEntry*)malloc((1 << (14 << 1)) * sizeof(KmerEntry)); + + // 长度为10的kmer + gen_kmer_bits(&seq_arr, &kmer_arr_size, 10); + for (j = 0; j < kmer_arr_size; ++j) { + uint8_t* q = &seq_arr[j * 10]; + uint8_t* mem_addr = kh->ke10[j].intv_arr; + bwt_set_intv(bwt, q[0], ik); + kmer_setval_at(mem_addr, ik, 0); + // 每次扩展一个碱基 + for (i = 1; i < qlen; ++i) { + c1 = 3 - q[i]; + bwt_extend(bwt, &ik, ok, 0); + ik = ok[c1]; + kmer_setval_at(mem_addr, ik, i); + } + } + fprintf(stderr, "Kmer idx for length 10 done.\n"); +#define GEN_KMER_FOR_LEN(len) \ + do { \ + gen_kmer_bits(&seq_arr, &kmer_arr_size, (len)); \ + for (j = 0; j < kmer_arr_size; ++j) { \ + bwtintv_t p = bwt_forward_search(bwt, &seq_arr[j * (len)], (len)); \ + kmer_setval_at(bwt->kmer_hash.ke##len[j].intv_arr, p, 0); \ + } \ + fprintf(stderr, "Kmer idx for length %d done.\n", len); \ + } while (0) + + GEN_KMER_FOR_LEN(11); // 长度11的kmer + GEN_KMER_FOR_LEN(12); + GEN_KMER_FOR_LEN(13); + GEN_KMER_FOR_LEN(14); + free(seq_arr); +} + +int bwa_bwt2kmer(int argc, char* argv[]) { + bwt_t* bwt; + if (optind + 2 > argc) { + fprintf(stderr, "Usage: bwa extractbytesa \n"); + return 1; + } + bwt = bwt_restore_bwt(argv[optind]); + + bwt_create_kmer_idx(bwt); + + bwt_dump_kmer_idx(argv[optind + 1], &bwt->kmer_hash); + + return 0; +} + +int bwa_bwt2hyb(int argc, char* argv[]) {} + +// 创建正向的kmer +inline uint64_t build_forward_kmer(const uint8_t* q, int qlen, int kmer_len, int* base_consumed) { + uint64_t qbit = 0, i; + qlen = qlen < kmer_len ? qlen : kmer_len; + for (i = 0; i < qlen; ++i) { + if (q[i] > 3) + break; // 要考虑碱基是N + qbit |= (uint64_t)q[i] << ((kmer_len - 1 - i) << 1); + } + *base_consumed = i; + return qbit; +} + +int hyb_test(int argc, char* argv[]) { + bwt_t* bwt; + if (optind + 1 > argc) { + fprintf(stderr, "Usage: bwa hybtest \n"); + return 1; + } + + char fn[MAX_PATH]; + snprintf(fn, MAX_PATH, "%s.bwt", argv[optind]); + + bwt = bwt_restore_bwt(fn); + snprintf(fn, MAX_PATH, "%s.hyb.kmer", argv[optind]); + bwt->kmer_hash = bwt_restore_kmer_idx(fn); + + uint8_t test_seq[14] = {0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1}; // ACGTACGTACGTAC + bwtintv_t ok1, ok2; + ok1 = bwt_forward_search(bwt, test_seq, 14); + + int base_consumed = 0; + uint64_t qbit = build_forward_kmer(test_seq, 14, 14, &base_consumed); + + bwt_kmer_get(&bwt->kmer_hash, &ok2, qbit, 13); + + fprintf(stderr, "forward search: [%lu, %lu, %lu), kmer idx: [%lu, %lu, %lu)\n", ok1.x[0], ok1.x[1], ok1.x[2], ok2.x[0], ok2.x[1], ok2.x[2]); + + return 0; +} \ No newline at end of file diff --git a/bwa_hyb.c b/hyb_create_idx.c similarity index 100% rename from bwa_hyb.c rename to hyb_create_idx.c diff --git a/hyb_idx.h b/hyb_idx.h new file mode 100644 index 0000000..eb43c05 --- /dev/null +++ b/hyb_idx.h @@ -0,0 +1,11 @@ +#pragma once + +#include +#include +#include +#include + +#include "debug.h" +#include "kvec.h" +#include "utils.h" + diff --git a/hyb_seeding_1.c b/hyb_seeding_1.c new file mode 100644 index 0000000..e69de29 diff --git a/hyb_seeding_2.c b/hyb_seeding_2.c new file mode 100644 index 0000000..e69de29 diff --git a/hyb_seeding_3.c b/hyb_seeding_3.c new file mode 100644 index 0000000..e69de29 diff --git a/hyb_utils.c b/hyb_utils.c new file mode 100644 index 0000000..e69de29 diff --git a/main.c b/main.c index 9a21e03..33decfa 100644 --- a/main.c +++ b/main.c @@ -54,8 +54,12 @@ int main_pemerge(int argc, char *argv[]); int main_maxk(int argc, char *argv[]); int bwa_bwt2kmer(int argc, char* argv[]); // create kmer-index from bwt -int bwa_bwt2bytesa(int argc, char* argv[]); // create byte-based Suffix-Array +int bwa_bwt2fullbytesa(int argc, char* argv[]); // create full byte-based Suffix-Array int bwa_bwt2hyb(int argc, char* argv[]); // create hybrid-index +int bwa_extract_sa(int argc, char* argv[]); // extract suffix array from non-sampled suffix array +int bwa_extract_byte_sa(int argc, char* argv[]); // extract suffix array from non-sampled suffix array + +int hyb_test(int argc, char* argv[]); // for test static int usage() { @@ -64,24 +68,26 @@ static int usage() fprintf(stderr, "Version: %s\n", PACKAGE_VERSION); fprintf(stderr, "Contact: Heng Li \n\n"); fprintf(stderr, "Usage: bwa [options]\n\n"); - fprintf(stderr, "Command: index index sequences in the FASTA format\n"); - fprintf(stderr, " mem BWA-MEM algorithm\n"); - fprintf(stderr, " fastmap identify super-maximal exact matches\n"); - fprintf(stderr, " pemerge merge overlapping paired ends (EXPERIMENTAL)\n"); - fprintf(stderr, " aln gapped/ungapped alignment\n"); - fprintf(stderr, " samse generate alignment (single ended)\n"); - fprintf(stderr, " sampe generate alignment (paired ended)\n"); - fprintf(stderr, " bwasw BWA-SW for long queries (DEPRECATED)\n"); + fprintf(stderr, "Command: index index sequences in the FASTA format\n"); + fprintf(stderr, " mem BWA-MEM algorithm\n"); + fprintf(stderr, " fastmap identify super-maximal exact matches\n"); + fprintf(stderr, " pemerge merge overlapping paired ends (EXPERIMENTAL)\n"); + fprintf(stderr, " aln gapped/ungapped alignment\n"); + fprintf(stderr, " samse generate alignment (single ended)\n"); + fprintf(stderr, " sampe generate alignment (paired ended)\n"); + fprintf(stderr, " bwasw BWA-SW for long queries (DEPRECATED)\n"); fprintf(stderr, "\n"); - fprintf(stderr, " shm manage indices in shared memory\n"); - fprintf(stderr, " fa2pac convert FASTA to PAC format\n"); - fprintf(stderr, " pac2bwt generate BWT from PAC\n"); - fprintf(stderr, " pac2bwtgen alternative algorithm for generating BWT\n"); - fprintf(stderr, " bwtupdate update .bwt to the new format\n"); - fprintf(stderr, " bwt2sa generate SA from BWT and Occ\n"); - fprintf(stderr, " bwt2bytesa generate SA(using byte array) from BWT and Occ\n"); - fprintf(stderr, " bwt2kmer generate kmer hash index from bwt to accelarate the first 14 bases in seeding process.\n"); - fprintf(stderr, " bwt2hyb generate hybrid index from BWT\n"); + fprintf(stderr, " shm manage indices in shared memory\n"); + fprintf(stderr, " fa2pac convert FASTA to PAC format\n"); + fprintf(stderr, " pac2bwt generate BWT from PAC\n"); + fprintf(stderr, " pac2bwtgen alternative algorithm for generating BWT\n"); + fprintf(stderr, " bwtupdate update .bwt to the new format\n"); + fprintf(stderr, " bwt2sa generate SA from BWT and Occ\n"); + fprintf(stderr, " bwt2fullbytesa generate SA(using byte array) from BWT and Occ\n"); + fprintf(stderr, " bwt2kmer generate kmer hash index from bwt to accelarate the first 14 bases in seeding process.\n"); + fprintf(stderr, " bwt2hyb generate hybrid index from BWT\n"); + fprintf(stderr, " extractsa generate sa from full byte suffix array\n"); + fprintf(stderr, " extractbytesa generate byte sa from full byte suffix array\n"); fprintf(stderr, "\n"); fprintf(stderr, "Note: To use BWA, you need to first index the genome with `bwa index'.\n" @@ -119,9 +125,12 @@ int main(int argc, char *argv[]) else if (strcmp(argv[1], "shm") == 0) ret = main_shm(argc-1, argv+1); else if (strcmp(argv[1], "pemerge") == 0) ret = main_pemerge(argc-1, argv+1); else if (strcmp(argv[1], "maxk") == 0) ret = main_maxk(argc-1, argv+1); - else if (strcmp(argv[1], "bwt2bytesa") == 0) ret = bwa_bwt2bytesa(argc - 1, argv + 1); + else if (strcmp(argv[1], "bwt2fullbytesa") == 0) ret = bwa_bwt2fullbytesa(argc - 1, argv + 1); else if (strcmp(argv[1], "bwt2kmer") == 0) ret = bwa_bwt2kmer(argc - 1, argv + 1); else if (strcmp(argv[1], "bwt2hyb") == 0) ret = bwa_bwt2hyb(argc - 1, argv + 1); + else if (strcmp(argv[1], "extractsa") == 0) ret = bwa_extract_sa(argc - 1, argv + 1); + else if (strcmp(argv[1], "extractbytesa") == 0) ret = bwa_extract_byte_sa(argc - 1, argv + 1); + else if (strcmp(argv[1], "hybtest") == 0) ret = hyb_test(argc - 1, argv + 1); else { fprintf(stderr, "[main] unrecognized command '%s'\n", argv[1]); return 1; diff --git a/profiling.c b/profiling.c new file mode 100644 index 0000000..d591721 --- /dev/null +++ b/profiling.c @@ -0,0 +1,221 @@ +/* +Description: profiling related data + +Copyright : All right reserved by ICT + +Author : Zhang Zhonghai +Date : 2024/04/06 +*/ + +#include +#include "utils.h" +#include "profiling.h" +#include "debug.h" + +#ifdef SHOW_PERF +uint64_t tprof[LIM_THREAD_PROF_TYPE][LIM_THREAD] = {0}; +uint64_t proc_freq = 1000; +uint64_t gprof[LIM_GLOBAL_PROF_TYPE] = {0}; +#endif + +#ifdef SHOW_DATA_PERF +/* +tdat[0]: read nums +tdat[1]: seed-1 full match nums +*/ +int64_t tdat[LIM_THREAD_DATA_TYPE][LIM_THREAD] = {0}; +int64_t t_sd[LIM_THREAD_DATA_TYPE][LIM_THREAD] = {0}; +int64_t gdat[LIM_GLOBAL_DATA_TYPE] = {0}; +int64_t gd1[LIM_GLOBAL_DATA_TYPE] = {0}; + +#endif + +int64_t sum(int64_t *a, int len) { + int64_t res = 0; + int i = 0; + for (i=0; i umax) umax = a[i]; + if (a[i] < umin) umin = a[i]; + uavg += a[i]; + } + *avg = uavg * 1.0 / len / proc_freq; + *max = umax * 1.0 / proc_freq; + *min = umin * 1.0 / proc_freq; + return 1; +} + +uint64_t get_sum(uint64_t *a, int len) { + int i = 0; + uint64_t all = 0; + for (i = 0; i < len; i++) { + all += a[i]; + } + return all; +} + +int display_stats(int nthreads) +{ +#ifdef SHOW_PERF + double avg, max, min; + int i; + + fprintf(stderr, "[steps in main_mem]\n"); + fprintf(stderr, "time_parse_arg: %0.2lf s\n", gprof[G_PREPARE] * 1.0 / proc_freq); + fprintf(stderr, "time_load_idx: %0.2lf s\n", gprof[G_LOAD_IDX] * 1.0 / proc_freq); + fprintf(stderr, "time_pipeline: %0.2lf s\n", gprof[G_PIPELINE] * 1.0 / proc_freq); + fprintf(stderr, "time_all: %0.2lf s\n", gprof[G_ALL] * 1.0 / proc_freq); + + fprintf(stderr, "\n[steps in pipeline]\n"); + fprintf(stderr, "time_read: %0.2lf s\n", gprof[G_READ] * 1.0 / proc_freq); + fprintf(stderr, "time_compute: %0.2lf s\n", gprof[G_COMPUTE] * 1.0 / proc_freq); + fprintf(stderr, "time_write: %0.2lf s\n", gprof[G_WRITE] * 1.0 / proc_freq); + + fprintf(stderr, "\n[steps in mem_process_seqs]\n"); + fprintf(stderr, "time_mem_prepare: %0.2lf s\n", gprof[G_MEM_PREPARE] * 1.0 / proc_freq); + fprintf(stderr, "time_mem_kernel: %0.2lf s\n", gprof[G_MEM_KERNEL] * 1.0 / proc_freq); + fprintf(stderr, "time_mem_pestat: %0.2lf s\n", gprof[G_MEM_PESTAT] * 1.0 / proc_freq); + fprintf(stderr, "time_mem_sam: %0.2lf s\n", gprof[G_MEM_SAM] * 1.0 / proc_freq); + + fprintf(stderr, "\n[steps in kernel]\n"); + find_opt(tprof[T_SEED_ALL], nthreads, &max, &min, &avg); + fprintf(stderr, "time_seed_all: %0.2lf (%0.2lf, %0.2lf) s\n", avg, max, min); + find_opt(tprof[T_CHAIN_ALL], nthreads, &max, &min, &avg); + fprintf(stderr, "time_chain_all: %0.2lf (%0.2lf, %0.2lf) s\n", avg, max, min); + find_opt(tprof[T_ALN_ALL], nthreads, &max, &min, &avg); + fprintf(stderr, "time_aln_all: %0.2lf (%0.2lf, %0.2lf) s\n", avg, max, min); + find_opt(tprof[T_INS_SIZE], nthreads, &max, &min, &avg); + fprintf(stderr, "time_ins_size_all: %0.2lf (%0.2lf, %0.2lf) s\n", avg, max, min); + + fprintf(stderr, "\n[steps in seeding]\n"); + find_opt(tprof[T_SEED_1], nthreads, &max, &min, &avg); + fprintf(stderr, "time_seed_1: %0.2lf s %0.2lf s %0.2lf s\n", max, min, avg); + find_opt(tprof[T_SEED_2], nthreads, &max, &min, &avg); + fprintf(stderr, "time_seed_2: %0.2lf s\n", avg); + find_opt(tprof[T_SEED_3], nthreads, &max, &min, &avg); + fprintf(stderr, "time_seed_3: %0.2lf s\n", avg); + + fprintf(stderr, "\n[steps in chain]\n"); + find_opt(tprof[T_GEN_CHAIN], nthreads, &max, &min, &avg); + fprintf(stderr, "time_gen_chain: %0.2lf s\n", avg); + find_opt(tprof[T_FLT_CHAIN], nthreads, &max, &min, &avg); + fprintf(stderr, "time_flt_chain: %0.2lf s\n", avg); + find_opt(tprof[T_FLT_CHANNED_SEEDS], nthreads, &max, &min, &avg); + fprintf(stderr, "time_flt_chained_seeds: %0.2lf s\n", avg); + find_opt(tprof[T_SAL], nthreads, &max, &min, &avg); + fprintf(stderr, "time_sal: %0.2lf s\n", avg); + find_opt(tprof[T_BSW], nthreads, &max, &min, &avg); + fprintf(stderr, "time_bsw: %0.2lf s\n", avg); + + fprintf(stderr, "\n[steps in gen sam]\n"); + find_opt(tprof[T_SAM_MATESW], nthreads, &max, &min, &avg); + fprintf(stderr, "time_mate_sw: %0.2lf s\n", avg); + find_opt(tprof[T_KSW_ALIGN2], nthreads, &max, &min, &avg); + fprintf(stderr, "time_ksw_align2: %0.2lf s\n", avg); + find_opt(tprof[T_KSW_LOOP], nthreads, &max, &min, &avg); + fprintf(stderr, "time_ksw_loop: %0.2lf s\n", avg); + find_opt(tprof[T_KSW_REVERSE], nthreads, &max, &min, &avg); + fprintf(stderr, "time_ksw_reverse: %0.2lf s\n", avg); + find_opt(tprof[T_SAM_REG2ALN], nthreads, &max, &min, &avg); + fprintf(stderr, "time_reg2aln: %0.2lf s\n", avg); + + fprintf(stderr, "time_ksw_loop: %0.2lf s\n", gprof[G_KSW_LOOP] * 1.0 / proc_freq); + fprintf(stderr, "time_ksw_end_loop: %0.2lf s\n", gprof[G_KSW_END_LOOP] * 1.0 / proc_freq); + + fprintf(stderr, "seq num: %ld\n", gdat[0]); + fprintf(stderr, "full num: %ld\n", gdat[1]); + fprintf(stderr, "percent: %0.2lf%c\n", (double)gdat[1] / gdat[0] * 100, '%'); + + fprintf(stderr, "all_match_len: %ld\n", all_match_len); + fprintf(stderr, "all_seq_num: %ld\n", all_seq_num); + fprintf(stderr, "all_type_hits: %ld\n", all_type_hits); + fprintf(stderr, "seed_time: %f\n", seed_time); + fprintf(stderr, "all_match_len: %ld\n", get_sum(tprof[T_SEED_LEN], nthreads)); + +#define PRINT_SEED_TIME(mark) \ + find_opt(tprof[T_SEED_##mark], nthreads, &max, &min, &avg); \ + fprintf(stderr, "time_seed_%s: %0.2lf s %0.2lf s %0.2lf s\n", #mark, max, min, avg); + +#if 1 +// PRINT_SEED_TIME(1_ALL); +// PRINT_SEED_TIME(1_0); +// PRINT_SEED_TIME(1_1); +// PRINT_SEED_TIME(1_2); +// PRINT_SEED_TIME(1_3); + PRINT_SEED_TIME(1_3_1); +// PRINT_SEED_TIME(1_3_2); +// PRINT_SEED_TIME(1_3_3); +// PRINT_SEED_TIME(1_3_4); +// PRINT_SEED_TIME(1_3_5); +// PRINT_SEED_TIME(1_3_6); +// PRINT_SEED_TIME(1_3_7); +#endif +#if 1 +// PRINT_SEED_TIME(2_ALL); +// PRINT_SEED_TIME(2_0); +// PRINT_SEED_TIME(2_1); +// PRINT_SEED_TIME(2_2); + PRINT_SEED_TIME(2_2_0); +// PRINT_SEED_TIME(2_2_1); +// PRINT_SEED_TIME(2_2_2); +// PRINT_SEED_TIME(2_2_3); +#endif +#if 1 +// PRINT_SEED_TIME(3_ALL); +// PRINT_SEED_TIME(3_0); +// PRINT_SEED_TIME(3_1); +// PRINT_SEED_TIME(3_2); +// PRINT_SEED_TIME(3_3); + PRINT_SEED_TIME(3_3_0); +// PRINT_SEED_TIME(3_3_1); +// PRINT_SEED_TIME(3_3_2); +#endif + double all = 0; + for (i = 0; i < 50; ++i) { + //all += sum(tdat[i], nthreads); + // fprintf(stderr, "sum %d: %ld\n", i, sum(tdat[i], nthreads)); + } + for (i = 0; i < 50; ++i) { + //all += sum(tdat[i], nthreads); + // fprintf(stderr, "%d: %f\n", i, sum(tdat[i], nthreads) * 100 / all); + } +#if 0 + uint64_t b64 = 0, u64 = 0; + for (i = 0; i < 256; ++i) { + uint64_t s = sum(t_sd[i], nthreads); + if (i < 64) + b64 += s; + else + u64 += s; + fprintf(stderr, "addr %d: %ld\n", i, s); + } + fprintf(stderr, "b64 %ld; u64 %ld\n", b64, u64); +#endif + // fprintf(stderr, "sum 0: %ld\n", sum(tdat[TD_SEED_1_0], nthreads)); + // fprintf(stderr, "sum 1: %ld\n", sum(tdat[TD_SEED_1_1], nthreads)); + // fprintf(stderr, "sum 2: %ld\n", sum(tdat[TD_SEED_1_2], nthreads)); + // fprintf(stderr, "sum 3: %ld\n", sum(tdat[TD_SEED_1_3], nthreads)); + // fprintf(stderr, "sum 4: %ld\n", sum(tdat[TD_SEED_1_4], nthreads)); + // fprintf(stderr, "sum 5: %ld\n", sum(tdat[TD_SEED_1_5], nthreads)); + // int i; + // for (i=0; i + +#define USE_RDTSC 1 + +#define LIM_THREAD 128 +#define LIM_THREAD_PROF_TYPE 128 +#define LIM_GLOBAL_PROF_TYPE 128 +#define LIM_THREAD_DATA_TYPE 256 +#define LIM_GLOBAL_DATA_TYPE 256 + +#ifdef SHOW_PERF +extern uint64_t proc_freq; +extern uint64_t tprof[LIM_THREAD_PROF_TYPE][LIM_THREAD]; +extern uint64_t gprof[LIM_GLOBAL_PROF_TYPE]; +#endif + +#ifdef SHOW_DATA_PERF +extern int64_t tdat[LIM_THREAD_DATA_TYPE][LIM_THREAD]; +extern int64_t t_sd[LIM_THREAD_DATA_TYPE][LIM_THREAD]; +extern int64_t gdat[LIM_GLOBAL_DATA_TYPE]; +extern int64_t gd1[LIM_GLOBAL_DATA_TYPE]; +#endif + +#ifdef SHOW_PERF +#if USE_RDTSC +#define PROF_START(tmp_time) const uint64_t prof_tmp_##tmp_time = __rdtsc() +#define PROF_END(result, tmp_time) result += __rdtsc() - prof_tmp_##tmp_time +#else +#define PROF_START(tmp_time) uint64_t prof_tmp_##tmp_time = realtime_msec() +#define PROF_END(result, tmp_time) result += realtime_msec() - prof_tmp_##tmp_time +#endif +#else +#define PROF_START(tmp_time) +#define PROF_END(result, tmp_time) +#endif + +// tdat +enum { + TD_SEED_1_0 = 0, + TD_SEED_1_1, + TD_SEED_1_2, + TD_SEED_1_3, + TD_SEED_1_4, + TD_SEED_1_5, +}; + +// GLOBAL +enum { + G_ALL = 0, + G_PIPELINE, + G_READ, + G_WRITE, + G_COMPUTE, + G_PREPARE, + G_LOAD_IDX, + G_MEM_PREPARE, + G_MEM_KERNEL, + G_MEM_PESTAT, + G_MEM_SAM, + G_KSW_LOOP, + G_KSW_END_LOOP, + G_read_seq +}; + +// THREAD +enum { + T_SEED_ALL = 0, + T_CHAIN_ALL, + T_ALN_ALL, + T_INS_SIZE, + T_SEED_1, + T_SEED_2, + T_SEED_3, + T_SAL, + T_GEN_CHAIN, + T_FLT_CHAIN, + T_FLT_CHANNED_SEEDS, + T_READ_SA, + T_BSW, + T_BSW_ALL, + T_SAM_MATESW, + T_KSW_ALIGN2, + T_KSW_REVERSE, + T_SAM_REG2ALN, + T_KSW_LOOP, + T_SEED_LEN, + T_SEED_1_ALL, + T_SEED_1_0, + T_SEED_1_1, + T_SEED_1_2, + T_SEED_1_3, + T_SEED_1_3_1, + T_SEED_1_3_2, + T_SEED_1_3_3, + T_SEED_1_3_4, + T_SEED_1_3_5, + T_SEED_1_3_6, + T_SEED_1_3_7, + T_SEED_2_ALL, + T_SEED_2_0, + T_SEED_2_1, + T_SEED_2_2, + T_SEED_2_2_0, + T_SEED_2_2_1, + T_SEED_2_2_2, + T_SEED_2_2_3, + T_SEED_3_ALL, + T_SEED_3_0, + T_SEED_3_1, + T_SEED_3_2, + T_SEED_3_3, + T_SEED_3_3_0, + T_SEED_3_3_1, + T_SEED_3_3_2 +}; + +int display_stats(int); + +#endif diff --git a/rle.h b/rle.h index 4f8946d..ccdddde 100644 --- a/rle.h +++ b/rle.h @@ -4,10 +4,14 @@ #include #ifdef __GNUC__ +#ifndef LIKELY #define LIKELY(x) __builtin_expect((x),1) +#endif #else +#ifndef LIKELY #define LIKELY(x) (x) #endif +#endif #ifdef __cplusplus extern "C" { diff --git a/utils.h b/utils.h index d4d4550..995c9f5 100644 --- a/utils.h +++ b/utils.h @@ -27,8 +27,10 @@ #ifndef LH3_UTILS_H #define LH3_UTILS_H +#include #include #include +#include #include #ifdef __GNUC__ @@ -47,6 +49,71 @@ #define xassert(cond, msg) if ((cond) == 0) _err_fatal_simple_core(__func__, msg) +#define PROG_NAME "hybalign" + +///////////// added for hyb-align ///////////// + +#undef MAX +#undef MIN +#define MAX(x, y) ((x) > (y) ? (x) : (y)) +#define MIN(x, y) ((x) < (y) ? (x) : (y)) + +#undef MAX_PATH +#define MAX_PATH 2048 + +#ifdef __GNUC__ +#ifndef LIKELY +#define LIKELY(x) __builtin_expect((x), 1) +#endif +#ifndef UNLIKELY +#define UNLIKELY(x) __builtin_expect((x), 0) +#endif +#else +#ifndef LIKELY +#define LIKELY(x) (x) +#endif +#ifndef UNLIKELY +#define UNLIKELY(x) (x) +#endif +#endif + +#define err_check_true(ret_code, right_val) \ + if ((ret_code) != (right_val)) \ + err_fatal("err_check", "Value not right: True-Val %d\n", right_val) + +#define err_check_false(ret_code, err_val) \ + if ((ret_code) == (err_val)) \ + err_fatal("err_check", "Value not right: False-Val %d\n", err_val) + +#define _load_file_to_data(fn, data) \ + do { \ + FILE* fp = NULL; \ + struct stat st; \ + err_check_true(stat(fn, &st), 0); \ + fp = xopen(fn, "r"); \ + data = (uint8_t*)malloc(st.st_size); \ + err_fread_noeof(data, 1, st.st_size, fp); \ + err_fclose(fp); \ + } while (0) + +#define _load_file_to_data_ps(prefix, suffix, data) \ + do { \ + char fn[MAX_PATH]; \ + sprintf(fn, "%s%s", prefix, suffix); \ + _load_file_to_data(fn, data); \ + } while (0) + +#define _store_data_to_file(fn, data, ele_size, nums) \ + do { \ + FILE* fp; \ + fp = xopen(fn, "wb"); \ + err_fwrite(data, ele_size, nums, fp); \ + err_fflush(fp); \ + err_fclose(fp); \ + } while (0) + +///////////// end of added for hyb-align ///////////// + typedef struct { uint64_t x, y; } pair64_t;