diff --git a/.vscode/launch.json b/.vscode/launch.json index 7c5134d..d1ecb76 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -49,6 +49,18 @@ "~/data/reference/human_g1k_v37_decoy.fasta.kmer" ], "cwd": "${workspaceFolder}", // 当前工作路径:当前文件所在的工作空间 + }, + { + "name": "share mem", + "preLaunchTask": "Build", + "type": "cppdbg", + "request": "launch", + "program": "${workspaceRoot}/fastbwa", + "args": [ + "shm", + "~/data1/fmt_ref/human_g1k_v37_decoy.fasta" + ], + "cwd": "${workspaceFolder}", // 当前工作路径:当前文件所在的工作空间 } ] } \ No newline at end of file diff --git a/.vscode/settings.json b/.vscode/settings.json index f62b57e..ba57521 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -35,6 +35,11 @@ "bitset": "c", "iterator": "c", "memory": "c", - "__locale": "c" + "__locale": "c", + "stdint.h": "c", + "bntseq.h": "c", + "inttypes.h": "c", + "ertindex.h": "c", + "ertseeding.h": "c" } } \ No newline at end of file diff --git a/Makefile b/Makefile index fb6e41b..f5af91e 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,4 @@ CC= gcc -#CC= clang --analyze -# CFLAGS= -g -Wall -Wno-unused-function -O2 CFLAGS= -g -Wall -Wno-unused-function -mavx2 -O2 WRAP_MALLOC=-DUSE_MALLOC_WRAPPERS @@ -18,7 +16,7 @@ AOBJS= bwashm.o bwase.o bwaseqio.o bwtgap.o bwtaln.o bamlite.o \ bwtsw2_core.o bwtsw2_main.o bwtsw2_aux.o bwt_lite.o \ bwtsw2_chain.o fastmap.o bwtsw2_pair.o profiling.o \ fmt_idx.o ksw_extend2_avx2.o ksw_extend2_avx2_u8.o \ - debug.o + debug.o ertindex.o ertseeding.o PROG= fastbwa INCLUDES= LIBS= -lm -lz -lpthread -ldl diff --git a/build_ert.sh b/build_ert.sh new file mode 100755 index 0000000..dbadb53 --- /dev/null +++ b/build_ert.sh @@ -0,0 +1,3 @@ +#!/bin/bash +time ./fastbwa bwt2ert ~/data1/fmt_ref/human_g1k_v37_decoy.fasta -t 64 +#time ./fastbwa bwt2ert ~/reference/human_g1k_v37_decoy.fasta -t 64 diff --git a/bwa.c b/bwa.c index a595973..1d6bb1c 100644 --- a/bwa.c +++ b/bwa.c @@ -30,18 +30,20 @@ #include #include #include +#include #include "bntseq.h" 
#include "bwa.h" #include "ksw.h" #include "utils.h" #include "kstring.h" #include "kvec.h" +#include "ertindex.h" #ifdef USE_MALLOC_WRAPPERS # include "malloc_wrap.h" #endif -int bwa_verbose = 3; +int bwa_verbose = 4; int bwa_dbg = 0; char bwa_rg_id[256]; char *bwa_pg; @@ -486,6 +488,87 @@ FMTIndex *bwa_idx_load_fmt(const char *hint) return fmt; } +bwaidx_t *bwa_ertidx_load_from_disk(const char *hint) +{ + bwaidx_t *idx = 0; + int i, c; + const char *prefix = hint; + if (prefix == 0) { + if (bwa_verbose >= 1) fprintf(stderr, "[E::%s] fail to locate the index files\n", __func__); + return 0; + } + idx = calloc(1, sizeof(bwaidx_t)); + + idx->bns = bns_restore(prefix); + for (i = c = 0; i < idx->bns->n_seqs; ++i) + if (idx->bns->anns[i].is_alt) ++c; + if (bwa_verbose >= 3) + fprintf(stderr, "[M::%s] read %d ALT contigs\n", __func__, c); + + idx->pac = calloc(idx->bns->l_pac/4+1, 1); + err_fread_noeof(idx->pac, 1, idx->bns->l_pac/4+1, idx->bns->fp_pac); // concatenated 2-bit encoded sequence + err_fclose(idx->bns->fp_pac); + idx->bns->fp_pac = 0; + + // load ert + { + char kmer_tbl_file_name[PATH_MAX]; + char ml_tbl_file_name[PATH_MAX]; + sprintf(kmer_tbl_file_name, "%s.ert.kmer.table", prefix); + sprintf(ml_tbl_file_name, "%s.ert.mlt.table", prefix); + FILE *kmer_tbl_fd, *ml_tbl_fd; + kmer_tbl_fd = fopen(kmer_tbl_file_name, "rb"); + if (kmer_tbl_fd == NULL) { + fprintf(stderr, "[M::%s::ERT] Can't open k-mer index\n.", __func__); + exit(1); + } + ml_tbl_fd = fopen(ml_tbl_file_name, "rb"); + if (ml_tbl_fd == NULL) { + fprintf(stderr, "[M::%s::ERT] Can't open multi-level tree index\n.", __func__); + exit(1); + } + double ctime, rtime; + ctime = cputime(); + rtime = realtime(); + int64_t allocMem = numKmers * 8L; + + // + // Read k-mer index + // + idx->ert = (ERT *)calloc(1, sizeof(ERT)); + idx->ert->kmer_size = numKmers * sizeof(uint64_t); + idx->ert->kmer_offsets = (uint64_t *)malloc(idx->ert->kmer_size); + assert(idx->ert->kmer_offsets != NULL); + if (bwa_verbose >= 
3) { + fprintf(stderr, "[M::%s::ERT] Reading kmer index to memory\n", __func__); + } + err_fread_noeof(idx->ert->kmer_offsets, sizeof(uint64_t), numKmers, kmer_tbl_fd); + + // + // Read multi-level tree index + // + fseek(ml_tbl_fd, 0L, SEEK_END); + long size = ftell(ml_tbl_fd); + idx->ert->mlt_size = size; + allocMem += size; + idx->ert->mlt_table = (uint8_t *)malloc(size * sizeof(uint8_t)); + assert(idx->ert->mlt_table != NULL); + fseek(ml_tbl_fd, 0L, SEEK_SET); + if (bwa_verbose >= 3) { + fprintf(stderr, "[M::%s::ERT] Reading multi-level tree index to memory\n", __func__); + } + err_fread_noeof(idx->ert->mlt_table, sizeof(uint8_t), size, ml_tbl_fd); + fclose(kmer_tbl_fd); + fclose(ml_tbl_fd); + if (bwa_verbose >= 3) + { + fprintf(stderr, "[M::%s::ERT] Index tables loaded in %.3f CPU sec, %.3f real sec...\n", __func__, cputime() - ctime, realtime() - rtime); + } + } + + return idx; +} + bwaidx_t *bwa_idx_load_from_disk(const char *hint, int which) { bwaidx_t *idx; @@ -497,9 +580,14 @@ bwaidx_t *bwa_idx_load_from_disk(const char *hint, int which) } idx = calloc(1, sizeof(bwaidx_t)); - if (which & BWA_IDX_BWT) idx->fmt = bwa_idx_load_fmt(hint); - //if (which & BWA_IDX_BWT) idx->bwt = bwa_idx_load_bwt(hint); - //idx->bwt->kmer_hash = idx->fmt->kmer_hash; + + if (which & BWA_IDX_FMT) idx->fmt = bwa_idx_load_fmt(hint); + if (which & BWA_IDX_BWT) + { + idx->bwt = bwa_idx_load_bwt(hint); + if (which & BWA_IDX_FMT) + idx->bwt->kmer_hash = idx->fmt->kmer_hash; + } if (which & BWA_IDX_BNS) { @@ -515,8 +603,10 @@ bwaidx_t *bwa_idx_load_from_disk(const char *hint, int which) err_fclose(idx->bns->fp_pac); idx->bns->fp_pac = 0; // 赋值到fmt中对应的pac - idx->fmt->l_pac = idx->bns->l_pac; - idx->fmt->pac = idx->pac; + if (which & BWA_IDX_FMT) { + idx->fmt->l_pac = idx->bns->l_pac; + idx->fmt->pac = idx->pac; + } } } free(prefix); @@ -550,8 +640,8 @@ int bwa_mem2idx(int64_t l_mem, uint8_t *mem, bwaidx_t *idx) // generate idx->bwt x = sizeof(bwt_t); idx->bwt = malloc(x); 
memcpy(idx->bwt, mem + k, x); k += x; x = idx->bwt->bwt_size * 4; idx->bwt->bwt = (uint32_t*)(mem + k); k += x; - x = SA_BYTES(idx->bwt->n_sa); idx->bwt->sa = (bwtint_t*)(mem + k); k += x; - + x = idx->bwt->n_sa * sizeof(bwtint_t); idx->bwt->sa = (bwtint_t*)(mem + k); k += x; + // generate idx->bns and idx->pac x = sizeof(bntseq_t); idx->bns = malloc(x); memcpy(idx->bns, mem + k, x); k += x; x = idx->bns->n_holes * sizeof(bntamb1_t); idx->bns->ambs = (bntamb1_t*)(mem + k); k += x; @@ -567,6 +657,176 @@ int bwa_mem2idx(int64_t l_mem, uint8_t *mem, bwaidx_t *idx) return 0; } +static void mem_to_bnspac(bwaidx_t *idx, uint8_t **mem_, int64_t *k_) +{ + int i; + int64_t x, k = *k_; + uint8_t *mem = *mem_; + // generate idx->bns and idx->pac + x = sizeof(bntseq_t); idx->bns = malloc(x); memcpy(idx->bns, mem + k, x); k += x; + x = idx->bns->n_holes * sizeof(bntamb1_t); idx->bns->ambs = (bntamb1_t*)(mem + k); k += x; + x = idx->bns->n_seqs * sizeof(bntann1_t); idx->bns->anns = malloc(x); memcpy(idx->bns->anns, mem + k, x); k += x; + for (i = 0; i < idx->bns->n_seqs; ++i) { + idx->bns->anns[i].name = (char*)(mem + k); k += strlen(idx->bns->anns[i].name) + 1; + idx->bns->anns[i].anno = (char*)(mem + k); k += strlen(idx->bns->anns[i].anno) + 1; + } + idx->pac = (uint8_t*)(mem + k); k += idx->bns->l_pac/4+1; + *k_ = k; + *mem_ = mem; +} + +int bwa_mem2fmtidx(int64_t l_mem, uint8_t *mem, bwaidx_t *idx) +{ + int64_t k = 0, x; + // generate idx->fmt + x = sizeof(FMTIndex); idx->fmt = malloc(x); memcpy(idx->fmt, mem + k, x); k += x; + x = idx->fmt->bwt_size * 4; idx->fmt->bwt = (uint32_t*)(mem + k); k += x; + x = SA_BYTES(idx->fmt->n_sa); idx->fmt->sa = (uint8_t*)(mem + k); k += x; + // kmer hash + x = (1 << (10 << 1)) * sizeof(KmerEntryArr); + idx->fmt->kmer_hash.ke10 = (KmerEntryArr*)(mem + k); k += x; + x = (1 << (11 << 1)) * sizeof(KmerEntry); + idx->fmt->kmer_hash.ke11 = (KmerEntry*)(mem + k); k += x; + x = (1 << (12 << 1)) * sizeof(KmerEntry); + idx->fmt->kmer_hash.ke12 = 
(KmerEntry*)(mem + k); k += x; +#if HASH_KMER_LEN > 12 + x = (1 << (13 << 1)) * sizeof(KmerEntry); + idx->fmt->kmer_hash.ke13 = (KmerEntry*)(mem + k); k += x; +#endif +#if HASH_KMER_LEN > 13 + x = (1 << (14 << 1)) * sizeof(KmerEntry); + idx->fmt->kmer_hash.ke14 = (KmerEntry*)(mem + k); k += x; +#endif + + // generate idx->bns and idx->pac + mem_to_bnspac(idx, &mem, &k); + assert(k == l_mem); + idx->l_mem = k; idx->mem = mem; + idx->fmt->pac = idx->pac; + return 0; +} + +int bwa_mem2ertidx(int64_t l_mem, uint8_t *mem, bwaidx_t *idx) +{ + int64_t k = 0, x; + // generate idx->ert + x = sizeof(ERT); idx->ert = malloc(x); memcpy(idx->ert, mem + k, x); k += x; + x = idx->ert->mlt_size; idx->ert->mlt_table = (uint8_t *)(mem + k); k += x; + x = idx->ert->kmer_size; idx->ert->kmer_offsets = (uint64_t *)(mem + k); k += x; + + // generate idx->bns and idx->pac + mem_to_bnspac(idx, &mem, &k); + assert(k == l_mem); + idx->l_mem = k; + idx->mem = mem; + + return 0; +} + +static void move_bns_to_mem(bwaidx_t *idx, uint8_t **mem_, int64_t *k_) +{ + int i; + int64_t x, tmp, k=*k_; + uint8_t *mem = *mem_; + // copy idx->bns + tmp = idx->bns->n_seqs * sizeof(bntann1_t) + idx->bns->n_holes * sizeof(bntamb1_t); + for (i = 0; i < idx->bns->n_seqs; ++i) // compute the size of heap-allocated memory + tmp += strlen(idx->bns->anns[i].name) + strlen(idx->bns->anns[i].anno) + 2; + mem = realloc(mem, k + sizeof(bntseq_t) + tmp); + x = sizeof(bntseq_t); memcpy(mem + k, idx->bns, x); k += x; + x = idx->bns->n_holes * sizeof(bntamb1_t); memcpy(mem + k, idx->bns->ambs, x); k += x; + free(idx->bns->ambs); + x = idx->bns->n_seqs * sizeof(bntann1_t); memcpy(mem + k, idx->bns->anns, x); k += x; + for (i = 0; i < idx->bns->n_seqs; ++i) { + x = strlen(idx->bns->anns[i].name) + 1; memcpy(mem + k, idx->bns->anns[i].name, x); k += x; + x = strlen(idx->bns->anns[i].anno) + 1; memcpy(mem + k, idx->bns->anns[i].anno, x); k += x; + free(idx->bns->anns[i].name); free(idx->bns->anns[i].anno); + } + 
free(idx->bns->anns); + *k_ = k; + *mem_ = mem; +} + +static void move_pac_to_mem(bwaidx_t *idx, uint8_t **mem_, int64_t *k_) +{ + int64_t x, k = *k_; + uint8_t *mem = *mem_; + // copy idx->pac + x = idx->bns->l_pac/4+1; + mem = realloc(mem, k + x); + memcpy(mem + k, idx->pac, x); k += x; + free(idx->bns); idx->bns = 0; + free(idx->pac); idx->pac = 0; + *k_ = k; + *mem_ = mem; +} + +int bwa_ertidx2mem(bwaidx_t *idx) +{ + int64_t x, k; + uint8_t *mem; + // copy idx->ert + x = idx->ert->mlt_size; + mem = realloc(idx->ert->mlt_table, sizeof(ERT) + x); idx->ert->mlt_table = 0; + memmove(mem + sizeof(ERT), mem, x); + memcpy(mem, idx->ert, sizeof(ERT)); k = sizeof(ERT) + x; + + x = idx->ert->kmer_size; + mem = realloc(mem, k + x); memcpy(mem + k, idx->ert->kmer_offsets, x); k += x; + free(idx->ert->kmer_offsets); idx->ert->kmer_offsets = 0; + free(idx->ert); + idx->ert = 0; + + // copy idx->bns + move_bns_to_mem(idx, &mem, &k); + // copy idx->pac + move_pac_to_mem(idx, &mem, &k); + + return bwa_mem2ertidx(k, mem, idx); +} + +int bwa_fmtidx2mem(bwaidx_t *idx) +{ + int64_t k, x; + uint8_t *mem; + + // copy idx->fmt + x = idx->fmt->bwt_size * 4; + mem = realloc(idx->fmt->bwt, sizeof(FMTIndex) + x); idx->fmt->bwt = 0; + memmove(mem + sizeof(FMTIndex), mem, x); + memcpy(mem, idx->fmt, sizeof(FMTIndex)); k = sizeof(FMTIndex) + x; + x = SA_BYTES(idx->fmt->n_sa); mem = realloc(mem, k + x); memcpy(mem + k, idx->fmt->sa, x); k += x; + // kmer hash + x = (1 << (10 << 1)) * sizeof(KmerEntryArr); + mem = realloc(mem, k + x); memcpy(mem + k, idx->fmt->kmer_hash.ke10, x); k += x; + x = (1 << (11 << 1)) * sizeof(KmerEntry); + mem = realloc(mem, k + x); memcpy(mem + k, idx->fmt->kmer_hash.ke11, x); k += x; + x = (1 << (12 << 1)) * sizeof(KmerEntry); + mem = realloc(mem, k + x); memcpy(mem + k, idx->fmt->kmer_hash.ke12, x); k += x; +#if HASH_KMER_LEN > 12 + x = (1 << (13 << 1)) * sizeof(KmerEntry); + mem = realloc(mem, k + x); memcpy(mem + k, idx->fmt->kmer_hash.ke13, x); k += x; +#endif 
+#if HASH_KMER_LEN > 13 + x = (1 << (14 << 1)) * sizeof(KmerEntry); + mem = realloc(mem, k + x); memcpy(mem + k, idx->fmt->kmer_hash.ke14, x); k += x; +#endif + free(idx->fmt->kmer_hash.ke10); + free(idx->fmt->kmer_hash.ke11); + free(idx->fmt->kmer_hash.ke12); + free(idx->fmt->kmer_hash.ke13); + free(idx->fmt->kmer_hash.ke14); + free(idx->fmt->sa); + free(idx->fmt); idx->fmt = 0; + + // copy idx->bns + move_bns_to_mem(idx, &mem, &k); + // copy idx->pac + move_pac_to_mem(idx, &mem, &k); + + return bwa_mem2fmtidx(k, mem, idx); +} + int bwa_idx2mem(bwaidx_t *idx) { int i; @@ -574,14 +834,16 @@ int bwa_idx2mem(bwaidx_t *idx) uint8_t *mem; // copy idx->bwt + x = idx->bwt->bwt_size * 4; mem = realloc(idx->bwt->bwt, sizeof(bwt_t) + x); idx->bwt->bwt = 0; memmove(mem + sizeof(bwt_t), mem, x); memcpy(mem, idx->bwt, sizeof(bwt_t)); k = sizeof(bwt_t) + x; - x = SA_BYTES(idx->bwt->n_sa); mem = realloc(mem, k + x); memcpy(mem + k, idx->bwt->sa, x); k += x; + x = idx->bwt->n_sa * sizeof(bwtint_t); mem = realloc(mem, k + x); memcpy(mem + k, idx->bwt->sa, x); k += x; free(idx->bwt->sa); free(idx->bwt); idx->bwt = 0; + // copy idx->bns tmp = idx->bns->n_seqs * sizeof(bntann1_t) + idx->bns->n_holes * sizeof(bntamb1_t); for (i = 0; i < idx->bns->n_seqs; ++i) // compute the size of heap-allocated memory diff --git a/bwa.h b/bwa.h index 9cf7f78..c8012fc 100644 --- a/bwa.h +++ b/bwa.h @@ -36,7 +36,8 @@ #define BWA_IDX_BWT 0x1 #define BWA_IDX_BNS 0x2 #define BWA_IDX_PAC 0x4 -#define BWA_IDX_ALL 0x7 +#define BWA_IDX_FMT 0x8 +#define BWA_IDX_ALL 0xF #define BWA_CTL_SIZE 0x10000 @@ -47,13 +48,21 @@ #define BWA_DBG_QNAME 0x1 +typedef struct { + uint64_t *kmer_offsets; // ert kmer + uint8_t *mlt_table; // ert mlt + uint64_t kmer_size; + uint64_t mlt_size; +} ERT; + typedef struct { bwt_t *bwt; // FM-index FMTIndex *fmt;// FMT-index bntseq_t *bns; // information on the reference sequences uint8_t *pac; // the actual 2-bit encoded reference sequences with 'N' converted to a random base + ERT 
*ert; - int is_shm; + int is_shm; int64_t l_mem; uint8_t *mem; } bwaidx_t; @@ -91,10 +100,17 @@ extern "C" { bwaidx_t *bwa_idx_load_from_shm(const char *hint); bwaidx_t *bwa_idx_load_from_disk(const char *hint, int which); + bwaidx_t *bwa_fmtidx_load_from_shm(const char *hint); + bwaidx_t *bwa_ertidx_load_from_shm(const char *hint); + bwaidx_t *bwa_ertidx_load_from_disk(const char *hint); bwaidx_t *bwa_idx_load(const char *hint, int which); void bwa_idx_destroy(bwaidx_t *idx); int bwa_idx2mem(bwaidx_t *idx); + int bwa_fmtidx2mem(bwaidx_t *idx); + int bwa_ertidx2mem(bwaidx_t *idx); int bwa_mem2idx(int64_t l_mem, uint8_t *mem, bwaidx_t *idx); + int bwa_mem2fmtidx(int64_t l_mem, uint8_t *mem, bwaidx_t *idx); + int bwa_mem2ertidx(int64_t l_mem, uint8_t *mem, bwaidx_t *idx); void bwa_print_sam_hdr(const bntseq_t *bns, const char *hdr_line); char *bwa_set_rg(const char *s); diff --git a/bwashm.c b/bwashm.c index e3f765c..99c84b8 100644 --- a/bwashm.c +++ b/bwashm.c @@ -9,8 +9,9 @@ #include #include "bwa.h" -int bwa_shm_stage(bwaidx_t *idx, const char *hint, const char *_tmpfn) +int bwa_shm_stage(bwaidx_t *idx, const char *hint, int useERT, const char *_tmpfn) { + const char *name; uint8_t *shm, *shm_idx; uint16_t *cnt; @@ -26,7 +27,7 @@ int bwa_shm_stage(bwaidx_t *idx, const char *hint, const char *_tmpfn) to_init = 1; } if (shmid < 0) return -1; - ftruncate(shmid, BWA_CTL_SIZE); + if (ftruncate(shmid, BWA_CTL_SIZE) < 0) return -1; shm = mmap(0, BWA_CTL_SIZE, PROT_READ|PROT_WRITE, MAP_SHARED, shmid, 0); cnt = (uint16_t*)shm; if (to_init) { @@ -34,7 +35,11 @@ int bwa_shm_stage(bwaidx_t *idx, const char *hint, const char *_tmpfn) cnt[1] = 4; } - if (idx->mem == 0) bwa_idx2mem(idx); + // 将所有索引数据放在连续的内存空间里 + if (idx->mem == 0) { + if (useERT) bwa_ertidx2mem(idx); + else bwa_fmtidx2mem(idx); + } if (tmpfn) { FILE *fp; @@ -63,7 +68,7 @@ int bwa_shm_stage(bwaidx_t *idx, const char *hint, const char *_tmpfn) memcpy(shm + cnt[1], &idx->l_mem, 8); memcpy(shm + cnt[1] + 8, name, l 
- 8); cnt[1] += l; ++cnt[0]; - ftruncate(shmid, idx->l_mem); + if (ftruncate(shmid, idx->l_mem) < 0) return -1; shm_idx = mmap(0, idx->l_mem, PROT_READ|PROT_WRITE, MAP_SHARED, shmid, 0); if (tmpfn) { FILE *fp; @@ -79,11 +84,58 @@ int bwa_shm_stage(bwaidx_t *idx, const char *hint, const char *_tmpfn) memcpy(shm_idx, idx->mem, idx->l_mem); free(idx->mem); } - bwa_mem2idx(idx->l_mem, shm_idx, idx); + if (useERT) + bwa_mem2ertidx(idx->l_mem, shm_idx, idx); + else + bwa_mem2fmtidx(idx->l_mem, shm_idx, idx); idx->is_shm = 1; return 0; } +#define INIT_SHM_LOAD \ + const char *name; \ + uint8_t *shm, *shm_idx; \ + uint16_t *cnt, i; \ + char *p, path[PATH_MAX + 1]; \ + int shmid; \ + int64_t l_mem; \ + bwaidx_t *idx; \ + if (hint == 0 || hint[0] == 0) return 0; \ + for (name = hint + strlen(hint) - 1; name >= hint && *name != '/'; --name); \ + ++name; \ + if ((shmid = shm_open("/bwactl", O_RDONLY, 0)) < 0) return 0; \ + shm = mmap(0, BWA_CTL_SIZE, PROT_READ, MAP_SHARED, shmid, 0); \ + cnt = (uint16_t*)shm; \ + if (cnt[0] == 0) return 0; \ + for (i = 0, p = (char*)(shm + 4); i < cnt[0]; ++i) { \ + memcpy(&l_mem, p, 8); p += 8; \ + if (strcmp(p, name) == 0) break; \ + p += strlen(p) + 1; \ + } \ + if (i == cnt[0]) return 0; \ + strcat(strcpy(path, "/bwaidx-"), name); \ + if ((shmid = shm_open(path, O_RDONLY, 0)) < 0) return 0; \ + shm_idx = mmap(0, l_mem, PROT_READ, MAP_SHARED, shmid, 0); \ + idx = calloc(1, sizeof(bwaidx_t)); + +bwaidx_t *bwa_ertidx_load_from_shm(const char *hint_) +{ + char hint[PATH_MAX]; + sprintf(hint, "%s.ert", hint_); + INIT_SHM_LOAD; + bwa_mem2ertidx(l_mem, shm_idx, idx); + idx->is_shm = 1; + return idx; +} + +bwaidx_t *bwa_fmtidx_load_from_shm(const char *hint) +{ + INIT_SHM_LOAD; + bwa_mem2fmtidx(l_mem, shm_idx, idx); + idx->is_shm = 1; + return idx; +} + bwaidx_t *bwa_idx_load_from_shm(const char *hint) { const char *name; @@ -178,18 +230,22 @@ int bwa_shm_destroy(void) int main_shm(int argc, char *argv[]) { - int c, to_list = 0, to_drop = 0, ret = 
0; + int c, to_list = 0, to_drop = 0, ret = 0, useERT = 0; char *tmpfn = 0; - while ((c = getopt(argc, argv, "ldf:")) >= 0) { + char shm_prefix[PATH_MAX]; + while ((c = getopt(argc, argv, "ldf:Z")) >= 0) + { if (c == 'l') to_list = 1; else if (c == 'd') to_drop = 1; else if (c == 'f') tmpfn = optarg; + else if (c == 'Z') useERT = 1; } if (optind == argc && !to_list && !to_drop) { fprintf(stderr, "\nUsage: fastbwa shm [-d|-l] [-f tmpFile] [idxbase]\n\n"); fprintf(stderr, "Options: -d destroy all indices in shared memory\n"); fprintf(stderr, " -l list names of indices in shared memory\n"); - fprintf(stderr, " -f FILE temporary file to reduce peak memory\n\n"); + fprintf(stderr, " -f FILE temporary file to reduce peak memory\n"); + fprintf(stderr, " -Z use Ert as seeding index\n\n"); return 1; } if (optind < argc && (to_list || to_drop)) { @@ -197,15 +253,25 @@ int main_shm(int argc, char *argv[]) return 1; } if (optind < argc) { - if (bwa_shm_test(argv[optind]) == 0) { + if (useERT) + sprintf(shm_prefix, "%s.ert", argv[optind]); + else + sprintf(shm_prefix, "%s", argv[optind]); + if (bwa_shm_test(shm_prefix) == 0) + { bwaidx_t *idx; - idx = bwa_idx_load_from_disk(argv[optind], BWA_IDX_ALL); - if (bwa_shm_stage(idx, argv[optind], tmpfn) < 0) { + if (useERT) + idx = bwa_ertidx_load_from_disk(argv[optind]); + else + idx = bwa_idx_load_from_disk(argv[optind], BWA_IDX_BNS | BWA_IDX_PAC | BWA_IDX_FMT); + if (bwa_shm_stage(idx, shm_prefix, useERT, tmpfn) < 0) { fprintf(stderr, "[E::%s] failed to stage the index in shared memory\n", __func__); ret = 1; } bwa_idx_destroy(idx); - } else fprintf(stderr, "[M::%s] index '%s' is already in shared memory\n", __func__, argv[optind]); + } + else + fprintf(stderr, "[M::%s] index '%s' is already in shared memory\n", __func__, argv[optind]); } if (to_list) bwa_shm_list(); if (to_drop) bwa_shm_destroy(); diff --git a/bwt_gen.c b/bwt_gen.c index 3adcc22..5702791 100644 --- a/bwt_gen.c +++ b/bwt_gen.c @@ -876,7 +876,7 @@ static void 
BWTIncBuildRelativeRank(bgint_t* __restrict sortedRank, bgint_t* __r bgint_t i, c; bgint_t s, r; bgint_t lastRank, lastIndex; - bgint_t oldInverseSa0RelativeRank = 0; +// bgint_t oldInverseSa0RelativeRank = 0; bgint_t freq; lastIndex = numItem; @@ -887,7 +887,7 @@ static void BWTIncBuildRelativeRank(bgint_t* __restrict sortedRank, bgint_t* __r s = seq[numItem]; relativeRank[s] = numItem; if (lastRank == oldInverseSa0) { - oldInverseSa0RelativeRank = numItem; +// oldInverseSa0RelativeRank = numItem; oldInverseSa0++; // so that this segment of code is not run again lastRank++; // so that oldInverseSa0 become a sorted group with 1 item } @@ -920,7 +920,7 @@ static void BWTIncBuildRelativeRank(bgint_t* __restrict sortedRank, bgint_t* __r lastRank = r; relativeRank[s] = i; if (r == oldInverseSa0) { - oldInverseSa0RelativeRank = i; +// oldInverseSa0RelativeRank = i; oldInverseSa0++; // so that this segment of code is not run again lastRank++; // so that oldInverseSa0 become a sorted group with 1 item } @@ -950,14 +950,14 @@ static void BWTIncBuildBwt(unsigned int* insertBwt, const bgint_t *relativeRank, static void BWTIncMergeBwt(const bgint_t *sortedRank, const unsigned int* oldBwt, const unsigned int *insertBwt, unsigned int* __restrict mergedBwt, const bgint_t numOldBwt, const bgint_t numInsertBwt) { - unsigned int bitsInWordMinusBitPerChar; +// unsigned int bitsInWordMinusBitPerChar; bgint_t leftShift, rightShift; bgint_t o; bgint_t oIndex, iIndex, mIndex; bgint_t mWord, mChar, oWord, oChar; bgint_t numInsert; - bitsInWordMinusBitPerChar = BITS_IN_WORD - BIT_PER_CHAR; +// bitsInWordMinusBitPerChar = BITS_IN_WORD - BIT_PER_CHAR; oIndex = 0; iIndex = 0; diff --git a/bwtindex.c b/bwtindex.c index c8a1e75..0ff19b9 100644 --- a/bwtindex.c +++ b/bwtindex.c @@ -36,6 +36,7 @@ #include "utils.h" #include "rle.h" #include "rope.h" +#include "ertindex.h" #ifdef _DIVBWT #include "divsufsort.h" @@ -258,8 +259,41 @@ int bwa_build_kmer(int argc, char *argv[]) return 0; } +int 
bwa_bwt2ert(int argc, char *argv[]) +{ + char prefix[1024]; + char ert_kmer_file[1124]; + int c, num_threads = 1; + while ((c = getopt(argc, argv, "t:")) >= 0) + { + switch (c) + { + case 't': + num_threads = atoi(optarg); + break; + default: + return 1; + } + } + if (optind + 1 > argc) + { + fprintf(stderr, "Usage: fastbwa bwt2ert \n"); + return 1; + } + // fprintf(stderr, "%d %d %d\n", optind, argc, num_threads); + sprintf(prefix, "%s", argv[optind]); + sprintf(ert_kmer_file, "%s.%s", prefix, "ert.kmer.table"); + fprintf(stderr, "%s\n", ert_kmer_file); + // Load BWT index + bwaidx_t *bid = bwa_idx_load_from_disk(prefix, BWA_IDX_BNS | BWA_IDX_BWT | BWA_IDX_PAC); + // Build ERT + buildERTKmerTrees(ert_kmer_file, bid, prefix, num_threads, ERT_MAX_READ_LEN); + return 0; +} + int bwa_index(int argc, char *argv[]) // the "index" command { + int num_threads = 1; int c, algo_type = BWTALGO_AUTO, is_64 = 0, block_size = 10000000; char *prefix = 0, *str; while ((c = getopt(argc, argv, "6a:p:b:")) >= 0) { @@ -272,6 +306,10 @@ int bwa_index(int argc, char *argv[]) // the "index" command break; case 'p': prefix = strdup(optarg); break; case '6': is_64 = 1; break; + case 't': + num_threads = atoi(optarg); + assert(num_threads > 0 && num_threads < 256); + break; case 'b': block_size = strtol(optarg, &str, 10); if (*str == 'G' || *str == 'g') block_size *= 1024 * 1024 * 1024; @@ -287,6 +325,7 @@ int bwa_index(int argc, char *argv[]) // the "index" command fprintf(stderr, "Usage: fastbwa index [options] \n\n"); fprintf(stderr, "Options: -a STR BWT construction algorithm: bwtsw, is or rb2 [auto]\n"); fprintf(stderr, " -p STR prefix of the index [same as fasta name]\n"); + fprintf(stderr, " -t INT number of threads for ERT index building [%d]\n", num_threads); fprintf(stderr, " -b INT block size for the bwtsw algorithm (effective with -a bwtsw) [%d]\n", block_size); fprintf(stderr, " -6 index files named as .64.* instead of .* \n"); fprintf(stderr, "\n"); diff --git a/ertindex.c 
b/ertindex.c new file mode 100644 index 0000000..789710f --- /dev/null +++ b/ertindex.c @@ -0,0 +1,946 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "utils.h" +#include "ertindex.h" + +#define _set_pac(pac, l, c) ((pac)[(l) >> 2] |= (c) << (((l) & 3) << 1)) +#define _set_pac_orig(pac, l, c) ((pac)[(l) >> 2] |= (c) << ((~(l) & 3) << 1)) + +const uint8_t char_count_size_in_bits = 8; +const uint8_t hits_count_size_in_bits = 8; +const uint8_t ref_ptr_size_in_bits = 40; +const uint8_t leaf_offset_ptr_size_in_bits = 8; +const uint8_t other_offset_ptr_size_in_bits = 32; + +// int test_max_intv = 0; + +static inline void getNumBranchesForKmer(bwtintv_t ok[4], int* numBranches, uint8_t* uniform_bp) { + uint8_t i; + for (i = 0; i < 4; ++i) { + if (ok[i].x[2] > 0) { *numBranches += 1; *uniform_bp = i; } + } +} + +// 从右向左的顺序将kmer转换成query +static inline void kmertoquery(uint64_t x, uint8_t *a, int l) +{ + int i; + for (i = 0; i < l; ++i) { + a[i] = (uint8_t)((x >> (i << 1)) & 0x3); + } +} + +static inline uint64_t addBytesForEntry(byte_type_t type, int count, int numHits) +{ + uint64_t numBits = 0; + switch(type) { + case CODE: + numBits = 8; + break; + case LEAF_COUNT: + numBits = (hits_count_size_in_bits); + break; + case LEAF_HITS: + numBits = (ref_ptr_size_in_bits * numHits); + break; + case UNIFORM_COUNT: // "Uniform" + numBits = (char_count_size_in_bits); + break; + case UNIFORM_BP: + numBits = (count << 1); + break; + case LEAF_PTR: // "Leaf Offset Pointer" + numBits = leaf_offset_ptr_size_in_bits; + break; + case OTHER_PTR: + numBits = other_offset_ptr_size_in_bits; + break; + case EMPTY_NODE: + numBits = 0; + break; + default : + break; + } + return (numBits % 8) ? 
(numBits / 8 + 1) : numBits / 8; +} + +void addChildNode(node_t* p, node_t* c) { + assert(p->numChildren <= 3); + p->child_nodes[p->numChildren++] = c; + c->parent_node = p; +} + +void handleLeaf(const bwaidx_t *bid, bwtintv_t ik, node_t *n, int step) +{ + n->type = LEAF; + n->numHits = ik.x[2]; + n->hits = (uint64_t *)calloc(n->numHits, sizeof(uint64_t)); + assert(n->hits != NULL); + if (step == 1) { + uint64_t ref_pos = 0; + int j = 0; + for (j = 0; j < n->numHits; ++j) { + ref_pos = fmt_sa(bid->fmt, ik.x[0] + j); + // ref_pos = bwt_sa(bid->bwt, ik.x[0] + j); + n->hits[j] = ref_pos; + } + } +} + +void handleDivergence(const bwaidx_t *bid, bwtintv_t ok[4], int depth, node_t *parent_node, int step, int max_depth) +{ + int i; + bwtintv_t ok_copy[4]; + bwtintv_t ik_new; + memcpy_bwamem(ok_copy, 4 * sizeof(bwtintv_t), ok, 4 * sizeof(bwtintv_t), __FILE__, __LINE__); + for (i = 3; i >= 0; --i) { + node_t *n = (node_t *)calloc(1, sizeof(node_t)); + assert(n != NULL); + n->numChildren = 0; + memset(n->child_nodes, 0, 4 * sizeof(node_t *)); + n->pos = 0; + n->num_bp = 0; + if (ok_copy[i].x[2] == 0) { //!< Empty node + n->type = EMPTY; + n->numHits = 0; + memcpy_bwamem(n->seq, ERT_MAX_READ_LEN * sizeof(uint8_t), parent_node->seq, (depth - 1) * sizeof(uint8_t), __FILE__, __LINE__); + n->l_seq = depth; + addChildNode(parent_node, n); + } else if (ok_copy[i].x[2] > 1 && depth != max_depth) { // 没到最大长度,可以继续扩展 + ik_new = ok_copy[i]; ik_new.info = depth + 1; + memcpy_bwamem(n->seq, ERT_MAX_READ_LEN * sizeof(uint8_t), parent_node->seq, parent_node->l_seq * sizeof(uint8_t), __FILE__, __LINE__); + assert(depth >= 0); + n->seq[depth] = i; + n->pos = depth; + n->num_bp = 1; + n->l_seq = depth + 1; + n->numHits = ok_copy[i].x[2]; + n->type = DIVERGE; + addChildNode(parent_node, n); + ert_build_kmertree(bid, ik_new, ok, depth + 1, n, step, max_depth); + } else { // leaf intv.x[2] == 1 or depth = max_depth + memcpy_bwamem(n->seq, ERT_MAX_READ_LEN * sizeof(uint8_t), parent_node->seq, 
parent_node->l_seq * sizeof(uint8_t), __FILE__, __LINE__); + n->seq[depth] = i; + n->pos = depth; + n->num_bp = 1; + n->l_seq = depth + 1; + handleLeaf(bid, ok_copy[i], n, step); + addChildNode(parent_node, n); + } + } +} + +void ert_build_kmertree(const bwaidx_t *bid, bwtintv_t ik, bwtintv_t ok[4], int curDepth, node_t *parent_node, int step, int max_depth) +{ + uint8_t uniform_bp = 0; + int numBranches = 0, depth = curDepth; + bwt_extend(bid->bwt, &ik, ok, 0); //!< Extend right by 1bp + /// Check if we need to make a uniform entry + getNumBranchesForKmer(ok, &numBranches, &uniform_bp); + bwtintv_t ik_new; + /// If we find a uniform entry, extend further till we diverge or hit a leaf node + if (numBranches == 1) { + uint8_t uniformExtend = 1; + ik_new = ok[uniform_bp]; ik_new.info = depth+1; + node_t *n = (node_t *)calloc(1, sizeof(node_t)); + assert(n != NULL); + n->numChildren = 0; + memset(n->child_nodes, 0, 4 * sizeof(node_t *)); + memcpy_bwamem(n->seq, ERT_MAX_READ_LEN * sizeof(uint8_t), parent_node->seq, parent_node->l_seq * sizeof(uint8_t), __FILE__, __LINE__); + assert(depth >= 0); + n->seq[depth] = uniform_bp; // ?放在最后的位置上 + n->numHits = ok[uniform_bp].x[2]; + n->l_seq = depth + 1; // depth=16 + n->pos = depth; + n->num_bp = 1; + addChildNode(parent_node, n); + if (depth < max_depth) { + bwtintv_t ok_init; + memcpy_bwamem(&ok_init, sizeof(bwtintv_t), &ok[uniform_bp], sizeof(bwtintv_t), __FILE__, __LINE__); + while (uniformExtend) { // 一个一个bp进行扩展 + numBranches = 0; uniform_bp = 0; + depth += 1; + bwt_extend(bid->bwt, &ik_new, ok, 0); //!< Extend right by 1bp + getNumBranchesForKmer(ok, &numBranches, &uniform_bp); + assert(numBranches != 0); + if (numBranches == 1) { //seq[depth] = uniform_bp; + n->l_seq = depth + 1; + n->num_bp++; + /// Hit a multi-hit leaf node + if (depth == max_depth) { + // if (test_max_intv < ik_new.x[2]) test_max_intv = ik_new.x[2]; + // fprintf(stderr, "depth: %d, num hit: %ld, max: %d\n", depth, ik_new.x[2], test_max_intv); + 
uniformExtend = 0; + handleLeaf(bid, ok_init, n, step); + } + } else { //!< Diverge + uniformExtend = 0; + n->type = UNIFORM; + handleDivergence(bid, ok, depth, n, step, max_depth); + } + } + } else { //bwt, &ik, ok, 0); //!< ok contains the result of BWT extension + if (ok[c].x[2] != prevHits) { //!< hit set changes + lep1 |= (1ULL << j); + } + /// Extend right till k-mer has zero hits + if (ok[c].x[2] >= 1) { prevHits = ok[c].x[2]; ik = ok[c]; ik.info = kmerSize + j + 1; } + else { break; } + } + uint64_t num_hits = ok[c].x[2]; + if (num_hits == 0) { + xmer_data = ((lep1 & LEP_MASK) << METADATA_BITWIDTH) | INVALID; + } + else if (num_hits == 1) { + xmer_data = ((lep1 & LEP_MASK) << METADATA_BITWIDTH) | (SINGLE_HIT_LEAF); + if (step == 1) { + mlt_data[mlt_byte_idx] = 0; //!< Not a multi-hit + } + mlt_byte_idx++; + if (step == 1) { + uint64_t ref_pos = 0; + ref_pos = fmt_sa(bid->fmt, ok[c].x[0]); + // ref_pos = bwt_sa(bid->bwt, ok[c].x[0]); + uint64_t leaf_data = ref_pos << 1; + memcpy_bwamem(&mlt_data[mlt_byte_idx], 5 * sizeof(uint8_t), &leaf_data, 5 * sizeof(uint8_t), __FILE__, __LINE__); + } + mlt_byte_idx += 5; + *numHits += 1; + } + else { + xmer_data = ((lep1 & LEP_MASK) << METADATA_BITWIDTH) | (INFREQUENT); + node_t *n = (node_t *)calloc(1, sizeof(node_t)); + assert(n != NULL); + n->type = DIVERGE; + n->pos = 0; + n->num_bp = 0; + memcpy_bwamem(n->seq, ERT_MAX_READ_LEN * sizeof(uint8_t), aq, kmerSize, __FILE__, __LINE__); + n->l_seq = kmerSize; + memcpy_bwamem(&n->seq[n->l_seq], xmerSize * sizeof(uint8_t), aq1, xmerSize * sizeof(uint8_t), __FILE__, __LINE__); + n->l_seq += xmerSize; + n->parent_node = 0; + n->numChildren = 0; + memset(n->child_nodes, 0, 4 * sizeof(node_t *)); + n->start_addr = mlt_byte_idx; + ert_build_kmertree(bid, ik, ok, kmerSize + j, n, step, max_depth); + ert_traverse_kmertree(n, mlt_data, mh_data, &mlt_byte_idx, &mh_byte_idx, kmerSize + j, numHits, + max_next_ptr, next_ptr_width, step); + ert_destroy_kmertree(n); + } + if (num_hits < 
20) { + xmer_entry = (mlt_offset << KMER_DATA_BITWIDTH) | (num_hits << 17) | xmer_data; + } + else { + xmer_entry = (mlt_offset << KMER_DATA_BITWIDTH) | xmer_data; + } + uint64_t ptr_width = (next_ptr_width < 4) ? next_ptr_width : 0; + xmer_entry |= (ptr_width << 22); + if (step == 1) { + memcpy_bwamem(&mlt_data[byte_idx], 8 * sizeof(uint8_t), &xmer_entry, 8 * sizeof(uint8_t), __FILE__, __LINE__); + } + byte_idx += 8; + mlt_offset = mlt_byte_idx; + ik = ik_copy; + prevHits = ik_copy.x[2]; + } + *size = mlt_byte_idx; + *mh_size = mh_byte_idx; +} + +void addCode(uint8_t *mlt_data, uint64_t *byte_idx, uint8_t code, int step) +{ + if (step == 1) { + memcpy_bwamem(&mlt_data[*byte_idx], sizeof(uint8_t), &code, sizeof(uint8_t), __FILE__, __LINE__); + } + *byte_idx += 1; +} + +void addUniformNode(uint8_t *mlt_data, uint64_t *byte_idx, int count, uint8_t *uniformBases, uint64_t hitCount, int step) +{ + int numBytesForBP = addBytesForEntry(UNIFORM_BP, count, 0); + assert(count < ERT_MAX_READ_LEN); + if (step == 1) { + memcpy_bwamem(&mlt_data[*byte_idx], sizeof(uint16_t), &count, sizeof(uint16_t), __FILE__, __LINE__); + } + *byte_idx += 2; + if (step == 1) { + int j; + uint8_t packUniformBases[numBytesForBP]; + memset(packUniformBases, 0, numBytesForBP * sizeof(uint8_t)); + for (j = 0; j < count; ++j) { + _set_pac_orig(packUniformBases, j, uniformBases[j]); + } + memcpy_bwamem(&mlt_data[*byte_idx], numBytesForBP * sizeof(uint8_t), packUniformBases, numBytesForBP * sizeof(uint8_t), __FILE__, __LINE__); + } + *byte_idx += numBytesForBP; +} + + + +void addLeafNode(uint8_t *mlt_data, uint64_t *byte_idx, uint64_t ref_pos, int step) { + if (step == 1) { + uint64_t leaf_data = (ref_pos << 1); + memcpy_bwamem(&mlt_data[*byte_idx], 5 * sizeof(uint8_t), &leaf_data, 5 * sizeof(uint8_t), __FILE__, __LINE__); + } + *byte_idx += 5; +} + +void addMultiHitLeafNode(uint8_t* mlt_data, uint64_t* byte_idx, uint64_t count, uint64_t* hits, int step) +{ + uint16_t k = 0; + for (k = 0; k < count; 
++k) { + if (step == 1) { + uint64_t leaf_data = (hits[k] << 1) | 1ULL; + memcpy_bwamem(&mlt_data[*byte_idx], 5 * sizeof(uint8_t), &leaf_data, 5 * sizeof(uint8_t), __FILE__, __LINE__); + } + *byte_idx += 5; + } +} + +void addMultiHitLeafCount(uint8_t* mlt_data, uint64_t* byte_idx, uint64_t count, int step) +{ + if (step == 1) { + memcpy_bwamem(&mlt_data[*byte_idx], 2 * sizeof(uint8_t), &count, 2 * sizeof(uint8_t), __FILE__, __LINE__); + } + *byte_idx += 2; +} + +void addMultiHitLeafPtr(uint8_t *mlt_data, uint64_t *byte_idx, uint64_t mh_byte_idx, int step) +{ + if (step == 1) { + uint64_t mh_data = (mh_byte_idx << 1) | 1ULL; + memcpy_bwamem(&mlt_data[*byte_idx], 5 * sizeof(uint8_t), &mh_data, 5 * sizeof(uint8_t), __FILE__, __LINE__); + } + *byte_idx += 5; +} + +void ert_traverse_kmertree(node_t *n, uint8_t *mlt_data, uint8_t *mh_data, uint64_t *size, uint64_t *mh_size, int depth, uint64_t *numHits, uint64_t *max_ptr, uint64_t next_ptr_width, int step) +{ + int j = 0; + int cur_depth = depth; + uint64_t byte_idx = *size; + uint64_t mh_byte_idx = *mh_size; // 匹配位置开始记录的索引位置 + uint8_t code = 0; + assert(n->numChildren != 0); + if (n->numChildren == 1) { + node_t *child = n->child_nodes[0]; + uint8_t c = child->seq[child->pos]; + if (child->type == LEAF) { + // + // FIXME: In rare cases, when one of the occurrences of the k-mer is at the end of the reference, + // # hits for parent node is not equal to the sum of #hits of children nodes, and we trigger the assertion below + // This should not affect results as long as readLength > kmerSize + // assert(child->numHits > 1); + code |= (LEAF << (c << 1)); + addCode(mlt_data, &byte_idx, code, step); + addMultiHitLeafPtr(mlt_data, &byte_idx, mh_byte_idx, step); + addMultiHitLeafCount(mh_data, &mh_byte_idx, child->numHits, step); + addMultiHitLeafNode(mh_data, &mh_byte_idx, child->numHits, child->hits, step); + *numHits += child->numHits; + } + else { + assert(child->type == UNIFORM); + code |= (UNIFORM << (c << 1)); + 
addCode(mlt_data, &byte_idx, code, step); + addUniformNode(mlt_data, &byte_idx, child->num_bp, &child->seq[child->pos], child->numHits, step); + ert_traverse_kmertree(child, mlt_data, mh_data, &byte_idx, &mh_byte_idx, cur_depth + child->num_bp, + numHits, max_ptr, next_ptr_width, step); + } + } + else { + uint8_t numEmpty = 0, numLeaves = 0; + for (j = 0; j < n->numChildren; ++j) { + node_t *child = n->child_nodes[j]; + uint8_t c = child->seq[child->pos]; + if (child->type == EMPTY) { numEmpty++; } + else if (child->type == LEAF) { numLeaves++; code |= (LEAF << (c << 1)); } + else { code |= (DIVERGE << (c << 1)); } + } + uint8_t numPointers = ((4 - numEmpty - numLeaves) > 0) ? (4 - numEmpty - numLeaves) : 0; + uint64_t start_byte_idx = byte_idx; + addCode(mlt_data, &byte_idx, code, step); + uint64_t ptr_byte_idx = byte_idx; + uint64_t ptrToOtherNodes[numPointers + 1]; //!< These point to children. We have one more child than number of pointers + memset(ptrToOtherNodes, 0, (numPointers + 1) * sizeof(uint64_t)); + uint64_t numHitsForChildren[numPointers + 1]; + memset(numHitsForChildren, 0, (numPointers + 1) * sizeof(uint64_t)); + uint64_t other_idx = 0; + if (numPointers > 0) { byte_idx += (numPointers * next_ptr_width); } + for (j = 0; j < n->numChildren; ++j) { + node_t *child = n->child_nodes[j]; + if (child->type == LEAF) { + if (child->numHits == 1) { + addLeafNode(mlt_data, &byte_idx, child->hits[0], step); + } + else { + addMultiHitLeafPtr(mlt_data, &byte_idx, mh_byte_idx, step); + addMultiHitLeafCount(mh_data, &mh_byte_idx, child->numHits, step); + addMultiHitLeafNode(mh_data, &mh_byte_idx, child->numHits, child->hits, step); + } + } + } + if (numPointers > 0) { ptrToOtherNodes[other_idx] = byte_idx; } + for (j = 0; j < n->numChildren; ++j) { + node_t *child = n->child_nodes[j]; + assert(child->type != UNIFORM); // why? 
+ if (child->type == DIVERGE) { + ert_traverse_kmertree(child, mlt_data, mh_data, &byte_idx, &mh_byte_idx, + cur_depth+1, numHits, max_ptr, next_ptr_width, step); + numHitsForChildren[other_idx] = child->numHits; + other_idx++; + ptrToOtherNodes[other_idx] = byte_idx; + } + } + for (j = 0; j < numPointers; ++j) { + uint64_t pointerToNextNode = (ptrToOtherNodes[j] - start_byte_idx); + if (pointerToNextNode > *max_ptr) { + *max_ptr = pointerToNextNode; + } + assert(pointerToNextNode < (1 << 26)); + } + // Fill up pointers based on size of previous children + if (step == 1) { + for (j = 0; j < numPointers; ++j) { + uint64_t pointerToNextNode = (ptrToOtherNodes[j] - start_byte_idx); + assert(pointerToNextNode < (1 << 26)); + uint64_t reseed_data = 0; + if (numHitsForChildren[j] < 20) { + reseed_data = (pointerToNextNode << 6) | (numHitsForChildren[j]); + } else { + reseed_data = (pointerToNextNode << 6); + } + memcpy_bwamem(&mlt_data[ptr_byte_idx], next_ptr_width * sizeof(uint8_t), &reseed_data, next_ptr_width * sizeof(uint8_t), __FILE__, __LINE__); + ptr_byte_idx += next_ptr_width; + } + } + } + *size = byte_idx; + *mh_size = mh_byte_idx; +} + +void ert_destroy_kmertree(node_t* n) +{ + int j; + if (n == NULL) { + return; + } + if (n->hits) { + free(n->hits); + } + for (j = 0; j < n->numChildren; ++j) { + ert_destroy_kmertree(n->child_nodes[j]); + } + free(n); +} + +// +// This function builds the ERT index. +// Note on pointers to child nodes: When building the radix tree for each k-mer, +// we try 3 values for pointers to child nodes, 2,3,4 B and choose the smallest +// one possible. 
+// +void *buildIndex(void *arg) +{ + thread_data_t *data = (thread_data_t *)arg; + bwtintv_t ik, ok[4]; + uint64_t idx = 0; + uint8_t aq[kmerSize]; + int i; + uint8_t c; + uint64_t lep, prevHits, numBytesPerKmer, numBytesForMh, ref_pos, total_hits = 0, ptr = 0, max_next_ptr = 0; + uint64_t next_ptr_width = 0; + uint64_t nKmerSmallPtr = 0, nKmerMedPtr = 0, nKmerLargePtr = 0; + uint16_t kmer_data = 0; + + + // File to write the multi-level tree index + char ml_tbl_file_name[PATH_MAX]; + sprintf(ml_tbl_file_name, "%s.ert.mlt_table_%d", data->filePrefix, data->tid); + + // Log progress + char log_file_name[PATH_MAX]; + sprintf(log_file_name, "%s.log_%d", data->filePrefix, data->tid); + + FILE *ml_tbl_fd = 0, *log_fd = 0; + ml_tbl_fd = fopen(ml_tbl_file_name, "wb"); + if (ml_tbl_fd == NULL) + { + printf("\nCan't open file or file doesn't exist. mlt_table errno = %d\n", errno); + pthread_exit(NULL); + } + if (bwa_verbose >= 4) + { + log_fd = fopen(log_file_name, "w"); + if (log_fd == NULL) + { + printf("\nCan't open file or file doesn't exist. log errno = %d\n", errno); + pthread_exit(NULL); + } + log_file(log_fd, "Start: %lu End: %lu", data->startKmer, data->endKmer); + } + + // + // Loop for each k-mer and compute LEP when the hit set changes + // + double tmp_time = realtime(), elapsed_time; + int p = 0; + for (idx = data->startKmer; idx < data->endKmer; ++idx) + { + max_next_ptr = 0; + next_ptr_width = 0; + c = 0; + lep = 0; // k-1-bit LEP + prevHits = 0; + numBytesPerKmer = 0; + numBytesForMh = 0; + kmertoquery(idx, aq, kmerSize); // represent k-mer as uint8_t* + assert(aq[0] >= 0 && aq[0] <= 3); + bwt_set_intv(data->bid->bwt, aq[0], ik); // the initial interval of a single base + ik.info = 1; + prevHits = ik.x[2]; + + // 打印进度信息 + if (data->tid == 0 && (idx - data->startKmer) % ((data->endKmer - data->startKmer) / 100) == 0) { + if (p++ > 0) { + elapsed_time = realtime() - tmp_time; + fprintf(stderr, "[step: %d] %d%% percent complished. 
%f s elapsed.\n", data->step, p - 1, elapsed_time); + } + } + // + // Backward search k-mer + // + for (i = 1; i < kmerSize; ++i) + { + c = 3 - aq[i]; + bwt_extend(data->bid->bwt, &ik, ok, 0); // ok contains the result of BWT extension + if (ok[c].x[2] != prevHits) { // hit set changes + lep |= (1ULL << (i - 1)); + } + // + // Extend left till k-mer has zero hits + // + if (ok[c].x[2] >= 1) { prevHits = ok[c].x[2]; ik = ok[c]; ik.info = i + 1; } + else { break; } + } + + uint64_t num_hits = ok[c].x[2]; + if (num_hits == 0) { // "Empty" - k-mer absent in the reference genome + kmer_data = ((lep & LEP_MASK) << METADATA_BITWIDTH) | INVALID; + } else if (num_hits == 1) { // "Leaf" - k-mer has a single hit in the reference genome + kmer_data = ((lep & LEP_MASK) << METADATA_BITWIDTH) | (SINGLE_HIT_LEAF); + numBytesPerKmer = 6; // 该15-kmer下所包含的所有匹配位置信息只需要6字节就能表示(因为只有一个匹配) + uint8_t byte_idx = 0; + uint8_t mlt_data[numBytesPerKmer]; + if (data->step == 1) { mlt_data[byte_idx] = 0; } // Mark that the hit is not a multi-hit + byte_idx++; + data->numHits[idx - data->startKmer] += num_hits; + if (data->step == 1) { + // Look up suffix array to identify the hit position + ref_pos = fmt_sa(data->bid->fmt, ok[c].x[0]); + // ref_pos = bwt_sa(data->bid->bwt, ok[c].x[0]); + uint64_t leaf_data = ref_pos << 1; + memcpy_bwamem(&mlt_data[byte_idx], 5 * sizeof(uint8_t), &leaf_data, 5 * sizeof(uint8_t), __FILE__, __LINE__); + fwrite(mlt_data, sizeof(uint8_t), numBytesPerKmer, ml_tbl_fd); + } + byte_idx += 5; + } + // + // If the number of hits for the k-mer does not exceed the HIT_THRESHOLD, + // prefer a radix-tree over a multi-level table as the radix tree for the + // k-mer is likely to be sparse. 
+ // + else if (num_hits <= HIT_THRESHOLD) { + kmer_data = ((lep & LEP_MASK) << METADATA_BITWIDTH) | (INFREQUENT); + node_t *n = (node_t *)calloc(1, sizeof(node_t)); + assert(n != NULL); + n->type = DIVERGE; + n->pos = 0; + n->num_bp = 0; + memcpy_bwamem(n->seq, ERT_MAX_READ_LEN * sizeof(uint8_t), aq, kmerSize * sizeof(uint8_t), __FILE__, __LINE__); + n->l_seq = kmerSize; + n->parent_node = 0; + n->numChildren = 0; + n->numHits = num_hits; + n->child_nodes[0] = n->child_nodes[1] = n->child_nodes[2] = n->child_nodes[3] = 0; + n->start_addr = 0; + uint8_t *mlt_data = 0; + next_ptr_width = 2; + uint8_t *mh_data = 0; + uint64_t size = 0; + if (data->step == 1) { + if (idx != (numKmers - 1)) { // 不等于最后一个处理的kmer + size = (data->byte_offsets[idx+1] >> KMER_DATA_BITWIDTH) - (data->byte_offsets[idx] >> KMER_DATA_BITWIDTH); // 本kmer所占存储大小 + assert(size < (1 << 26)); // 小于64MB + } else { // FIXME: This is a hack. We know the size of every k-mer tree except the last-kmer + size = 1 << 26; + } + next_ptr_width = (((data->byte_offsets[idx] >> 22) & 3) == 0) ? 
4 : ((data->byte_offsets[idx] >> 22) & 3); + mlt_data = (uint8_t *)calloc(size, sizeof(uint8_t)); + assert(mlt_data != NULL); + mh_data = (uint8_t *)calloc(size, sizeof(uint8_t)); + assert(mh_data != NULL); + } + ert_build_kmertree(data->bid, ik, ok, i, n, data->step, data->readLength - 1); // n作为parent node + + // Reserve space for pointer to start of multi-hit address space + numBytesPerKmer = 4; // 4个字节的指针 + + // Traverse tree and place data in memory space + ert_traverse_kmertree(n, mlt_data, mh_data, &numBytesPerKmer, &numBytesForMh, + i, &data->numHits[idx - data->startKmer], &max_next_ptr, next_ptr_width, data->step); + + if (data->step == 0 || data->step == 1) { + if (max_next_ptr >= 1024 && max_next_ptr < 262144) { + next_ptr_width = 3; + max_next_ptr = 0; + numBytesPerKmer = 4; + numBytesForMh = 0; + ert_traverse_kmertree(n, mlt_data, mh_data, &numBytesPerKmer, &numBytesForMh, i, + &data->numHits[idx - data->startKmer], &max_next_ptr, next_ptr_width, data->step); + } + if (max_next_ptr >= 262144) { + next_ptr_width = 4; + max_next_ptr = 0; + numBytesPerKmer = 4; + numBytesForMh = 0; + ert_traverse_kmertree(n, mlt_data, mh_data, &numBytesPerKmer, &numBytesForMh, i, + &data->numHits[idx - data->startKmer], &max_next_ptr, next_ptr_width, data->step); + } + } + ert_destroy_kmertree(n); + assert(numBytesPerKmer < (1 << 26)); + // assert(numBytesForMh < (1 << 24)); + if (data->step == 1) { + if (idx != numKmers-1) assert((numBytesPerKmer+numBytesForMh) == size); + memcpy_bwamem(mlt_data, 4*sizeof(uint8_t), &numBytesPerKmer, 4*sizeof(uint8_t), __FILE__, __LINE__); + fwrite(mlt_data, sizeof(uint8_t), numBytesPerKmer, ml_tbl_fd); + free(mlt_data); + fwrite(mh_data, sizeof(uint8_t), numBytesForMh, ml_tbl_fd); + free(mh_data); + } + } + // + // If the number of hits for the k-mer exceeds the HIT_THRESHOLD, + // prefer a multi-level table to encode the suffixes for the + // k-mer + // + else { + kmer_data = ((lep & LEP_MASK) << METADATA_BITWIDTH) | (FREQUENT); + 
uint8_t *mlt_data = 0; + uint8_t *mh_data = 0; + next_ptr_width = 2; + uint64_t size = 0; + if (data->step == 1) { + if (idx != (numKmers - 1)) { + size = (data->byte_offsets[idx+1] >> KMER_DATA_BITWIDTH) - (data->byte_offsets[idx] >> KMER_DATA_BITWIDTH); + assert(size < (1 << 26)); + } + else { //!< FIXME: Hack. We do not store the size of the last-kmer + size = 1 << 26; + } + next_ptr_width = (((data->byte_offsets[idx] >> 22) & 3) == 0) ? 4 : ((data->byte_offsets[idx] >> 22) & 3); + mlt_data = (uint8_t *)calloc(size, sizeof(uint8_t)); + assert(mlt_data != NULL); + mh_data = (uint8_t *)calloc(size, sizeof(uint8_t)); + assert(mh_data != NULL); + } + numBytesPerKmer = 4; + ert_build_table(data->bid, ik, ok, mlt_data, mh_data, &numBytesPerKmer, + &numBytesForMh, aq, &data->numHits[idx - data->startKmer], &max_next_ptr, next_ptr_width, data->step, + data->readLength - 1); + if (data->step == 0 || data->step == 1) { + if (max_next_ptr >= 1024 && max_next_ptr < 262144) { + next_ptr_width = 3; + max_next_ptr = 0; + numBytesPerKmer = 4; + numBytesForMh = 0; + ert_build_table(data->bid, ik, ok, mlt_data, mh_data, + &numBytesPerKmer, &numBytesForMh, aq, &data->numHits[idx-data->startKmer], + &max_next_ptr, next_ptr_width, data->step, data->readLength - 1); + } + if (max_next_ptr >= 262144) { + next_ptr_width = 4; + max_next_ptr = 0; + numBytesPerKmer = 4; + numBytesForMh = 0; + ert_build_table(data->bid, ik, ok, mlt_data, mh_data, + &numBytesPerKmer, &numBytesForMh, aq, &data->numHits[idx-data->startKmer], + &max_next_ptr, next_ptr_width, data->step, data->readLength - 1); + } + } + // + // Traverse tree and place data in memory + // + assert(numBytesPerKmer < (1 << 26)); + // assert(numBytesForMh < (1 << 24)); + if (data->step == 1) { + if (idx != numKmers-1) assert((numBytesPerKmer+numBytesForMh) == size); + memcpy_bwamem(mlt_data, 4*sizeof(uint8_t), &numBytesPerKmer, 4*sizeof(uint8_t), __FILE__, __LINE__); + fwrite(mlt_data, sizeof(uint8_t), numBytesPerKmer, ml_tbl_fd); 
+ free(mlt_data); + fwrite(mh_data, sizeof(uint8_t), numBytesForMh, ml_tbl_fd); + free(mh_data); + } + } + if (num_hits < 20) { + data->kmer_table[idx - data->startKmer] = (ptr << KMER_DATA_BITWIDTH) | (num_hits << 17) | kmer_data; + } + else { + data->kmer_table[idx - data->startKmer] = (ptr << KMER_DATA_BITWIDTH) | kmer_data; + } + + ptr += (numBytesPerKmer + numBytesForMh); + + if (next_ptr_width == 2) { + nKmerSmallPtr++; + } + else if (next_ptr_width == 3) { + nKmerMedPtr++; + } + else if (next_ptr_width == 4) { + nKmerLargePtr++; + next_ptr_width = 0; + } + data->kmer_table[idx - data->startKmer] |= (next_ptr_width << 22); + if (bwa_verbose >= 4) { + if (idx == data->endKmer-1) { + log_file(log_fd, "TotalSize:%lu\n", ptr); + } + if ((idx-data->startKmer) % 10000000 == 0) { + log_file(log_fd, "%lu,%lu,%lu", idx, numBytesPerKmer, ptr); + } + } + + total_hits += data->numHits[idx - data->startKmer]; + } + + data->end_offset = ptr; + if (bwa_verbose >= 4) { + log_file(log_fd, "Hits:%lu\n", total_hits); + log_file(log_fd, "nKmersSmallPtrs:%lu", nKmerSmallPtr); + log_file(log_fd, "nKmersMedPtrs:%lu", nKmerMedPtr); + log_file(log_fd, "nKmersLargePtrs:%lu", nKmerLargePtr); + fclose(log_fd); + } + + fclose(ml_tbl_fd); + pthread_exit(NULL); +} + +void buildERTKmerTrees(char *kmer_tbl_file_name, bwaidx_t *bid, char *prefix, int num_threads, int readLength) +{ + FILE *kmer_tbl_fd; + pthread_t thr[num_threads]; + int i, rc; + thread_data_t thr_data[num_threads]; + uint64_t numKmersThread = (uint64_t)ceil(((double)(numKmers)) / num_threads); + if (bwa_verbose >= 3) + { + fprintf(stderr, "[M::%s] Computing tree sizes for each k-mer\n", __func__); + } + // + // STEP 1: Create threads. 
Each thread builds the index for a fraction of the k-mers + // + for (i = 0; i < num_threads; ++i) + { + thr_data[i].tid = i; + thr_data[i].step = 0; + thr_data[i].readLength = readLength; + thr_data[i].bid = bid; + thr_data[i].startKmer = i * numKmersThread; + thr_data[i].endKmer = ((i + 1) * numKmersThread > numKmers) ? numKmers : (i + 1) * numKmersThread; + thr_data[i].end_offset = 0; + thr_data[i].filePrefix = prefix; + uint64_t numKmersToProcess = thr_data[i].endKmer - thr_data[i].startKmer; + thr_data[i].kmer_table = (uint64_t *)calloc(numKmersToProcess, sizeof(uint64_t)); + assert(thr_data[i].kmer_table != NULL); + thr_data[i].numHits = (uint64_t *)calloc(numKmersToProcess, sizeof(uint64_t)); + assert(thr_data[i].numHits != NULL); + if ((rc = pthread_create(&thr[i], NULL, buildIndex, &thr_data[i]))) + { + fprintf(stderr, "[M::%s] error: pthread_create, rc: %d\n", __func__, rc); + return; + } + } + + // + // block until all threads complete + // + for (i = 0; i < num_threads; ++i) + { + pthread_join(thr[i], NULL); + } + + // + // Compute absolute offsets for each kmer's tree from per-thread relative offsets + // + uint64_t *kmer_table = (uint64_t *)calloc(numKmers, sizeof(uint64_t)); + assert(kmer_table != NULL); + int tidx; + uint64_t kidx; + uint64_t numProcessed = 0; + uint64_t offset = 0; + for (tidx = 0; tidx < num_threads; ++tidx) + { + uint64_t numKmersToProcess = thr_data[tidx].endKmer - thr_data[tidx].startKmer; + for (kidx = 0; kidx < numKmersToProcess; ++kidx) + { + uint64_t rel_offset = thr_data[tidx].kmer_table[kidx] >> KMER_DATA_BITWIDTH; + uint16_t kmer_data = thr_data[tidx].kmer_table[kidx] & KMER_DATA_MASK; + uint64_t ptr_width = (thr_data[tidx].kmer_table[kidx] >> 22) & 3; + uint64_t reseed_hits = (thr_data[tidx].kmer_table[kidx] >> 17) & 0x1F; + kmer_table[numProcessed + kidx] = ((offset + rel_offset) << KMER_DATA_BITWIDTH) + | (ptr_width << 22) + | (reseed_hits << 17) + | (kmer_data); + } + numProcessed += numKmersToProcess; + offset += 
thr_data[tidx].end_offset; + free(thr_data[tidx].kmer_table); + free(thr_data[tidx].numHits); + } + + // + // STEP 2 : Using estimates of each k-mer's tree size from the previous step, write the index to file + // + uint64_t total_size = offset + (numKmers * 8UL); + if (bwa_verbose >= 3) { + fprintf(stderr, "[M::%s] Total size of ERT index = %lu B (Expected). (k-mer,tree) = (%lu,%lu)\n", __func__, total_size, numKmers * 8UL, offset); + } + + // return; + + for (i = 0; i < num_threads; ++i) { + thr_data[i].tid = i; + thr_data[i].step = 1; + thr_data[i].readLength = readLength; + thr_data[i].bid = bid; + thr_data[i].startKmer = i*numKmersThread; + thr_data[i].endKmer = ((i + 1)*numKmersThread > numKmers) ? numKmers : (i + 1)*numKmersThread; + thr_data[i].end_offset = 0; + thr_data[i].filePrefix = prefix; + uint64_t numKmersToProcess = thr_data[i].endKmer - thr_data[i].startKmer; + thr_data[i].kmer_table = (uint64_t*) calloc(numKmersToProcess, sizeof(uint64_t)); + thr_data[i].numHits = (uint64_t*) calloc(numKmersToProcess, sizeof(uint64_t)); + thr_data[i].byte_offsets = kmer_table; + if ((rc = pthread_create(&thr[i], NULL, buildIndex, &thr_data[i]))) { + fprintf(stderr, "[M::%s] error: pthread_create, rc: %d\n", __func__, rc); + return; + } + } + + for (i = 0; i < num_threads; ++i) { + pthread_join(thr[i], NULL); + } + + if (bwa_verbose >= 3) { + fprintf(stderr, "[M::%s] Merging per-thread tables ...\n", __func__); + } + + // + // Compute absolute offsets for each k-mer tree's root node + // + numProcessed = 0; + offset = 0; + for (tidx = 0; tidx < num_threads; ++tidx) { + uint64_t numKmersToProcess = thr_data[tidx].endKmer - thr_data[tidx].startKmer; + for (kidx = 0; kidx < numKmersToProcess; ++kidx) { + uint64_t rel_offset = thr_data[tidx].kmer_table[kidx] >> KMER_DATA_BITWIDTH; + uint16_t kmer_data = thr_data[tidx].kmer_table[kidx] & KMER_DATA_MASK; + uint64_t ptr_width = (thr_data[tidx].kmer_table[kidx] >> 22) & 3; + uint64_t reseed_hits = 
(thr_data[tidx].kmer_table[kidx] >> 17) & 0x1F; + kmer_table[numProcessed + kidx] = ((offset + rel_offset) << KMER_DATA_BITWIDTH) + | (ptr_width << 22) + | (reseed_hits << 17) + | (kmer_data); + } + numProcessed += numKmersToProcess; + offset += thr_data[tidx].end_offset; + free(thr_data[tidx].kmer_table); + free(thr_data[tidx].numHits); + } + kmer_tbl_fd = fopen(kmer_tbl_file_name, "wb"); + if (kmer_tbl_fd == NULL) { + fprintf(stderr, "[M::%s] Can't open file or file doesn't exist.\n", __func__); + exit(1); + } + fwrite(kmer_table, sizeof(uint64_t), numKmers, kmer_tbl_fd); + fclose(kmer_tbl_fd); + free(kmer_table); + + // + // Merge all per-thread trees + // + const int file_buf_size = 64 * 1024 * 1024; + uint8_t *file_buf = (uint8_t *)malloc(file_buf_size); // 64MB + char ml_tbl_file_name[PATH_MAX] = {}; + sprintf(ml_tbl_file_name, "%s.ert.mlt.table", prefix); + + if (remove(ml_tbl_file_name) == 0) { + fprintf(stderr, "[M::%s] Overwriting existing index file (tree)\n", __func__); + } + + FILE *o_mlt = fopen(ml_tbl_file_name, "wb"); + if (o_mlt == NULL) + { + fprintf(stderr, "[M::%s] Can't open output index file for writing.\n", __func__); + exit(1); + } + for (uint64_t tidx = 0; tidx < num_threads; ++tidx) { + sprintf(ml_tbl_file_name, "%s.ert.mlt_table_%ld", prefix, tidx); + FILE *i_mlt = fopen(ml_tbl_file_name, "rb"); + if (i_mlt == NULL) { + fprintf(stderr, "[M::%s] Can't open per-thread index file for thread %ld\n", __func__, tidx); + exit(1); + } + int fr = 0; + while((fr = fread(file_buf, 1, file_buf_size, i_mlt)) != 0) { + fwrite(file_buf, 1, fr, o_mlt); + } + if (remove(ml_tbl_file_name) != 0) { + fprintf(stderr, "[M::%s] Can't remove per-thread index file (tree) for thread %ld\n", __func__, tidx); + exit(1); + } + } + free(file_buf); +} diff --git a/ertindex.h b/ertindex.h new file mode 100644 index 0000000..3482244 --- /dev/null +++ b/ertindex.h @@ -0,0 +1,106 @@ +#ifndef BWA_ERT_H +#define BWA_ERT_H + +#include +#include "kvec.h" +#include "bwa.h" + 
+#define ERT_MAX_READ_LEN 301 +#define kmerSize 15 +#define numKmers 1073741824 +#define xmerSize 4 +#define numXmers 256 +#define TILE_SIZE 64 +// #define PRINT_SMEM +#define PREFIX_LENGTH 3 +#define LEP_MASK 0x3FFF +#define KMER_DATA_BITWIDTH 24 +#define KMER_DATA_MASK 0xFFFF +#define METADATA_BITWIDTH 2 +#define METADATA_MASK 0x3 +#define INVALID 0 +#define SINGLE_HIT_LEAF 1 +#define INFREQUENT 2 +#define FREQUENT 3 +#define HIT_THRESHOLD 256 +#define DRAM_PAGE_SIZE 24576 +#define LEAF_TBL_BASE_PTR_WIDTH 3 +#define LEAF_TBL_HIT_COUNT_WIDTH 3 +#define MAX_HITS_PER_READ 2000000 + +typedef enum +{ + EMPTY, + LEAF, + UNIFORM, + DIVERGE +} note_type_t; + +typedef struct _node_t +{ + note_type_t type; + int pos; + int num_bp; + int l_seq; + int numChildren; + uint64_t numHits; + uint64_t start_addr; + uint64_t *hits; + uint8_t seq[ERT_MAX_READ_LEN + 1]; + struct _node_t *parent_node; + struct _node_t *child_nodes[4]; +} node_t; + +typedef kvec_t(node_t) node_v; + +typedef node_t *node_ptr_t; + +typedef struct +{ + int tid; + int step; + int readLength; + uint64_t *kmer_table; + uint64_t startKmer; + uint64_t endKmer; + bwaidx_t *bid; + uint64_t *numHits; + char *filePrefix; + uint64_t *byte_offsets; + uint64_t end_offset; +} thread_data_t; + +// FIXME : Add to options later +extern const uint8_t char_count_size_in_bits; +extern const uint8_t hits_count_size_in_bits; +extern const uint8_t ref_ptr_size_in_bits; +extern const uint8_t leaf_offset_ptr_size_in_bits; +extern const uint8_t other_offset_ptr_size_in_bits; + +typedef enum +{ + CODE, + EMPTY_NODE, + LEAF_COUNT, + LEAF_HITS, + UNIFORM_COUNT, + UNIFORM_BP, + LEAF_PTR, + OTHER_PTR +} byte_type_t; + +void ert_build_kmertree(const bwaidx_t *bid, bwtintv_t ik, bwtintv_t ok[4], int curDepth, node_t *parent_node, int step, int max_depth); + +void handleDivergence(const bwaidx_t *bid, bwtintv_t ok[4], int depth, node_t *parent_node, int step, int max_depth); + +void handleLeaf(const bwaidx_t *bid, bwtintv_t ik, node_t *n, 
int step); + +void ert_build_table(const bwaidx_t *bid, bwtintv_t ik, bwtintv_t ok[4], uint8_t *mlt_data, uint8_t *mh_data, uint64_t *size, uint64_t *mh_size, uint8_t *aq, uint64_t *numHits, uint64_t *max_next_ptr, uint64_t next_ptr_width, int step, int max_depth); + +void ert_traverse_kmertree(node_t *n, uint8_t *mlt_data, uint8_t *mh_data, uint64_t *byte_idx, uint64_t *mh_byte_idx, int depth, uint64_t *numHits, uint64_t *max_ptr, uint64_t next_ptr_width, int step); + +void ert_destroy_kmertree(node_t *n); + +void buildERTKmerTrees(char *kmer_tbl_file_name, bwaidx_t *bid, char *prefix, int num_threads, int readLength); + +#endif \ No newline at end of file diff --git a/ertseeding.c b/ertseeding.c new file mode 100644 index 0000000..49d59c6 --- /dev/null +++ b/ertseeding.c @@ -0,0 +1,3511 @@ +#include "ertseeding.h" + + +#define smem_lt(a, b) ((a).start == (b).start ? (a).end > (b).end : (a).start < (b).start) +KSORT_INIT(mem_smem_ert, mem_t, smem_lt) + +#define smem_lt_2(a, b) ((a).start == (b).start ? 
(a).end < (b).end : (a).start < (b).start) +KSORT_INIT(mem_smem_sort_lt_ert, mem_t, smem_lt_2) + + /** + * 256 x 3 (2B,3B,4B ptrs) x 4 (ACGT) table encoding offset to next address given a code byte for a leaf node + */ + unsigned char leaf_table[256 * 3][4] = { + {0,0,0,0}, + {0,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + {5,0,0,0},{0,0,0,0},{0,2,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + {2,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + {5,0,0,0},{0,0,0,0},{0,0,2,0},{0,5,0,0}, + {10,5,0,0},{0,5,0,0},{0,7,2,0},{0,0,0,0}, + {5,0,0,0},{0,0,0,0},{0,0,2,0},{0,0,2,0}, + {7,0,2,0},{0,0,2,0},{0,0,4,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + {5,0,0,0},{0,0,0,0},{0,2,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + {2,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + {2,0,0,0},{0,0,0,0},{0,0,0,0},{0,2,0,0}, + {7,2,0,0},{0,2,0,0},{0,4,0,0},{0,0,0,0}, + {2,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + {4,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + {5,0,0,0},{0,0,0,0},{0,0,0,2},{0,5,0,0}, + {10,5,0,0},{0,5,0,0},{0,7,0,2},{0,0,0,0}, + {5,0,0,0},{0,0,0,0},{0,0,0,2},{0,0,0,2}, + {7,0,0,2},{0,0,0,2},{0,0,0,4},{0,0,5,0}, + {10,0,5,0},{0,0,5,0},{0,0,7,2},{0,10,5,0}, + {15,10,5,0},{0,10,5,0},{0,12,7,2},{0,0,5,0}, + {10,0,5,0},{0,0,5,0},{0,0,7,2},{0,0,7,2}, + {12,0,7,2},{0,0,7,2},{0,0,9,4},{0,0,0,0}, + {5,0,0,0},{0,0,0,0},{0,0,0,2},{0,5,0,0}, + {10,5,0,0},{0,5,0,0},{0,7,0,2},{0,0,0,0}, + {5,0,0,0},{0,0,0,0},{0,0,0,2},{0,0,0,2}, + {7,0,0,2},{0,0,0,2},{0,0,0,4},{0,0,0,2}, + {7,0,0,2},{0,0,0,2},{0,0,0,4},{0,7,0,2}, + {12,7,0,2},{0,7,0,2},{0,9,0,4},{0,0,0,2}, + {7,0,0,2},{0,0,0,2},{0,0,0,4},{0,0,0,4}, + {9,0,0,4},{0,0,0,4},{0,0,0,6},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + {5,0,0,0},{0,0,0,0},{0,2,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + {2,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + {5,0,0,0},{0,0,0,0},{0,0,2,0},{0,5,0,0}, + {10,5,0,0},{0,5,0,0},{0,7,2,0},{0,0,0,0}, + {5,0,0,0},{0,0,0,0},{0,0,2,0},{0,0,2,0}, + 
{7,0,2,0},{0,0,2,0},{0,0,4,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + {5,0,0,0},{0,0,0,0},{0,2,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + {2,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + {2,0,0,0},{0,0,0,0},{0,0,0,0},{0,2,0,0}, + {7,2,0,0},{0,2,0,0},{0,4,0,0},{0,0,0,0}, + {2,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + {4,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + {2,0,0,0},{0,0,0,0},{0,0,0,0},{0,2,0,0}, + {7,2,0,0},{0,2,0,0},{0,4,0,0},{0,0,0,0}, + {2,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + {4,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,2,0}, + {7,0,2,0},{0,0,2,0},{0,0,4,0},{0,7,2,0}, + {12,7,2,0},{0,7,2,0},{0,9,4,0},{0,0,2,0}, + {7,0,2,0},{0,0,2,0},{0,0,4,0},{0,0,4,0}, + {9,0,4,0},{0,0,4,0},{0,0,6,0},{0,0,0,0}, + {2,0,0,0},{0,0,0,0},{0,0,0,0},{0,2,0,0}, + {7,2,0,0},{0,2,0,0},{0,4,0,0},{0,0,0,0}, + {2,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + {4,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + {4,0,0,0},{0,0,0,0},{0,0,0,0},{0,4,0,0}, + {9,4,0,0},{0,4,0,0},{0,6,0,0},{0,0,0,0}, + {4,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + {6,0,0,0},{0,0,0,0},{0,0,0,0}, + {0,0,0,0}, + {0,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + {5,0,0,0},{0,0,0,0},{0,3,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + {3,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + {5,0,0,0},{0,0,0,0},{0,0,3,0},{0,5,0,0}, + {10,5,0,0},{0,5,0,0},{0,8,3,0},{0,0,0,0}, + {5,0,0,0},{0,0,0,0},{0,0,3,0},{0,0,3,0}, + {8,0,3,0},{0,0,3,0},{0,0,6,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + {5,0,0,0},{0,0,0,0},{0,3,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + {3,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + {3,0,0,0},{0,0,0,0},{0,0,0,0},{0,3,0,0}, + {8,3,0,0},{0,3,0,0},{0,6,0,0},{0,0,0,0}, + {3,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + {6,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + {5,0,0,0},{0,0,0,0},{0,0,0,3},{0,5,0,0}, + {10,5,0,0},{0,5,0,0},{0,8,0,3},{0,0,0,0}, + {5,0,0,0},{0,0,0,0},{0,0,0,3},{0,0,0,3}, + {8,0,0,3},{0,0,0,3},{0,0,0,6},{0,0,5,0}, + {10,0,5,0},{0,0,5,0},{0,0,8,3},{0,10,5,0}, + 
{15,10,5,0},{0,10,5,0},{0,13,8,3},{0,0,5,0}, + {10,0,5,0},{0,0,5,0},{0,0,8,3},{0,0,8,3}, + {13,0,8,3},{0,0,8,3},{0,0,11,6},{0,0,0,0}, + {5,0,0,0},{0,0,0,0},{0,0,0,3},{0,5,0,0}, + {10,5,0,0},{0,5,0,0},{0,8,0,3},{0,0,0,0}, + {5,0,0,0},{0,0,0,0},{0,0,0,3},{0,0,0,3}, + {8,0,0,3},{0,0,0,3},{0,0,0,6},{0,0,0,3}, + {8,0,0,3},{0,0,0,3},{0,0,0,6},{0,8,0,3}, + {13,8,0,3},{0,8,0,3},{0,11,0,6},{0,0,0,3}, + {8,0,0,3},{0,0,0,3},{0,0,0,6},{0,0,0,6}, + {11,0,0,6},{0,0,0,6},{0,0,0,9},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + {5,0,0,0},{0,0,0,0},{0,3,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + {3,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + {5,0,0,0},{0,0,0,0},{0,0,3,0},{0,5,0,0}, + {10,5,0,0},{0,5,0,0},{0,8,3,0},{0,0,0,0}, + {5,0,0,0},{0,0,0,0},{0,0,3,0},{0,0,3,0}, + {8,0,3,0},{0,0,3,0},{0,0,6,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + {5,0,0,0},{0,0,0,0},{0,3,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + {3,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + {3,0,0,0},{0,0,0,0},{0,0,0,0},{0,3,0,0}, + {8,3,0,0},{0,3,0,0},{0,6,0,0},{0,0,0,0}, + {3,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + {6,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + {3,0,0,0},{0,0,0,0},{0,0,0,0},{0,3,0,0}, + {8,3,0,0},{0,3,0,0},{0,6,0,0},{0,0,0,0}, + {3,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + {6,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,3,0}, + {8,0,3,0},{0,0,3,0},{0,0,6,0},{0,8,3,0}, + {13,8,3,0},{0,8,3,0},{0,11,6,0},{0,0,3,0}, + {8,0,3,0},{0,0,3,0},{0,0,6,0},{0,0,6,0}, + {11,0,6,0},{0,0,6,0},{0,0,9,0},{0,0,0,0}, + {3,0,0,0},{0,0,0,0},{0,0,0,0},{0,3,0,0}, + {8,3,0,0},{0,3,0,0},{0,6,0,0},{0,0,0,0}, + {3,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + {6,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + {6,0,0,0},{0,0,0,0},{0,0,0,0},{0,6,0,0}, + {11,6,0,0},{0,6,0,0},{0,9,0,0},{0,0,0,0}, + {6,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + {9,0,0,0},{0,0,0,0},{0,0,0,0}, + {0,0,0,0}, + {0,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + {5,0,0,0},{0,0,0,0},{0,4,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + 
{4,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + {5,0,0,0},{0,0,0,0},{0,0,4,0},{0,5,0,0}, + {10,5,0,0},{0,5,0,0},{0,9,4,0},{0,0,0,0}, + {5,0,0,0},{0,0,0,0},{0,0,4,0},{0,0,4,0}, + {9,0,4,0},{0,0,4,0},{0,0,8,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + {5,0,0,0},{0,0,0,0},{0,4,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + {4,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + {4,0,0,0},{0,0,0,0},{0,0,0,0},{0,4,0,0}, + {9,4,0,0},{0,4,0,0},{0,8,0,0},{0,0,0,0}, + {4,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + {8,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + {5,0,0,0},{0,0,0,0},{0,0,0,4},{0,5,0,0}, + {10,5,0,0},{0,5,0,0},{0,9,0,4},{0,0,0,0}, + {5,0,0,0},{0,0,0,0},{0,0,0,4},{0,0,0,4}, + {9,0,0,4},{0,0,0,4},{0,0,0,8},{0,0,5,0}, + {10,0,5,0},{0,0,5,0},{0,0,9,4},{0,10,5,0}, + {15,10,5,0},{0,10,5,0},{0,14,9,4},{0,0,5,0}, + {10,0,5,0},{0,0,5,0},{0,0,9,4},{0,0,9,4}, + {14,0,9,4},{0,0,9,4},{0,0,13,8},{0,0,0,0}, + {5,0,0,0},{0,0,0,0},{0,0,0,4},{0,5,0,0}, + {10,5,0,0},{0,5,0,0},{0,9,0,4},{0,0,0,0}, + {5,0,0,0},{0,0,0,0},{0,0,0,4},{0,0,0,4}, + {9,0,0,4},{0,0,0,4},{0,0,0,8},{0,0,0,4}, + {9,0,0,4},{0,0,0,4},{0,0,0,8},{0,9,0,4}, + {14,9,0,4},{0,9,0,4},{0,13,0,8},{0,0,0,4}, + {9,0,0,4},{0,0,0,4},{0,0,0,8},{0,0,0,8}, + {13,0,0,8},{0,0,0,8},{0,0,0,12},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + {5,0,0,0},{0,0,0,0},{0,4,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + {4,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + {5,0,0,0},{0,0,0,0},{0,0,4,0},{0,5,0,0}, + {10,5,0,0},{0,5,0,0},{0,9,4,0},{0,0,0,0}, + {5,0,0,0},{0,0,0,0},{0,0,4,0},{0,0,4,0}, + {9,0,4,0},{0,0,4,0},{0,0,8,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + {5,0,0,0},{0,0,0,0},{0,4,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + {4,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + {4,0,0,0},{0,0,0,0},{0,0,0,0},{0,4,0,0}, + {9,4,0,0},{0,4,0,0},{0,8,0,0},{0,0,0,0}, + {4,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + {8,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + {4,0,0,0},{0,0,0,0},{0,0,0,0},{0,4,0,0}, + 
{9,4,0,0},{0,4,0,0},{0,8,0,0},{0,0,0,0}, + {4,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + {8,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,4,0}, + {9,0,4,0},{0,0,4,0},{0,0,8,0},{0,9,4,0}, + {14,9,4,0},{0,9,4,0},{0,13,8,0},{0,0,4,0}, + {9,0,4,0},{0,0,4,0},{0,0,8,0},{0,0,8,0}, + {13,0,8,0},{0,0,8,0},{0,0,12,0},{0,0,0,0}, + {4,0,0,0},{0,0,0,0},{0,0,0,0},{0,4,0,0}, + {9,4,0,0},{0,4,0,0},{0,8,0,0},{0,0,0,0}, + {4,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + {8,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + {8,0,0,0},{0,0,0,0},{0,0,0,0},{0,8,0,0}, + {13,8,0,0},{0,8,0,0},{0,12,0,0},{0,0,0,0}, + {8,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + {12,0,0,0},{0,0,0,0},{0,0,0,0} + }; + +/** + * 256 x 3 (2B,3B,4B ptrs) x 4 (ACGT) table encoding offset to next address given a code byte for an internal 'DIVERGE' node + */ +unsigned char code_table[256 * 3][4] = { + {0,0,0,0}, + {0,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{2,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{2,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{2,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{2,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{2,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{2,0,0,0},{0,2,0,0}, + {0,2,0,0},{0,2,0,0},{4,2,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{2,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{2,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + 
{0,0,0,0},{0,0,0,0},{2,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{2,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{2,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{2,0,0,0},{0,2,0,0}, + {0,2,0,0},{0,2,0,0},{4,2,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{2,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{2,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{2,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{2,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{2,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{2,0,0,0},{0,2,0,0}, + {0,2,0,0},{0,2,0,0},{4,2,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{2,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{2,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{2,0,0,0},{0,2,0,0}, + {0,2,0,0},{0,2,0,0},{4,2,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{2,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{2,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{2,0,0,0},{0,2,0,0}, + {0,2,0,0},{0,2,0,0},{4,2,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{2,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{2,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{2,0,0,0},{0,2,0,0}, + {0,2,0,0},{0,2,0,0},{4,2,0,0},{0,0,2,0}, + {0,0,2,0},{0,0,2,0},{4,0,2,0},{0,0,2,0}, + {0,0,2,0},{0,0,2,0},{4,0,2,0},{0,0,2,0}, + {0,0,2,0},{0,0,2,0},{4,0,2,0},{0,4,2,0}, + {0,4,2,0},{0,4,2,0},{6,4,2,0}, + {0,0,0,0}, + {0,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{3,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{3,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + 
{0,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{3,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{3,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{3,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{3,0,0,0},{0,3,0,0}, + {0,3,0,0},{0,3,0,0},{6,3,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{3,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{3,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{3,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{3,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{3,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{3,0,0,0},{0,3,0,0}, + {0,3,0,0},{0,3,0,0},{6,3,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{3,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{3,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{3,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{3,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{3,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{3,0,0,0},{0,3,0,0}, + {0,3,0,0},{0,3,0,0},{6,3,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{3,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{3,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{3,0,0,0},{0,3,0,0}, + {0,3,0,0},{0,3,0,0},{6,3,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{3,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{3,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{3,0,0,0},{0,3,0,0}, + 
{0,3,0,0},{0,3,0,0},{6,3,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{3,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{3,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{3,0,0,0},{0,3,0,0}, + {0,3,0,0},{0,3,0,0},{6,3,0,0},{0,0,3,0}, + {0,0,3,0},{0,0,3,0},{6,0,3,0},{0,0,3,0}, + {0,0,3,0},{0,0,3,0},{6,0,3,0},{0,0,3,0}, + {0,0,3,0},{0,0,3,0},{6,0,3,0},{0,6,3,0}, + {0,6,3,0},{0,6,3,0},{9,6,3,0}, + {0,0,0,0}, + {0,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{4,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{4,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{4,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{4,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{4,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{4,0,0,0},{0,4,0,0}, + {0,4,0,0},{0,4,0,0},{8,4,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{4,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{4,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{4,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{4,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{4,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{4,0,0,0},{0,4,0,0}, + {0,4,0,0},{0,4,0,0},{8,4,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{4,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + 
{0,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{4,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{4,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{4,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{4,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{4,0,0,0},{0,4,0,0}, + {0,4,0,0},{0,4,0,0},{8,4,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{4,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{4,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{4,0,0,0},{0,4,0,0}, + {0,4,0,0},{0,4,0,0},{8,4,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{4,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{4,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{4,0,0,0},{0,4,0,0}, + {0,4,0,0},{0,4,0,0},{8,4,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{4,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{4,0,0,0},{0,0,0,0}, + {0,0,0,0},{0,0,0,0},{4,0,0,0},{0,4,0,0}, + {0,4,0,0},{0,4,0,0},{8,4,0,0},{0,0,4,0}, + {0,0,4,0},{0,0,4,0},{8,0,4,0},{0,0,4,0}, + {0,0,4,0},{0,0,4,0},{8,0,4,0},{0,0,4,0}, + {0,0,4,0},{0,0,4,0},{8,0,4,0},{0,8,4,0}, + {0,8,4,0},{0,8,4,0},{12,8,4,0} +}; + +/** + * Return integer key from k-mer string. 
+ * + * @param str Query sequence + * @param keysize K-mer length + * @param index Index into query sequence + * @param seq_len Length of query sequence + * @param end_flag Flag to indicate end of query + * @param idx_first_N Index of first ambiguous bp in the k-mer + * + * @return Integer key (hash) for k-mer + */ +static inline uint32_t getHashKey (const uint8_t *str, const int keysize, int index, int seq_len, int* end_flag, int* idx_first_N) { + int i; + uint32_t key = 0; + int len = keysize; + if (index + keysize > seq_len) { + *end_flag = 1; + len = seq_len - index; + } + for (i = 0; i < len; ++i) { + if (str[i] != 4) { + key |= ((uint32_t)(str[i]) << (i << 1)); + } + else { + *idx_first_N = i; + break; + } + } + return key; +} + +uint8_t *get_seq(int64_t l_pac, const uint8_t *pac, int64_t beg, int64_t end, + int64_t *len, uint8_t *ref_string, uint8_t *seqb) +{ + uint8_t *seq = 0; + if (end < beg) end ^= beg, beg ^= end, end ^= beg; // if end is smaller, swap + if (end > l_pac<<1) end = l_pac<<1; + if (beg < 0) beg = 0; + if (beg >= l_pac || end <= l_pac) { + *len = end - beg; + if (beg >= l_pac) { // reverse strand + seq = ref_string + beg; + } else { // forward strand + seq = ref_string + beg; + } + + } else *len = 0; // if bridging the forward-reverse boundary, return nothing + return seq; +} + + +/** + * Compute offset to child and return address of child node + * + * @param raux read parameters + * @param mlt_data radix tree of k-mer + * @param code type of child nodes (EMPTY:00, LEAF:01, UNIFORM:10, DIVERGE:11) - 2b encoded + * @param c next character in read (used to traverse tree) + * @param byteIdx byte index into the mlt_data radix tree + * + */ +void getOffsetToChildNode(read_aux_t* raux, uint8_t* mlt_data, uint8_t code, uint8_t c, uint64_t* byteIdx) { + uint64_t nextByteIdx = *byteIdx; + uint64_t startByteIdx = nextByteIdx - 1; + int offset = code_table[((raux->ptr_width - 2) << 8) + code][c]; + uint32_t reseed_data = 0; + nextByteIdx += offset; + 
memcpy_bwamem(&reseed_data, raux->ptr_width * sizeof(uint8_t), &mlt_data[nextByteIdx], raux->ptr_width * sizeof(uint8_t), __FILE__, __LINE__); + uint32_t jumpByteIdx = reseed_data >> 6; + raux->num_hits = (reseed_data & 0x3F); + nextByteIdx = startByteIdx + jumpByteIdx; + *byteIdx = nextByteIdx; +} + + +/** + * Compute offset to start of leaf data + * + * @param raux read parameters + * @param code type of child nodes (EMPTY:00, LEAF:01, UNIFORM:10, DIVERGE:11) - 2b encoded + * @param c next character in read (used to traverse tree) + * + */ +static inline int getOffsetToLeafData(read_aux_t* raux, uint8_t code, uint8_t c) { + return leaf_table[((raux->ptr_width - 2) << 8) + code][c]; +} + +/** + * This routine does depth-first tree traversal starting from an internal node to obtain all hits at leaf nodes + * + * @param raux read parameters + * @param mlt_data radix tree of k-mer + * @param byteIdx byte index into the mlt_data radix tree + * @param mem maximal-exact-match storage + * @param bc next character in read (used to traverse tree) + * @param hits list of hits for read + */ +void getNextByteIdx_dfs(read_aux_t* raux, uint8_t* mlt_data, uint64_t* byte_idx, mem_t* mem, uint8_t bc, u64v* hits) { + + uint64_t nextByteIdx = *byte_idx; + uint64_t ref_pos = 0; + uint8_t c; + c = 3 - bc; + mem->skip_ref_fetch = 1; // MEM cannot be extended by leaf decompression + uint8_t code = mlt_data[nextByteIdx++]; + uint8_t code_c = (code >> (c << 1)) & 3; + assert(code != 0); + if (code_c == LEAF) { // Hit a leaf node + int k; + uint64_t leaf_data = 0; + nextByteIdx += getOffsetToLeafData(raux, code, c); + memcpy_bwamem(&leaf_data, 5 * sizeof(uint8_t), &mlt_data[nextByteIdx], 5 * sizeof(uint8_t), __FILE__, __LINE__); + if (leaf_data & 1) { // Found a multi-hit leaf node + nextByteIdx = raux->mh_start_addr + (leaf_data >> 1); + memcpy_bwamem(&raux->num_hits, 2 * sizeof(uint8_t), &mlt_data[nextByteIdx], 2 * sizeof(uint8_t), __FILE__, __LINE__); + nextByteIdx += 2; + mem->hitcount 
+= raux->num_hits; + for (k = 0; k < raux->num_hits; ++k) { + memcpy_bwamem(&ref_pos, 5 * sizeof(uint8_t), &mlt_data[nextByteIdx], 5 * sizeof(uint8_t), __FILE__, __LINE__); + kv_push(uint64_t, *hits, ref_pos >> 1); + nextByteIdx += 5; + ref_pos = 0; + } + } + else { // Single-hit leaf node + raux->num_hits = 1; + mem->hitcount += raux->num_hits; + kv_push(uint64_t, *hits, leaf_data >> 1); + } + } + else if (code_c == UNIFORM) { // Multi-character internal node + int countBP = *((uint16_t*) &mlt_data[nextByteIdx]); + nextByteIdx += 2; + int numBitsForBP = countBP << 1; + int numBytesForBP = (numBitsForBP % 8) ? (numBitsForBP / 8 + 1) : (numBitsForBP / 8); + uint8_t packedBP[numBytesForBP]; + memcpy_bwamem(packedBP, numBytesForBP * sizeof(uint8_t), &mlt_data[nextByteIdx], numBytesForBP * sizeof(uint8_t), __FILE__, __LINE__); + nextByteIdx += numBytesForBP; + uint8_t k; + uint64_t startByteIdx = nextByteIdx; + for (k = 0; k < 4; ++k) { + getNextByteIdx_dfs(raux, mlt_data, &startByteIdx, mem, k, hits); + startByteIdx = nextByteIdx; + } + } + else if (code_c == DIVERGE) { // Single-character internal node + getOffsetToChildNode(raux, mlt_data, code, c, &nextByteIdx); + uint8_t k; + uint64_t startByteIdx = nextByteIdx; + for (k = 0; k < 4; ++k) { + getNextByteIdx_dfs(raux, mlt_data, &startByteIdx, mem, k, hits); + startByteIdx = nextByteIdx; + } + } + *byte_idx = nextByteIdx; +} + +/** + * Gather all hits for the MEM + * + * @param raux read parameters + * @param mlt_data radix tree of k-mer + * @param byteIdx byte index into the mlt_data radix tree + * @param mem MEM including hits + * @param hits list of hits for read + */ +void leaf_gather(read_aux_t* raux, uint8_t* mlt_data, uint64_t* byte_idx, mem_t* mem, u64v* hits) { + uint8_t k; + uint64_t startByteIdx = *byte_idx; + uint64_t tmpByteIdx = startByteIdx; + for (k = 0; k < 4; ++k) { + getNextByteIdx_dfs(raux, mlt_data, &tmpByteIdx, mem, k, hits); + tmpByteIdx = startByteIdx; + } +} + +/** + * Traverse tree during 
backward search(seeding). Similar to getNextByteIdx, except that we don't compute LEP + * + * @param raux read parameters + * @param mlt_data radix tree of k-mer + * @param byteIdx byte index into the mlt_data radix tree + * @param i index into read buffer + * @param mem maximal-exact-match storage + * @param hits list of hits for read + */ +void getNextByteIdx_backward(read_aux_t* raux, uint8_t* mlt_data, uint64_t* byte_idx, int* i, mem_t* mem, u64v* hits) { + uint64_t nextByteIdx = *byte_idx; + uint64_t ref_pos = 0; + uint8_t c, code, code_c; + if (raux->read_buf[*i] != 4) { + c = 3 - raux->read_buf[*i]; + code = mlt_data[nextByteIdx++]; + code_c = (code >> (c << 1)) & 3; + assert(code != 0); + } + else { // Terminate MEM search when we hit an 'N' + code_c = EMPTY; + } + if (code_c == EMPTY) { // Gather leaves later during forward traversal + mem->rc_end = *i; + mem->fetch_leaves = 1; + } + else if (code_c == LEAF) { // Hit a leaf node + *i += 1; + mem->rc_end = *i; + int k; + uint64_t leaf_data = 0; + nextByteIdx += getOffsetToLeafData(raux, code, c); + memcpy_bwamem(&leaf_data, 5 * sizeof(uint8_t), &mlt_data[nextByteIdx], 5 * sizeof(uint8_t), __FILE__, __LINE__); + if (leaf_data & 1) { // Found a multi-hit leaf node + nextByteIdx = raux->mh_start_addr + (leaf_data >> 1); + memcpy_bwamem(&raux->num_hits, 2 * sizeof(uint8_t), &mlt_data[nextByteIdx], 2 * sizeof(uint8_t), __FILE__, __LINE__); + nextByteIdx += 2; + mem->hitcount += raux->num_hits; + for (k = 0; k < raux->num_hits; ++k) { + memcpy_bwamem(&ref_pos, 5 * sizeof(uint8_t), &mlt_data[nextByteIdx], 5 * sizeof(uint8_t), __FILE__, __LINE__); + nextByteIdx += 5; + kv_push(uint64_t, *hits, ref_pos >> 1); + ref_pos = 0; + } + // + // We found a multi-hit. 
But to report hits in the same order as BWA-MEM, + // we will fetch the hits again in a forward tree traversal + // + mem->fetch_leaves = 1; + } + else { // Single-hit leaf node + mem->hitcount += 1; + kv_push(uint64_t, *hits, leaf_data >> 1); + } + } + else if (code_c == UNIFORM) { // Multi-character internal node + uint32_t j; + int countBP = *((uint16_t*) &mlt_data[nextByteIdx]); + nextByteIdx += 2; + int numBitsForBP = countBP << 1; + int numBytesForBP = (numBitsForBP % 8) ? (numBitsForBP / 8 + 1) : (numBitsForBP / 8); + uint8_t packedBP[numBytesForBP]; + memcpy_bwamem(packedBP, numBytesForBP * sizeof(uint8_t), &mlt_data[nextByteIdx], numBytesForBP * sizeof(uint8_t), __FILE__, __LINE__); + nextByteIdx += numBytesForBP; + // Unpack base pairs + uint8_t unpackedBP[countBP]; + for (j = 0; j < countBP; ++j) { + unpackedBP[j] = ((packedBP[j >> 2] >> ((~(j) & 3) << 1)) & 3); + } + // Count number of matching base pairs with read + for (j = 0; j < countBP; ++j) { + if ((*i + j) >= raux->l_seq) { + break; + } + if (raux->read_buf[*i+j] == 4) { + break; + } + if (3 - raux->read_buf[*i+j] != unpackedBP[j]) { + break; + } + } + *i += j; + if (j == countBP) { // We match all bps + if (*i < raux->l_seq) { + getNextByteIdx_backward(raux, mlt_data, &nextByteIdx, i, mem, hits); + } + else { + mem->rc_end = *i; + } + } + else { // Did not match all bps. Gather leaves for MEM in a later forward traversal + mem->rc_end = *i; + mem->fetch_leaves = 1; + } + } + else { // Single-character internal node + getOffsetToChildNode(raux, mlt_data, code, c, &nextByteIdx); + *i += 1; + if (*i < raux->l_seq) { + getNextByteIdx_backward(raux, mlt_data, &nextByteIdx, i, mem, hits); + } + else { + mem->rc_end = *i; + } + } + *byte_idx = nextByteIdx; +} + +/** + * Traverse tree during backward search (reseeding). 
Terminate when fewer than 'raux->limit' hits are found for + * an internal node + * + * @param raux read parameters + * @param mlt_data radix tree of k-mer + * @param byteIdx byte index into the mlt_data radix tree + * @param i index into read buffer + * @param mem maximal-exact-match storage + * @param hits list of hits for each read + */ +void getNextByteIdx_backward_wlimit(read_aux_t* raux, uint8_t* mlt_data, uint64_t* byte_idx, int* i, mem_t* mem, u64v* hits) { + + uint64_t nextByteIdx = *byte_idx; + uint64_t ref_pos = 0; + uint8_t c, code, code_c; + if (raux->read_buf[*i] != 4) { + c = 3 - raux->read_buf[*i]; + code = mlt_data[nextByteIdx++]; + code_c = (code >> (c << 1)) & 3; + assert(code != 0); + } + else { // Terminate MEM search when we hit an 'N' + code_c = EMPTY; + } + if (code_c == EMPTY) { // Gather leaves later during forward traversal + mem->rc_end = *i; + mem->fetch_leaves = 1; + } + else if (code_c == LEAF) { // Hit a leaf node + int k; + uint64_t leaf_data = 0; + nextByteIdx += getOffsetToLeafData(raux, code, c); + memcpy_bwamem(&leaf_data, 5 * sizeof(uint8_t), &mlt_data[nextByteIdx], 5 * sizeof(uint8_t), __FILE__, __LINE__); + if (leaf_data & 1) { // Found a multi-hit leaf node + nextByteIdx = raux->mh_start_addr + (leaf_data >> 1); + memcpy_bwamem(&raux->num_hits, 2 * sizeof(uint8_t), &mlt_data[nextByteIdx], 2 * sizeof(uint8_t), __FILE__, __LINE__); + nextByteIdx += 2; + // Hits exceed reseeding threshold + if (raux->num_hits >= raux->limit) { + mem->hitcount += raux->num_hits; + for (k = 0; k < raux->num_hits; ++k) { + memcpy_bwamem(&ref_pos, 5 * sizeof(uint8_t), &mlt_data[nextByteIdx], 5 * sizeof(uint8_t), __FILE__, __LINE__); + nextByteIdx += 5; + kv_push(uint64_t, *hits, ref_pos >> 1); + ref_pos = 0; + } + *i += 1; + } + } + // + // We found a multi-hit. 
But to report hits in the same order as BWA-MEM, + // we will fetch the hits again in a forward tree traversal + // + mem->fetch_leaves = 1; + mem->rc_end = *i; + } + else if (code_c == UNIFORM) { // Multi-character internal node + uint32_t j; + int countBP = *((uint16_t*) &mlt_data[nextByteIdx]); + nextByteIdx += 2; + int numBitsForBP = countBP << 1; + int numBytesForBP = (numBitsForBP % 8) ? (numBitsForBP / 8 + 1) : (numBitsForBP / 8); + uint8_t packedBP[numBytesForBP]; + memcpy_bwamem(packedBP, numBytesForBP * sizeof(uint8_t), &mlt_data[nextByteIdx], numBytesForBP * sizeof(uint8_t), __FILE__, __LINE__); + nextByteIdx += numBytesForBP; + // Unpack base pairs + uint8_t unpackedBP[countBP]; + for (j = 0; j < countBP; ++j) { + unpackedBP[j] = ((packedBP[j >> 2] >> ((~(j) & 3) << 1)) & 3); + } + // Count number of matching base pairs with read + for (j = 0; j < countBP; ++j) { + if ((*i + j) >= raux->l_seq) { + break; + } + if (raux->read_buf[*i+j] == 4) { + break; + } + if (3 - raux->read_buf[*i+j] != unpackedBP[j]) { + break; + } + } + *i += j; + if (j == countBP) { // We match all bps + if (*i < raux->l_seq) { + getNextByteIdx_backward_wlimit(raux, mlt_data, &nextByteIdx, i, mem, hits); + } + else { + mem->rc_end = *i; + mem->fetch_leaves = 1; + } + } + else { // Did not match all bps. 
Gather leaves for MEM in a later forward traversal + mem->rc_end = *i; + mem->fetch_leaves = 1; + } + } + else { // Single-character internal node + raux->num_hits = 0; + getOffsetToChildNode(raux, mlt_data, code, c, &nextByteIdx); + // In the internal nodes, raux->num_hits = 0 is used to represent # hits > 20 + if ((raux->num_hits == 0) || (raux->num_hits >= raux->limit)) { + *i += 1; + if (*i < raux->l_seq) { + getNextByteIdx_backward_wlimit(raux, mlt_data, &nextByteIdx, i, mem, hits); + } + else { + mem->rc_end = *i; + mem->fetch_leaves = 1; + } + } + else { + mem->rc_end = *i; + mem->fetch_leaves = 1; + } + } + *byte_idx = nextByteIdx; +} + +/** + * Traverse tree during forward search (seeding). + * + * @param raux read parameters + * @param mlt_data radix tree of k-mer + * @param byteIdx byte index into the mlt_data radix tree + * @param i index into read buffer + * @param mem maximal-exact-match storage + * @param hits list of hits for read + */ +void getNextByteIdx(read_aux_t* raux, uint8_t* mlt_data, uint64_t* byte_idx, int* i, mem_t* mem, u64v* hits) { + + uint64_t nextByteIdx = *byte_idx; + uint64_t parent_byte_idx = nextByteIdx; + uint64_t ref_pos = 0; + uint8_t c, code, code_c; + if (raux->read_buf[*i] != 4) { + c = 3 - raux->read_buf[*i]; + code = mlt_data[nextByteIdx++]; + code_c = (code >> (c << 1)) & 3; + assert(code != 0); + } + else { // Terminate MEM search when we hit an 'N' + code_c = EMPTY; + } + uint64_t lep_idx = 0; + uint64_t lep_bit_idx = 0; + if (code_c == EMPTY) { + if (mem->start == 0) { // FIXME: Gather leaves later during forward traversal even for MEMs starting at read_pos = 0 + int mem_len = *i; + if (mem_len >= raux->min_seed_len) { // Only gather leaves when MEM length exceeds threshold + uint64_t startByteIdx = parent_byte_idx; + leaf_gather(raux, mlt_data, &startByteIdx, mem, hits); + } + } + // Update LEP for backward search + lep_idx = raux->nextLEPBit >> 6; + lep_bit_idx = raux->nextLEPBit & (0x3FULL); + raux->lep[lep_idx] |= 
(1ULL << lep_bit_idx); + raux->nextLEPBit += 1; + } + else if (code_c == LEAF) { // Hit a leaf node + int k; + uint64_t leaf_data = 0; + nextByteIdx += getOffsetToLeafData(raux, code, c); + memcpy_bwamem(&leaf_data, 5 * sizeof(uint8_t), &mlt_data[nextByteIdx], 5 * sizeof(uint8_t), __FILE__, __LINE__); + if (leaf_data & 1) { // Found a multi-hit leaf node + nextByteIdx = raux->mh_start_addr + (leaf_data >> 1); + memcpy_bwamem(&raux->num_hits, 2 * sizeof(uint8_t), &mlt_data[nextByteIdx], 2 * sizeof(uint8_t), __FILE__, __LINE__); + nextByteIdx += 2; + mem->hitcount += raux->num_hits; + for (k = 0; k < raux->num_hits; ++k) { + memcpy_bwamem(&ref_pos, 5 * sizeof(uint8_t), &mlt_data[nextByteIdx], 5 * sizeof(uint8_t), __FILE__, __LINE__); + kv_push(uint64_t, *hits, ref_pos >> 1); + nextByteIdx += 5; + ref_pos = 0; + } + } + else { // Single-hit leaf node + raux->num_hits = 1; + mem->hitcount += raux->num_hits; + kv_push(uint64_t, *hits, leaf_data >> 1); + } + // Update LEP for backward search + lep_idx = raux->nextLEPBit >> 6; + lep_bit_idx = raux->nextLEPBit & (0x3FULL); + raux->lep[lep_idx] |= (1ULL << lep_bit_idx); + raux->nextLEPBit += 1; + *i += 1; + } + else if (code_c == UNIFORM) { // Multi-character internal node + uint32_t j; + int countBP = *((uint16_t*) &mlt_data[nextByteIdx]); + nextByteIdx += 2; + int numBitsForBP = countBP << 1; + int numBytesForBP = (numBitsForBP % 8) ? 
(numBitsForBP / 8 + 1) : (numBitsForBP / 8); + uint8_t packedBP[numBytesForBP]; + memcpy_bwamem(packedBP, numBytesForBP * sizeof(uint8_t), &mlt_data[nextByteIdx], numBytesForBP * sizeof(uint8_t), __FILE__, __LINE__); + nextByteIdx += numBytesForBP; + // Unpack base pairs + uint8_t unpackedBP[countBP]; + for (j = 0; j < countBP; ++j) { + unpackedBP[j] = ((packedBP[j >> 2] >> ((~(j) & 3) << 1)) & 3); + } + // Count number of matching base pairs with read + for (j = 0; j < countBP; ++j) { + if ((*i + j) >= raux->l_seq) { // Don't run past the end of the read + break; + } + if (raux->read_buf[*i+j] == 4) { + break; + } + if ((3 - raux->read_buf[*i+j]) != unpackedBP[j]) { + break; + } + } + raux->nextLEPBit += j; + *i += j; + if (j == countBP) { // If we match all bases of uniform entry + if (*i == raux->l_seq) { // Check if we reached the end of the read + if (mem->start == 0) { + leaf_gather(raux, mlt_data, &nextByteIdx, mem, hits); + } + lep_idx = raux->nextLEPBit >> 6; + lep_bit_idx = raux->nextLEPBit & (0x3FULL); + raux->lep[lep_idx] |= (1ULL << lep_bit_idx); + } + else { + if (*i < raux->l_seq) { + getNextByteIdx(raux, mlt_data, &nextByteIdx, i, mem, hits); + } + } + } + else { + // + // We did not match all bases of uniform entry + // Fetch all hits from leaf nodes for backward extension (dfs :( ) + // + assert(*i <= raux->l_seq); + if (mem->start == 0) { + int mem_len = *i; + if (mem_len >= raux->min_seed_len) { + leaf_gather(raux, mlt_data, &nextByteIdx, mem, hits); + } + } + // Update LEP to start backward search from last matching bp + lep_idx = raux->nextLEPBit >> 6; + lep_bit_idx = raux->nextLEPBit & (0x3FULL); + raux->lep[lep_idx] |= (1ULL << lep_bit_idx); + } + } + else { // Single-character internal node + getOffsetToChildNode(raux, mlt_data, code, c, &nextByteIdx); + lep_idx = raux->nextLEPBit >> 6; + lep_bit_idx = raux->nextLEPBit & (0x3FULL); + raux->lep[lep_idx] |= (1ULL << lep_bit_idx); + raux->nextLEPBit += 1; + *i += 1; + if (*i < raux->l_seq) { + 
getNextByteIdx(raux, mlt_data, &nextByteIdx, i, mem, hits); + } + else { + if (mem->start == 0) { + leaf_gather(raux, mlt_data, &nextByteIdx, mem, hits); + } + raux->lep[raux->nextLEPBit >> 6] |= (1ULL << (raux->nextLEPBit & (0x3FULL))); + raux->nextLEPBit += 1; + } + } + *byte_idx = nextByteIdx; +} + +/** + * Traverse tree during forward search (reseeding). Terminate when fewer than 'raux->limit' hits are found for an + * internal node + * + * @param raux read parameters + * @param mlt_data radix tree of k-mer + * @param byteIdx byte index into the mlt_data radix tree + * @param i index into read buffer + * @param mem maximal-exact-match storage + * @param visited stack to store list of visited nodes + * @param hits list of hits for read + */ +void getNextByteIdx_wlimit(read_aux_t* raux, uint8_t* mlt_data, uint64_t* byte_idx, int* i, mem_t* mem, path_v* visited, u64v* hits) { + + uint64_t nextByteIdx = *byte_idx; + uint64_t parent_byte_idx = nextByteIdx; + uint64_t ref_pos = 0; + uint8_t c, code, code_c; + if (raux->read_buf[*i] != 4) { + c = 3 - raux->read_buf[*i]; + code = mlt_data[nextByteIdx++]; + code_c = (code >> (c << 1)) & 3; + assert(code != 0); + } + else { // Terminate MEM search when we hit an 'N' + code_c = EMPTY; + } + uint64_t lep_idx = 0; + uint64_t lep_bit_idx = 0; + if (code_c == EMPTY) { + if (mem->start == 0) { + int mem_len = *i; + if (mem_len >= raux->min_seed_len) { + leaf_gather(raux, mlt_data, &parent_byte_idx, mem, hits); + } + } + // Update LEP for backward search + lep_idx = raux->nextLEPBit >> 6; + lep_bit_idx = raux->nextLEPBit & (0x3FULL); + raux->lep[lep_idx] |= (1ULL << lep_bit_idx); + raux->nextLEPBit += 1; + } + else if (code_c == LEAF) { // Hit a leaf node + int k; + uint64_t leaf_data = 0; + nextByteIdx += getOffsetToLeafData(raux, code, c); + memcpy_bwamem(&leaf_data, 5 * sizeof(uint8_t), &mlt_data[nextByteIdx], 5 * sizeof(uint8_t), __FILE__, __LINE__); + if (leaf_data & 1) { // Found a multi-hit leaf node + nextByteIdx = 
raux->mh_start_addr + (leaf_data >> 1); + memcpy_bwamem(&raux->num_hits, 2 * sizeof(uint8_t), &mlt_data[nextByteIdx], 2 * sizeof(uint8_t), __FILE__, __LINE__); + nextByteIdx += 2; + } + else { // Single-hit leaf node + raux->num_hits = 1; + } + // Hits exceed reseeding threshold + if (raux->num_hits >= raux->limit) { + mem->hitcount += raux->num_hits; + for (k = 0; k < raux->num_hits; ++k) { + memcpy_bwamem(&ref_pos, 5 * sizeof(uint8_t), &mlt_data[nextByteIdx], 5 * sizeof(uint8_t), __FILE__, __LINE__); + kv_push(uint64_t, *hits, ref_pos >> 1); + nextByteIdx += 5; + ref_pos = 0; + } + *i += 1; + } + else { + // Do DFS traversal to gather leaves starting from parent node + if (mem->start == 0) { + int mem_len = *i; + if (mem_len >= raux->min_seed_len) { + node_info_t* tmp_node_info = &kv_pop(*visited); + leaf_gather(raux, mlt_data, &tmp_node_info->byte_idx, mem, hits); + } + } + } + // Update LEP for backward search + lep_idx = raux->nextLEPBit >> 6; + lep_bit_idx = raux->nextLEPBit & (0x3FULL); + raux->lep[lep_idx] |= (1ULL << lep_bit_idx); + raux->nextLEPBit += 1; + } + else if (code_c == UNIFORM) { // Multi-character internal node + uint32_t j; + int countBP = *((uint16_t*) &mlt_data[nextByteIdx]); + nextByteIdx += 2; + int numBitsForBP = countBP << 1; + int numBytesForBP = (numBitsForBP % 8) ? 
(numBitsForBP / 8 + 1) : (numBitsForBP / 8); + uint8_t packedBP[numBytesForBP]; + memcpy_bwamem(packedBP, numBytesForBP * sizeof(uint8_t), &mlt_data[nextByteIdx], numBytesForBP * sizeof(uint8_t), __FILE__, __LINE__); + nextByteIdx += numBytesForBP; + // Unpack base pairs + uint8_t unpackedBP[countBP]; + for (j = 0; j < countBP; ++j) { + unpackedBP[j] = ((packedBP[j >> 2] >> ((~(j) & 3) << 1)) & 3); + } + // Count number of matching base pairs with read + for (j = 0; j < countBP; ++j) { + if ((*i + j) >= raux->l_seq) { // Don't run past the end of the read + break; + } + if (raux->read_buf[*i+j] == 4) { + break; + } + if ((3 - raux->read_buf[*i+j]) != unpackedBP[j]) { + break; + } + } + raux->nextLEPBit += j; + *i += j; + if (j == countBP) { // If we match all bases of uniform entry + if (*i == raux->l_seq) { // Check if we reached the end of the read + if (mem->start == 0) { + leaf_gather(raux, mlt_data, &nextByteIdx, mem, hits); + } + lep_idx = raux->nextLEPBit >> 6; + lep_bit_idx = raux->nextLEPBit & (0x3FULL); + raux->lep[lep_idx] |= (1ULL << lep_bit_idx); + } + else { + if (*i < raux->l_seq) { + getNextByteIdx_wlimit(raux, mlt_data, &nextByteIdx, i, mem, visited, hits); + } + } + } + else { + // + // We did not match all bases of uniform entry + // Fetch all hits from leaf nodes for backward extension (dfs :( ) + // + assert(*i <= raux->l_seq); + if (mem->start == 0) { + int mem_len = *i; + if (mem_len >= raux->min_seed_len) { + leaf_gather(raux, mlt_data, &nextByteIdx, mem, hits); + } + } + lep_idx = raux->nextLEPBit >> 6; + lep_bit_idx = raux->nextLEPBit & (0x3FULL); + raux->lep[lep_idx] |= (1ULL << lep_bit_idx); + } + } + else { + getOffsetToChildNode(raux, mlt_data, code, c, &nextByteIdx); + lep_idx = raux->nextLEPBit >> 6; + lep_bit_idx = raux->nextLEPBit & (0x3FULL); + raux->lep[lep_idx] |= (1ULL << lep_bit_idx); + raux->nextLEPBit += 1; + // In the internal nodes, raux->num_hits = 0 is used to represent # hits > 20 + if ((raux->num_hits == 0) || 
(raux->num_hits >= raux->limit)) { + node_info_t nif; + nif.byte_idx = nextByteIdx; + nif.num_hits = raux->num_hits; + kv_push(node_info_t, *visited, nif); + *i += 1; + if (*i < raux->l_seq) { + getNextByteIdx_wlimit(raux, mlt_data, &nextByteIdx, i, mem, visited, hits); + } + else { + if (mem->start == 0) { + leaf_gather(raux, mlt_data, &nextByteIdx, mem, hits); + } + raux->lep[raux->nextLEPBit >> 6] |= (1ULL << (raux->nextLEPBit & (0x3FULL))); + raux->nextLEPBit += 1; + } + } + else { + // Do DFS traveral to gather leaves from parent node + if (mem->start == 0) { + int mem_len = *i; + if (mem_len >= raux->min_seed_len) { + node_info_t* tmp_node_info = &kv_pop(*visited); + leaf_gather(raux, mlt_data, &tmp_node_info->byte_idx, mem, hits); + } + } + } + } + *byte_idx = nextByteIdx; +} + +/** + * Traverse tree during forward search (LAST). Terminate when fewer than 'raux->limit' hits are found for an internal node + * Minimum MEM length for LAST is opt->min_seed_len + 1 + * + * @param raux read parameters + * @param mlt_data radix tree of k-mer + * @param byteIdx byte index into the mlt_data radix tree + * @param i index into read buffer + * @param mem maximal-exact match found + * @param visited stack to store list of visited nodes + * @param hits list of hits for read + */ +void getNextByteIdx_last(read_aux_t* raux, uint8_t* mlt_data, uint64_t* byte_idx, int* i, mem_t* mem, u64v* hits) { + + uint64_t nextByteIdx = *byte_idx; + uint64_t ref_pos = 0; + uint8_t c, code, code_c; + if (raux->read_buf[*i] != 4) { + c = 3 - raux->read_buf[*i]; + code = mlt_data[nextByteIdx++]; + code_c = (code >> (c << 1)) & 3; + assert(code != 0); + } + else { // Terminate MEM search when we hit an 'N' + code_c = EMPTY; + } + if (code_c == EMPTY) { + *i += 1; + } + else if (code_c == LEAF) { // Hit a leaf node + int k; + uint64_t leaf_data = 0; + nextByteIdx += getOffsetToLeafData(raux, code, c); + memcpy_bwamem(&leaf_data, 5 * sizeof(uint8_t), &mlt_data[nextByteIdx], 5 * sizeof(uint8_t), 
__FILE__, __LINE__); + if (leaf_data & 1) { // multi-hit leaf + nextByteIdx = raux->mh_start_addr + (leaf_data >> 1); + memcpy_bwamem(&raux->num_hits, 2 * sizeof(uint8_t), &mlt_data[nextByteIdx], 2 * sizeof(uint8_t), __FILE__, __LINE__); + nextByteIdx += 2; + mem->hitcount += raux->num_hits; + for (k = 0; k < raux->num_hits; ++k) { + memcpy_bwamem(&ref_pos, 5 * sizeof(uint8_t), &mlt_data[nextByteIdx], 5 * sizeof(uint8_t), __FILE__, __LINE__); + kv_push(uint64_t, *hits, ref_pos >> 1); + nextByteIdx += 5; + ref_pos = 0; + } + } + else { // single-hit leaf + raux->num_hits = 1; + mem->hitcount += raux->num_hits; + kv_push(uint64_t, *hits, leaf_data >> 1); + } + *i += 1; + } + else if (code_c == UNIFORM) { // Multi-character internal node + uint32_t j; + int countBP = *((uint16_t*) &mlt_data[nextByteIdx]); + nextByteIdx += 2; + int numBitsForBP = countBP << 1; + int numBytesForBP = (numBitsForBP % 8) ? (numBitsForBP / 8 + 1) : (numBitsForBP / 8); + uint8_t packedBP[numBytesForBP]; + memcpy_bwamem(packedBP, numBytesForBP * sizeof(uint8_t), &mlt_data[nextByteIdx], numBytesForBP * sizeof(uint8_t), __FILE__, __LINE__); + nextByteIdx += numBytesForBP; + // Unpack base pairs + uint8_t unpackedBP[countBP]; + for (j = 0; j < countBP; ++j) { + unpackedBP[j] = ((packedBP[j >> 2] >> ((~(j) & 3) << 1)) & 3); + } + // Count number of matching base pairs with read + for (j = 0; j < countBP; ++j) { + if ((*i + j) >= raux->l_seq) { //!< Don't run past the end of the read + break; + } + if (raux->read_buf[*i+j] == 4) { + break; + } + if ((3 - raux->read_buf[*i+j]) != unpackedBP[j]) { + break; + } + } + *i += j; + int len = *i - mem->start; + // + // LAST stop criterion: MEM is sufficiently long and not too frequent + // + int stop_extension = (raux->num_hits > 0 && + raux->num_hits < raux->limit && + len >= (raux->min_seed_len + 1)) ? 
1 : 0; + if (stop_extension) { + leaf_gather(raux, mlt_data, &nextByteIdx, mem, hits); + *i = mem->start + (raux->min_seed_len + 1); + } + else { + if (j == countBP) { + if (*i < raux->l_seq) { + getNextByteIdx_last(raux, mlt_data, &nextByteIdx, i, mem, hits); + } + } + else { + *i += 1; + } + } + } + else if (code_c == DIVERGE) { + getOffsetToChildNode(raux, mlt_data, code, c, &nextByteIdx); + *i += 1; + int len = *i - mem->start; + // + // LAST stop criterion: MEM is sufficiently long and not too frequent + // + int stop_extension = (raux->num_hits > 0 && + raux->num_hits < raux->limit && + len >= (raux->min_seed_len + 1)) ? 1 : 0; + if (stop_extension) { + leaf_gather(raux, mlt_data, &nextByteIdx, mem, hits); + } + else { + if (*i < raux->l_seq) { + getNextByteIdx_last(raux, mlt_data, &nextByteIdx, i, mem, hits); + } + } + } + *byte_idx = nextByteIdx; +} + +/** + * Main backward search function (seeding). Lookup up k-mer and/or x-mer table and identify root of ERT + * + * @param iaux index parameters + * @param raux read parameters + * @param i index into read buffer + * @param mem maximal-exact match found + * @param hits list of hits for read + */ +void leftExtend(index_aux_t* iaux, read_aux_t* raux, int* i, mem_t* mem, u64v* hits) { + + uint8_t code; + uint8_t* mlt_data; + uint64_t byte_idx = 0, ref_pos = 0, kmer_entry = 0, start_addr = 0; + uint32_t hashval = 0; + int idx_first_N = -1; + hashval = getHashKey(&raux->read_buf[*i], kmerSize, *i, raux->l_seq, 0, &idx_first_N); + if (idx_first_N != -1) { + *i += (kmerSize + xmerSize); + mem->rc_end = *i; + return; + } + // index-table lookup + kmer_entry = iaux->kmer_offsets[hashval]; + // index-table entry type + code = kmer_entry & METADATA_MASK; + // pointer to root of tree + start_addr = kmer_entry >> KMER_DATA_BITWIDTH; + // width used for internal pointers in tree + raux->ptr_width = (((kmer_entry >> 22) & 3) == 0) ? 
4 : ((kmer_entry >> 22) & 3); + byte_idx = 0; + if (code == INVALID) { // k-mer absent + *i += (kmerSize + xmerSize); + mem->rc_end = *i; + } + else if (code == SINGLE_HIT_LEAF) { // single-hit k-mer + mlt_data = &iaux->mlt_table[start_addr]; + byte_idx++; + memcpy_bwamem(&ref_pos, 5 * sizeof(uint8_t), &mlt_data[byte_idx], 5 * sizeof(uint8_t), __FILE__, __LINE__); + byte_idx += 5; + mem->hitcount += 1; + kv_push(uint64_t, *hits, ref_pos >> 1); + *i += kmerSize; + mem->rc_end = *i; + } + else if (code == INFREQUENT) { // k-mer has fewer than 256 hits + *i += kmerSize; + mlt_data = &iaux->mlt_table[start_addr]; + if (*i < raux->l_seq) { + // + // First 4 bytes of tree store the start of all multi-hit leaves for the k-mer + // + memcpy_bwamem(&raux->mh_start_addr, 4 * sizeof(uint8_t), &mlt_data[byte_idx], 4 * sizeof(uint8_t), __FILE__, __LINE__); + byte_idx += 4; + getNextByteIdx_backward(raux, mlt_data, &byte_idx, i, mem, hits); + } + else { + mem->rc_end = *i; + } + } + else { // k-mer has large, dense tree, do an additional x-mer lookup + uint64_t xmer_entry; + uint64_t ptr = 0; + *i += kmerSize; + mlt_data = &iaux->mlt_table[start_addr]; + hashval = getHashKey(&raux->read_buf[*i], xmerSize, *i, raux->l_seq, 0, &idx_first_N); + memcpy_bwamem(&raux->mh_start_addr, 4 * sizeof(uint8_t), &mlt_data[byte_idx], 4 * sizeof(uint8_t), __FILE__, __LINE__); + byte_idx += 4; + memcpy_bwamem(&xmer_entry, 8 * sizeof(uint8_t), &mlt_data[byte_idx + (hashval << 3)], 8 * sizeof(uint8_t), __FILE__, __LINE__); + code = xmer_entry & METADATA_MASK; + ptr = xmer_entry >> KMER_DATA_BITWIDTH; + if (idx_first_N != -1) { + *i += xmerSize; + mem->rc_end = *i; + return; + } + if (code == INVALID) { + *i += xmerSize; + mem->rc_end = *i; + } + else if (code == SINGLE_HIT_LEAF) { + byte_idx = ptr; + byte_idx++; + memcpy_bwamem(&ref_pos, 5 * sizeof(uint8_t), &mlt_data[byte_idx], 5 * sizeof(uint8_t), __FILE__, __LINE__); + byte_idx += 5; + mem->hitcount += 1; + kv_push(uint64_t, *hits, ref_pos >> 
1); + *i += xmerSize; + mem->rc_end = *i; + } + else { + byte_idx = ptr; + *i += xmerSize; + if (*i < raux->l_seq) { + getNextByteIdx_backward(raux, mlt_data, &byte_idx, i, mem, hits); + } + else { + mem->rc_end = *i; + } + } + } +} + +/** + * Main backward search function (reseeding). Lookup up k-mer and/or x-mer table and identify root of ERT. + * Return early if root node has fewer than 'raux->limit' hits + * + * @param iaux index parameters + * @param raux read parameters + * @param i index into read buffer + * @param mem maximal-exact-match storage + * @param hits list of hits for read + */ +void leftExtend_wlimit(index_aux_t* iaux, read_aux_t* raux, int* i, mem_t* mem, u64v* hits) { + + uint8_t code; + uint8_t* mlt_data; + uint64_t byte_idx = 0, kmer_entry = 0, start_addr = 0; + uint32_t hashval = 0; + int idx_first_N = -1; + hashval = getHashKey(&raux->read_buf[*i], kmerSize, *i, raux->l_seq, 0, &idx_first_N); + if (idx_first_N != -1) { + *i += (kmerSize + xmerSize); + mem->rc_end = *i; + return; + } + // index-table lookup + kmer_entry = iaux->kmer_offsets[hashval]; + // index-table entry type + code = kmer_entry & METADATA_MASK; + // pointer to root of tree + start_addr = kmer_entry >> KMER_DATA_BITWIDTH; + // width used for internal pointers in tree + raux->ptr_width = (((kmer_entry >> 22) & 3) == 0) ? 
4 : ((kmer_entry >> 22) & 3); + // # hits for k-mer (0 if > 20 hits) + raux->num_hits = (kmer_entry >> 17) & 0x1F; + byte_idx = 0; + if (code == INVALID) { // k-mer absent + *i += (kmerSize + xmerSize); + mem->rc_end = *i; + } + else if (code == SINGLE_HIT_LEAF) { // single-hit k-mer + *i += (kmerSize + xmerSize); + mem->rc_end = *i; + } + else if (code == INFREQUENT) { // k-mer has fewer than 256 hits + *i += kmerSize; + mlt_data = &iaux->mlt_table[start_addr]; + if ((raux->num_hits == 0) || (raux->num_hits >= raux->limit)) { + if (*i < raux->l_seq) { + // + // First 4 bytes of tree store the start of all multi-hit leaves for the k-mer + // + memcpy_bwamem(&raux->mh_start_addr, 4 * sizeof(uint8_t), &mlt_data[byte_idx], 4 * sizeof(uint8_t), __FILE__, __LINE__); + byte_idx += 4; + getNextByteIdx_backward_wlimit(raux, mlt_data, &byte_idx, i, mem, hits); + } + else { + // Leaf gathering to be done later + mem->rc_end = *i; + mem->fetch_leaves = 1; + } + } + else { + mem->rc_end = *i; + } + } + else { // k-mer has large, dense tree, do an additional x-mer lookup + uint64_t xmer_entry; + uint64_t ptr = 0; + *i += kmerSize; + mlt_data = &iaux->mlt_table[start_addr]; + hashval = getHashKey(&raux->read_buf[*i], xmerSize, *i, raux->l_seq, 0, &idx_first_N); + // + // First 4 bytes of tree store the start of all multi-hit leaves for the k-mer + // This helps in decoding nodes and creates compact trees + // + memcpy_bwamem(&raux->mh_start_addr, 4 * sizeof(uint8_t), &mlt_data[byte_idx], 4 * sizeof(uint8_t), __FILE__, __LINE__); + byte_idx += 4; + memcpy_bwamem(&xmer_entry, 8 * sizeof(uint8_t), &mlt_data[byte_idx + (hashval << 3)], 8 * sizeof(uint8_t), __FILE__, __LINE__); + code = xmer_entry & METADATA_MASK; + ptr = xmer_entry >> KMER_DATA_BITWIDTH; + raux->num_hits = (xmer_entry >> 17) & 0x1F; + if (idx_first_N != -1) { + *i += xmerSize; + mem->rc_end = *i; + return; + } + if (code == INVALID) { + *i += xmerSize; + mem->rc_end = *i; + } + else if (code == SINGLE_HIT_LEAF) { + 
*i += xmerSize; + mem->rc_end = *i; + } + else { + byte_idx = ptr; + *i += xmerSize; + if ((raux->num_hits == 0) || (raux->num_hits >= raux->limit)) { + if (*i < raux->l_seq) { + getNextByteIdx_backward_wlimit(raux, mlt_data, &byte_idx, i, mem, hits); + } + else { + // Leaf gathering + mem->rc_end = *i; + mem->fetch_leaves = 1; + } + } + else { + mem->rc_end = *i; + } + } + } +} + +/** + * Fetch hits for all MEMs identified after backward search (termination conditions based on reseeding) + * + * @param raux read parameters + * @param mlt_data radix tree of k-mer + * @param byteIdx byte index into the mlt_data radix tree + * @param idx index into read buffer + * @param mem maximal-exact-match storage + * @param hits list of hits for read + */ +void getNextByteIdx_fetch_leaves_prefix_reseed(read_aux_t* raux, uint8_t* mlt_data, uint64_t* byte_idx, int idx, mem_t* mem, path_v* visited, u64v* hits) { + + uint64_t nextByteIdx = *byte_idx; + uint64_t parent_byte_idx = nextByteIdx; + uint64_t ref_pos = 0; + uint8_t c; + int i = idx; + assert(raux->read_buf[i] != 4); // Should not see N in SMEMs + c = 3 - raux->read_buf[i]; + uint8_t code = mlt_data[nextByteIdx++]; + uint8_t code_c = (code >> (c << 1)) & 3; + assert(code != 0); + if (code_c == EMPTY) { // Do leaf gathering + mem->end = i; + int mem_len = mem->end - mem->start; + if (mem_len >= raux->min_seed_len) { + leaf_gather(raux, mlt_data, &parent_byte_idx, mem, hits); + } + } + else if (code_c == LEAF) { + int k; + uint64_t leaf_data = 0; + nextByteIdx += getOffsetToLeafData(raux, code, c); + memcpy_bwamem(&leaf_data, 5 * sizeof(uint8_t), &mlt_data[nextByteIdx], 5 * sizeof(uint8_t), __FILE__, __LINE__); + if (leaf_data & 1) { + nextByteIdx = raux->mh_start_addr + (leaf_data >> 1); + memcpy_bwamem(&raux->num_hits, 2 * sizeof(uint8_t), &mlt_data[nextByteIdx], 2 * sizeof(uint8_t), __FILE__, __LINE__); + nextByteIdx += 2; + } + else { + raux->num_hits = 1; + } + if (raux->num_hits >= raux->limit) { + mem->hitcount += 
raux->num_hits; + for (k = 0; k < raux->num_hits; ++k) { + memcpy_bwamem(&ref_pos, 5 * sizeof(uint8_t), &mlt_data[nextByteIdx], 5 * sizeof(uint8_t), __FILE__, __LINE__); + kv_push(uint64_t, *hits, ref_pos >> 1); + nextByteIdx += 5; + ref_pos = 0; + } + i += 1; + mem->end = i; + mem->is_multi_hit = 1; // decompress leaf node for potentially longer match + } + else { + mem->end = i; + int mem_len = mem->end - mem->start; + if (mem_len >= raux->min_seed_len) { + node_info_t* tmp_node_info = &kv_pop(*visited); + leaf_gather(raux, mlt_data, &tmp_node_info->byte_idx, mem, hits); + } + } + } + else if (code_c == UNIFORM) { + uint32_t j; + int countBP = *((uint16_t*) &mlt_data[nextByteIdx]); + nextByteIdx += 2; + int numBitsForBP = countBP << 1; + int numBytesForBP = (numBitsForBP % 8) ? (numBitsForBP / 8 + 1) : (numBitsForBP / 8); + uint8_t packedBP[numBytesForBP]; + memcpy_bwamem(packedBP, numBytesForBP * sizeof(uint8_t), &mlt_data[nextByteIdx], numBytesForBP * sizeof(uint8_t), __FILE__, __LINE__); + nextByteIdx += numBytesForBP; + // Unpack base pairs + uint8_t unpackedBP[countBP]; + for (j = 0; j < countBP; ++j) { + unpackedBP[j] = ((packedBP[j >> 2] >> ((~(j) & 3) << 1)) & 3); + } + // Count number of matching base pairs with read + for (j = 0; j < countBP; ++j) { + if ((i + j) >= raux->l_seq) { + break; + } + if (3 - raux->read_buf[i+j] != unpackedBP[j]) { + break; + } + } + i += j; + if (j == countBP) { + if (i < raux->l_seq) { + getNextByteIdx_fetch_leaves_prefix_reseed(raux, mlt_data, &nextByteIdx, i, mem, visited, hits); + } + else { + mem->end = i; + int mem_len = mem->end - mem->start; + if (mem_len >= raux->min_seed_len) { + leaf_gather(raux, mlt_data, &nextByteIdx, mem, hits); + } + } + } + else { + mem->end = i; + int mem_len = mem->end - mem->start; + if (mem_len >= raux->min_seed_len) { + leaf_gather(raux, mlt_data, &nextByteIdx, mem, hits); + } + } + } + else if (code_c == DIVERGE) { + raux->num_hits = 0; + getOffsetToChildNode(raux, mlt_data, code, c, 
&nextByteIdx); + if ((raux->num_hits == 0) || (raux->num_hits >= raux->limit)) { + node_info_t nif; + nif.byte_idx = nextByteIdx; + nif.num_hits = raux->num_hits; + kv_push(node_info_t, *visited, nif); + i += 1; + if (i < raux->l_seq) { + getNextByteIdx_fetch_leaves_prefix_reseed(raux, mlt_data, &nextByteIdx, i, mem, visited, hits); + } + else { + mem->end = i; + int mem_len = mem->end - mem->start; + if (mem_len >= raux->min_seed_len) { + leaf_gather(raux, mlt_data, &nextByteIdx, mem, hits); + } + } + } + else { + mem->end = i; + int mem_len = mem->end - mem->start; + if (mem_len >= raux->min_seed_len) { + node_info_t* tmp_node_info = &kv_pop(*visited); + leaf_gather(raux, mlt_data, &tmp_node_info->byte_idx, mem, hits); + } + } + } + *byte_idx = nextByteIdx; + +} + +/** + * Fetch hits for all MEMs identified after backward search (extend beyond mem->end) + * + * @param raux read parameters + * @param mlt_data radix tree of k-mer + * @param byteIdx byte index into the mlt_data radix tree + * @param idx index into read buffer + * @param mem maximal-exact-match storage + * @param hits list of hits for read + */ +void getNextByteIdx_fetch_leaves_prefix(read_aux_t* raux, uint8_t* mlt_data, uint64_t* byte_idx, int idx, mem_t* mem, u64v* hits) { + + uint64_t nextByteIdx = *byte_idx; + uint64_t ref_pos = 0; + uint8_t c; + int i = idx; + assert(raux->read_buf[i] != 4); // Should not see N in SMEMs + c = 3 - raux->read_buf[i]; + uint8_t code = mlt_data[nextByteIdx++]; + uint8_t code_c = (code >> (c << 1)) & 3; + assert(code != 0); + if (code_c == EMPTY) { // Do leaf gathering + mem->end = i; + int mem_len = mem->end - mem->start; + if (mem_len >= raux->min_seed_len) { + nextByteIdx = *byte_idx; + leaf_gather(raux, mlt_data, &nextByteIdx, mem, hits); + } + } + else if (code_c == LEAF) { + int k; + uint64_t leaf_data = 0; + nextByteIdx += getOffsetToLeafData(raux, code, c); + memcpy_bwamem(&leaf_data, 5 * sizeof(uint8_t), &mlt_data[nextByteIdx], 5 * sizeof(uint8_t), __FILE__, 
__LINE__); + if (leaf_data & 1) { + nextByteIdx = raux->mh_start_addr + (leaf_data >> 1); + memcpy_bwamem(&raux->num_hits, 2 * sizeof(uint8_t), &mlt_data[nextByteIdx], 2 * sizeof(uint8_t), __FILE__, __LINE__); + nextByteIdx += 2; + mem->hitcount += raux->num_hits; + for (k = 0; k < raux->num_hits; ++k) { + memcpy_bwamem(&ref_pos, 5 * sizeof(uint8_t), &mlt_data[nextByteIdx], 5 * sizeof(uint8_t), __FILE__, __LINE__); + kv_push(uint64_t, *hits, ref_pos >> 1); + nextByteIdx += 5; + ref_pos = 0; + } + } + else { + raux->num_hits = 1; + mem->hitcount += raux->num_hits; + kv_push(uint64_t, *hits, leaf_data >> 1); + } + i += 1; + mem->end = i; + } + else if (code_c == UNIFORM) { + uint32_t j; + int countBP = *((uint16_t*) &mlt_data[nextByteIdx]); + nextByteIdx += 2; + int numBitsForBP = countBP << 1; + int numBytesForBP = (numBitsForBP % 8) ? (numBitsForBP / 8 + 1) : (numBitsForBP / 8); + uint8_t packedBP[numBytesForBP]; + memcpy_bwamem(packedBP, numBytesForBP * sizeof(uint8_t), &mlt_data[nextByteIdx], numBytesForBP * sizeof(uint8_t), __FILE__, __LINE__); + nextByteIdx += numBytesForBP; + // Unpack base pairs + uint8_t unpackedBP[countBP]; + for (j = 0; j < countBP; ++j) { + unpackedBP[j] = ((packedBP[j >> 2] >> ((~(j) & 3) << 1)) & 3); + } + // Count number of matching base pairs with read + for (j = 0; j < countBP; ++j) { + if ((i + j) >= raux->l_seq) { + break; + } + if (3 - raux->read_buf[i+j] != unpackedBP[j]) { + break; + } + } + i += j; + if (j == countBP) { + if (i < raux->l_seq) { + getNextByteIdx_fetch_leaves_prefix(raux, mlt_data, &nextByteIdx, i, mem, hits); + } + else { + mem->end = i; + int mem_len = mem->end - mem->start; + if (mem_len >= raux->min_seed_len) { + leaf_gather(raux, mlt_data, &nextByteIdx, mem, hits); + } + } + } + else { + mem->end = i; + int mem_len = mem->end - mem->start; + if (mem_len >= raux->min_seed_len) { + leaf_gather(raux, mlt_data, &nextByteIdx, mem, hits); + } + } + } + else if (code_c == DIVERGE) { + raux->num_hits = 0; + 
getOffsetToChildNode(raux, mlt_data, code, c, &nextByteIdx); + i += 1; + if (i < raux->l_seq) { + getNextByteIdx_fetch_leaves_prefix(raux, mlt_data, &nextByteIdx, i, mem, hits); + } + else { + mem->end = i; + int mem_len = mem->end - mem->start; + if (mem_len >= raux->min_seed_len) { + leaf_gather(raux, mlt_data, &nextByteIdx, mem, hits); + } + } + } + +} + +/** + * Forward traversal to fetch hits for all MEMs identified after backward search. + * + * @param raux read parameters + * @param mlt_data radix tree of k-mer + * @param byteIdx byte index into the mlt_data radix tree + * @param idx index into read buffer + * @param mem maximal-exact-match storage + * @param hits list of hits for read + */ +void getNextByteIdx_fetch_leaves(read_aux_t* raux, uint8_t* mlt_data, uint64_t* byte_idx, int idx, mem_t* mem, u64v* hits) { + + uint64_t nextByteIdx = *byte_idx; + uint64_t ref_pos = 0; + uint8_t c; + int i = idx; + assert(raux->read_buf[i] != 4); // Should not see N in SMEMs + c = 3 - raux->read_buf[i]; + uint8_t code = mlt_data[nextByteIdx++]; + uint8_t code_c = (code >> (c << 1)) & 3; + assert(code != 0); + assert(code_c != EMPTY); + if (code_c == LEAF) { + int k; + uint64_t leaf_data = 0; + nextByteIdx += getOffsetToLeafData(raux, code, c); + memcpy_bwamem(&leaf_data, 5 * sizeof(uint8_t), &mlt_data[nextByteIdx], 5 * sizeof(uint8_t), __FILE__, __LINE__); + if (leaf_data & 1) { + nextByteIdx = raux->mh_start_addr + (leaf_data >> 1); + memcpy_bwamem(&raux->num_hits, 2 * sizeof(uint8_t), &mlt_data[nextByteIdx], 2 * sizeof(uint8_t), __FILE__, __LINE__); + nextByteIdx += 2; + mem->hitcount += raux->num_hits; + for (k = 0; k < raux->num_hits; ++k) { + memcpy_bwamem(&ref_pos, 5 * sizeof(uint8_t), &mlt_data[nextByteIdx], 5 * sizeof(uint8_t), __FILE__, __LINE__); + kv_push(uint64_t, *hits, ref_pos >> 1); + nextByteIdx += 5; + ref_pos = 0; + } + } + else { + raux->num_hits = 1; + mem->hitcount += raux->num_hits; + kv_push(uint64_t, *hits, leaf_data >> 1); + } + i += 1; + } + 
else if (code_c == UNIFORM) { + uint32_t j; + int countBP = *((uint16_t*) &mlt_data[nextByteIdx]); + nextByteIdx += 2; + int numBitsForBP = countBP << 1; + int numBytesForBP = (numBitsForBP % 8) ? (numBitsForBP / 8 + 1) : (numBitsForBP / 8); + uint8_t packedBP[numBytesForBP]; + memcpy_bwamem(packedBP, numBytesForBP * sizeof(uint8_t), &mlt_data[nextByteIdx], numBytesForBP * sizeof(uint8_t), __FILE__, __LINE__); + nextByteIdx += numBytesForBP; + // Unpack base pairs + uint8_t unpackedBP[countBP]; + for (j = 0; j < countBP; ++j) { + unpackedBP[j] = ((packedBP[j >> 2] >> ((~(j) & 3) << 1)) & 3); + } + // Count number of matching base pairs with read + for (j = 0; j < countBP; ++j) { + if ((i + j) >= raux->l_seq) { + break; + } + if (3 - raux->read_buf[i+j] != unpackedBP[j]) { + break; + } + } + i += j; + if (j == countBP) { + if (i < mem->end) { // only extend till end of MEM found during previous backward search + getNextByteIdx_fetch_leaves(raux, mlt_data, &nextByteIdx, i, mem, hits); + } + else { + leaf_gather(raux, mlt_data, &nextByteIdx, mem, hits); + } + } + else { + leaf_gather(raux, mlt_data, &nextByteIdx, mem, hits); + } + } + else if (code_c == DIVERGE) { + raux->num_hits = 0; + getOffsetToChildNode(raux, mlt_data, code, c, &nextByteIdx); + i += 1; + if (i < mem->end) { // only extend till end of MEM found during previous backward search + getNextByteIdx_fetch_leaves(raux, mlt_data, &nextByteIdx, i, mem, hits); + } + else { + leaf_gather(raux, mlt_data, &nextByteIdx, mem, hits); + } + } + +} + +/** + * Extend as much as possible to the right based on reseeding criteria. 
+ * + * Return early if root node has fewer than 'raux->limit' hits + * + * @param iaux index parameters + * @param raux read parameters + * @param mem maximal-exact-match storage + * @param hits list of hits for read + */ +void rightExtend_fetch_leaves_prefix_reseed(index_aux_t* iaux, read_aux_t* raux, mem_t* mem, u64v* hits) { + + uint8_t code; + uint8_t* mlt_data; + uint64_t byte_idx = 0, kmer_entry = 0, start_addr = 0; + uint32_t hashval = 0; + int flag = 0; + int i = mem->start; + int idx_first_N = -1; + hashval = getHashKey(&raux->read_buf[i], kmerSize, i, raux->l_seq, &flag, &idx_first_N); + // index-table lookup + kmer_entry = iaux->kmer_offsets[hashval]; + // index-table entry type + code = kmer_entry & METADATA_MASK; + // pointer to root of tree + start_addr = kmer_entry >> KMER_DATA_BITWIDTH; + raux->mh_start_addr = 0; + // width used for internal pointers in tree + raux->ptr_width = (((kmer_entry >> 22) & 3) == 0) ? 4 : ((kmer_entry >> 22) & 3); + // # hits for k-mer (0 if > 20 hits) + raux->num_hits = (kmer_entry >> 17) & 0x1F; + byte_idx = 0; + assert(code != INVALID); + if (code == SINGLE_HIT_LEAF) { + mem->end = i; + } + else if (code == INFREQUENT) { // k-mer has fewer than 256 hits + if ((raux->num_hits == 0) || (raux->num_hits >= raux->limit)) { + i += kmerSize; + mlt_data = &iaux->mlt_table[start_addr]; + // + // First 4 bytes of tree store the start of all multi-hit leaves for the k-mer + // + memcpy_bwamem(&raux->mh_start_addr, 4 * sizeof(uint8_t), &mlt_data[byte_idx], 4 * sizeof(uint8_t), __FILE__, __LINE__); + byte_idx += 4; + if (i < raux->l_seq) { + path_v visited; + kv_init(visited); + node_info_t nif; + nif.byte_idx = byte_idx; + nif.num_hits = raux->num_hits; + kv_push(node_info_t, visited, nif); + getNextByteIdx_fetch_leaves_prefix_reseed(raux, mlt_data, &byte_idx, i, mem, &visited, hits); + kv_destroy(visited); + } + else { + mem->end = i; + int mem_len = mem->end - mem->start; + if (mem_len >= raux->min_seed_len) { + 
leaf_gather(raux, mlt_data, &byte_idx, mem, hits); + } + } + } + else { + mem->end = i; + } + } + else if (code == FREQUENT) { // k-mer has large, dense tree, do an additional x-mer lookup + uint64_t xmer_entry; + uint64_t ptr = 0; + mlt_data = &iaux->mlt_table[start_addr]; + hashval = getHashKey(&raux->read_buf[i + kmerSize], xmerSize, i + kmerSize, raux->l_seq, 0, &idx_first_N); + memcpy_bwamem(&raux->mh_start_addr, 4 * sizeof(uint8_t), &mlt_data[byte_idx], 4 * sizeof(uint8_t), __FILE__, __LINE__); + byte_idx += 4; + memcpy_bwamem(&xmer_entry, 8 * sizeof(uint8_t), &mlt_data[byte_idx + (hashval << 3)], 8 * sizeof(uint8_t), __FILE__, __LINE__); + code = xmer_entry & METADATA_MASK; + ptr = xmer_entry >> KMER_DATA_BITWIDTH; + // 5 bits to encode number of hits at each node + raux->num_hits = (xmer_entry >> 17) & 0x1F; + if (code == INVALID) { + mem->end = i; + } + else if (code == SINGLE_HIT_LEAF) { + mem->end = i; + } + else { + // When a node has greater than 20 (opt->max_mem->intv) hits, we store a 0 in the hits field + if ((raux->num_hits == 0) || (raux->num_hits >= raux->limit)) { + byte_idx = ptr; + i += (kmerSize + xmerSize); + if (i < raux->l_seq) { + path_v visited; + kv_init(visited); + node_info_t nif; + nif.byte_idx = byte_idx; + nif.num_hits = raux->num_hits; + kv_push(node_info_t, visited, nif); + getNextByteIdx_fetch_leaves_prefix_reseed(raux, mlt_data, &byte_idx, i, mem, &visited, hits); + kv_destroy(visited); + } + else { + mem->end = i; + int mem_len = mem->end - mem->start; + if (mem_len >= raux->min_seed_len) { + leaf_gather(raux, mlt_data, &byte_idx, mem, hits); + } + } + } + else { + mem->end = i; + } + } + } +} + +/** + * Extend as much as possible to the right and gather leaves if MEM length >= opt->min_seed_len. 
+ * + * @param iaux index parameters + * @param raux read parameters + * @param mem maximal-exact-match storage + * @param hits list of hits for read + */ +void rightExtend_fetch_leaves_prefix(index_aux_t* iaux, read_aux_t* raux, mem_t* mem, u64v* hits) { + + uint8_t code; + uint8_t* mlt_data; + uint64_t byte_idx = 0, ref_pos = 0, kmer_entry = 0, start_addr = 0; + uint32_t hashval = 0; + int flag = 0; + int i = mem->start; + int idx_first_N = -1; + hashval = getHashKey(&raux->read_buf[i], kmerSize, i, raux->l_seq, &flag, &idx_first_N); + // index-table lookup + kmer_entry = iaux->kmer_offsets[hashval]; + // index-table entry type + code = kmer_entry & METADATA_MASK; + // pointer to root of tree + start_addr = kmer_entry >> KMER_DATA_BITWIDTH; + raux->mh_start_addr = 0; + // width used for internal pointers in tree, 2 bits, ptr_width = 4 is encoded as 0 + raux->ptr_width = (((kmer_entry >> 22) & 3) == 0) ? 4 : ((kmer_entry >> 22) & 3); + byte_idx = 0; + assert(code != INVALID); + if (code == SINGLE_HIT_LEAF) { + mlt_data = &iaux->mlt_table[start_addr]; + byte_idx++; + memcpy_bwamem(&ref_pos, 5 * sizeof(uint8_t), &mlt_data[byte_idx], 5 * sizeof(uint8_t), __FILE__, __LINE__); + mem->hitcount += 1; + kv_push(uint64_t, *hits, ref_pos >> 1); + byte_idx += 5; + i += kmerSize; + mem->end = i; + } + else if (code == INFREQUENT) { // k-mer has fewer than 256 hits + i += kmerSize; + mlt_data = &iaux->mlt_table[start_addr]; + // + // First 4 bytes of tree store the start of all multi-hit leaves for the k-mer + // + memcpy_bwamem(&raux->mh_start_addr, 4 * sizeof(uint8_t), &mlt_data[byte_idx], 4 * sizeof(uint8_t), __FILE__, __LINE__); + byte_idx += 4; + if (i < raux->l_seq) { + getNextByteIdx_fetch_leaves_prefix(raux, mlt_data, &byte_idx, i, mem, hits); + } + else { + mem->end = i; + int mem_len = mem->end - mem->start; + if (mem_len >= raux->min_seed_len) { + leaf_gather(raux, mlt_data, &byte_idx, mem, hits); + } + } + } + else if (code == FREQUENT) { // k-mer has large, dense 
tree, do an additional x-mer lookup + uint64_t xmer_entry; + uint64_t ptr = 0; + mlt_data = &iaux->mlt_table[start_addr]; + hashval = getHashKey(&raux->read_buf[i + kmerSize], xmerSize, i + kmerSize, raux->l_seq, 0, &idx_first_N); + memcpy_bwamem(&raux->mh_start_addr, 4 * sizeof(uint8_t), &mlt_data[byte_idx], 4 * sizeof(uint8_t), __FILE__, __LINE__); + byte_idx += 4; + memcpy_bwamem(&xmer_entry, 8 * sizeof(uint8_t), &mlt_data[byte_idx + (hashval << 3)], 8 * sizeof(uint8_t), __FILE__, __LINE__); + code = xmer_entry & METADATA_MASK; + ptr = xmer_entry >> KMER_DATA_BITWIDTH; + if (code == INVALID) { + mem->end = i; + } + else if (code == SINGLE_HIT_LEAF) { + byte_idx = ptr; + byte_idx++; + memcpy_bwamem(&ref_pos, 5 * sizeof(uint8_t), &mlt_data[byte_idx], 5 * sizeof(uint8_t), __FILE__, __LINE__); + mem->hitcount += 1; + kv_push(uint64_t, *hits, ref_pos >> 1); + byte_idx += 5; + i += (kmerSize + xmerSize); + mem->end = i; + } + else { + byte_idx = ptr; + i += (kmerSize + xmerSize); + if (i < raux->l_seq) { + getNextByteIdx_fetch_leaves_prefix(raux, mlt_data, &byte_idx, i, mem, hits); + } + else { + mem->end = i; + int mem_len = mem->end - mem->start; + if (mem_len >= raux->min_seed_len) { + leaf_gather(raux, mlt_data, &byte_idx, mem, hits); + } + } + } + } +} + +/** + * Fetch hits for all MEMs identified after backward search. + * + * Note that backward search functions above only perform tree traversal without gathering hits as they will be fetched in + * a different order than required by BWA-MEM (i.e., all hits for MEM need to be sorted by right-context). 
We re-traverse and + * the tree and fetch the hits in the correct order below (this has a minor performance penalty) + * + * Return early if root node has fewer than 'raux->limit' hits + * + * @param iaux index parameters + * @param raux read parameters + * @param mem maximal-exact-match storage + * @param hits list of hits for read + */ +void rightExtend_fetch_leaves(index_aux_t* iaux, read_aux_t* raux, mem_t* mem, u64v* hits) { + + uint8_t code; + uint8_t* mlt_data; + uint64_t byte_idx = 0, kmer_entry = 0, start_addr = 0; + uint32_t hashval = 0; + int flag = 0; + int i = mem->start; + int end = mem->end; + int idx_first_N = -1; + hashval = getHashKey(&raux->read_buf[i], kmerSize, i, raux->l_seq, &flag, &idx_first_N); + // index-table lookup + kmer_entry = iaux->kmer_offsets[hashval]; + // index-table entry type + code = kmer_entry & METADATA_MASK; + // pointer to root of tree + start_addr = kmer_entry >> KMER_DATA_BITWIDTH; + raux->mh_start_addr = 0; + // width used for internal pointers in tree + raux->ptr_width = (((kmer_entry >> 22) & 3) == 0) ? 
4 : ((kmer_entry >> 22) & 3); + byte_idx = 0; + assert(code != INVALID); + assert(code != SINGLE_HIT_LEAF); + if (code == INFREQUENT) { // k-mer has fewer than 256 hits + i += kmerSize; + mlt_data = &iaux->mlt_table[start_addr]; + // + // First 4 bytes of tree store the start of all multi-hit leaves for the k-mer + // + memcpy_bwamem(&raux->mh_start_addr, 4 * sizeof(uint8_t), &mlt_data[byte_idx], 4 * sizeof(uint8_t), __FILE__, __LINE__); + byte_idx += 4; + if (i < end) { + getNextByteIdx_fetch_leaves(raux, mlt_data, &byte_idx, i, mem, hits); + } + else { + leaf_gather(raux, mlt_data, &byte_idx, mem, hits); + } + } + else if (code == FREQUENT) { // k-mer has large, dense tree, do an additional x-mer lookup + uint64_t xmer_entry; + uint64_t ptr = 0; + i += kmerSize; + mlt_data = &iaux->mlt_table[start_addr]; + hashval = getHashKey(&raux->read_buf[i], xmerSize, i, raux->l_seq, 0, &idx_first_N); + memcpy_bwamem(&raux->mh_start_addr, 4 * sizeof(uint8_t), &mlt_data[byte_idx], 4 * sizeof(uint8_t), __FILE__, __LINE__); + byte_idx += 4; + memcpy_bwamem(&xmer_entry, 8 * sizeof(uint8_t), &mlt_data[byte_idx + (hashval << 3)], 8 * sizeof(uint8_t), __FILE__, __LINE__); + code = xmer_entry & METADATA_MASK; + ptr = xmer_entry >> KMER_DATA_BITWIDTH; + assert(code != INVALID); + assert(code != SINGLE_HIT_LEAF); + byte_idx = ptr; + i += xmerSize; + if (i < end) { + getNextByteIdx_fetch_leaves(raux, mlt_data, &byte_idx, i, mem, hits); + } + else { + leaf_gather(raux, mlt_data, &byte_idx, mem, hits); + } + } +} + +/** + * Main forward search function (seeding). 
Lookup up k-mer and/or x-mer table and identify root of ERT + * + * @param iaux index parameters + * @param raux read parameters + * @param i Index into read buffer + * @param mem maximal-exact-match storage + * @param hits list of hits for read + */ +void rightExtend(index_aux_t* iaux, read_aux_t* raux, int* i, mem_t* mem, u64v* hits) { + + uint8_t code; + uint8_t* mlt_data; + uint64_t byte_idx = 0, ref_pos = 0, kmer_entry = 0, start_addr = 0; + uint32_t hashval = 0; + uint64_t lep_data = 0; + int flag = 0; + int idx_first_N = -1; + hashval = getHashKey(&raux->read_buf[*i], kmerSize, *i, raux->l_seq, &flag, &idx_first_N); + // index-table lookup + kmer_entry = iaux->kmer_offsets[hashval]; + // index-table entry type + code = kmer_entry & METADATA_MASK; + lep_data = (kmer_entry >> METADATA_BITWIDTH) & LEP_MASK; + // pointer to root of tree + start_addr = kmer_entry >> KMER_DATA_BITWIDTH; + uint64_t mlt_start_addr = raux->mlt_start_addr = start_addr; + raux->mh_start_addr = 0; + // width used for internal pointers in tree + raux->ptr_width = (((kmer_entry >> 22) & 3) == 0) ? 4 : ((kmer_entry >> 22) & 3); + // LEP takes up kmerSize-1 bits. Last LEP bit is at position = kmerSize-2. 
+ if (*i <= 64-kmerSize) { + raux->lep[0] |= (lep_data << *i); + } + else if (*i < 64) { + raux->lep[0] |= (lep_data << *i); + raux->lep[1] |= (lep_data >> (64-*i)); + } + else if (*i <= 128-kmerSize) { + raux->lep[1] |= (lep_data << (*i-64)); + } + else if (*i < 128) { + raux->lep[1] |= (lep_data << (*i-64)); + raux->lep[2] |= (lep_data >> (128-*i)); + } + else if (*i <= 192-kmerSize) { + raux->lep[2] |= (lep_data << (*i-128)); + } + else if (*i < 192) { + raux->lep[2] |= (lep_data << (*i-128)); + raux->lep[3] |= (lep_data >> (192-*i)); + } + else if (*i <= 256-kmerSize) { + raux->lep[3] |= (lep_data << (*i-192)); + } + else if (*i < 256) { + raux->lep[3] |= (lep_data << (*i-192)); + raux->lep[4] |= (lep_data >> (256-*i)); + } + else { + raux->lep[4] |= (lep_data << (*i-256)); + } + raux->nextLEPBit = *i + kmerSize - 1; + byte_idx = 0; + // We found an ambiguous base in the kmer. Stop extension at ambiguous base and record LEP + if (idx_first_N != -1) { + if (*i != 0) { + raux->nextLEPBit = *i + idx_first_N - 1; + raux->lep[raux->nextLEPBit >> 6] |= (1ULL << (raux->nextLEPBit & (0x3FULL))); + } + *i += idx_first_N; + return; + } + if (flag) { + raux->nextLEPBit = (raux->l_seq - 1); + *i = raux->l_seq; + raux->lep[raux->nextLEPBit >> 6] |= (1ULL << (raux->nextLEPBit & (0x3FULL))); + return; + } + if (code == INVALID) { + // Do backward extension using LEP + *i += (kmerSize + xmerSize); + } + else if (code == SINGLE_HIT_LEAF) { + mlt_data = &iaux->mlt_table[start_addr]; + byte_idx++; + memcpy_bwamem(&ref_pos, 5 * sizeof(uint8_t), &mlt_data[byte_idx], 5 * sizeof(uint8_t), __FILE__, __LINE__); + mem->hitcount += 1; + kv_push(uint64_t, *hits, ref_pos >> 1); + byte_idx += 5; + *i += kmerSize; + } + else if (code == INFREQUENT) { // k-mer has fewer than 256 hits + *i += kmerSize; + mlt_data = &iaux->mlt_table[start_addr]; + if (*i < raux->l_seq) { + // + // First 4 bytes of tree store the start of all multi-hit leaves for the k-mer + // + 
memcpy_bwamem(&raux->mh_start_addr, 4 * sizeof(uint8_t), &mlt_data[byte_idx], 4 * sizeof(uint8_t), __FILE__, __LINE__); + byte_idx += 4; + getNextByteIdx(raux, mlt_data, &byte_idx, i, mem, hits); + } + else { + raux->lep[raux->nextLEPBit >> 6] |= (1ULL << (raux->nextLEPBit & (0x3FULL))); + raux->nextLEPBit += 1; + } + } + else if (code == FREQUENT) { // k-mer has large, dense tree, do an additional x-mer lookup + uint64_t xmer_entry; + uint64_t ptr = 0; + *i += kmerSize; + int k; + uint64_t lepBit = 0; + flag = 0; + mlt_data = &iaux->mlt_table[mlt_start_addr]; + hashval = getHashKey(&raux->read_buf[*i], xmerSize, *i, raux->l_seq, &flag, &idx_first_N); + memcpy_bwamem(&raux->mh_start_addr, 4 * sizeof(uint8_t), &mlt_data[byte_idx], 4 * sizeof(uint8_t), __FILE__, __LINE__); + byte_idx += 4; + memcpy_bwamem(&xmer_entry, 8 * sizeof(uint8_t), &mlt_data[byte_idx + (hashval << 3)], 8 * sizeof(uint8_t), __FILE__, __LINE__); + code = xmer_entry & METADATA_MASK; + lep_data = (xmer_entry >> METADATA_BITWIDTH) & 0xF; + ptr = xmer_entry >> KMER_DATA_BITWIDTH; + int xmerLen = 0; + if (raux->l_seq - *i > xmerSize) { + xmerLen = xmerSize; + } + else { + xmerLen = raux->l_seq - *i; + } + for (k = 0; k < xmerLen; ++k) { + lepBit = (lep_data >> k) & 1; + raux->lep[raux->nextLEPBit >> 6] |= (lepBit << (raux->nextLEPBit & (0x3FULL))); + raux->nextLEPBit++; + } + // We found an ambiguous base in the kmer. 
Stop extension at ambiguous base and record LEP + if (idx_first_N != -1) { + raux->nextLEPBit = *i + idx_first_N - 1; + raux->lep[raux->nextLEPBit >> 6] |= (1ULL << (raux->nextLEPBit & (0x3FULL))); + *i += idx_first_N; + return; + } + if (flag) { + raux->nextLEPBit = (raux->l_seq - 1); + *i = raux->l_seq; + raux->lep[raux->nextLEPBit >> 6] |= (1ULL << (raux->nextLEPBit & (0x3FULL))); + return; + } + if (code == INVALID) { + *i += xmerSize; + } + else if (code == SINGLE_HIT_LEAF) { + byte_idx = ptr; + byte_idx++; + memcpy_bwamem(&ref_pos, 5 * sizeof(uint8_t), &mlt_data[byte_idx], 5 * sizeof(uint8_t), __FILE__, __LINE__); + mem->hitcount += 1; + kv_push(uint64_t, *hits, ref_pos >> 1); + byte_idx += 5; + *i += xmerSize; + } + else { + byte_idx = ptr; + *i += xmerSize; + if (*i < raux->l_seq) { + getNextByteIdx(raux, mlt_data, &byte_idx, i, mem, hits); + } + else { + raux->lep[raux->nextLEPBit >> 6] |= (1ULL << (raux->nextLEPBit & (0x3FULL))); + raux->nextLEPBit += 1; + } + } + } +} + +/** + * Main forward search function (reseeding). 
Lookup up k-mer and/or x-mer table and identify root of ERT + * Return early if root node has fewer than 'raux->limit' hits + * + * @param iaux index parameters + * @param raux read parameters + * @param i Index into read buffer + * @param mem maximal-exact-match storage + * @param hits list of hits for read + */ +void rightExtend_wlimit(index_aux_t* iaux, read_aux_t* raux, int* i, mem_t* mem, u64v* hits) { + + uint8_t code; + uint8_t* mlt_data; + uint64_t byte_idx = 0, kmer_entry = 0, start_addr = 0; + uint32_t hashval = 0; + uint64_t lep_data = 0; + int flag = 0; + int idx_first_N = -1; + hashval = getHashKey(&raux->read_buf[*i], kmerSize, *i, raux->l_seq, &flag, &idx_first_N); + // index-table lookup + kmer_entry = iaux->kmer_offsets[hashval]; + // index-table entry type + code = kmer_entry & METADATA_MASK; + lep_data = (kmer_entry >> METADATA_BITWIDTH) & LEP_MASK; + // pointer to root of tree + start_addr = kmer_entry >> KMER_DATA_BITWIDTH; + uint64_t mlt_start_addr = raux->mlt_start_addr = start_addr; + raux->mh_start_addr = 0; + // width used for internal pointers in tree, 2 bits, ptr_width = 4 is encoded as 0 + raux->ptr_width = (((kmer_entry >> 22) & 3) == 0) ? 4 : ((kmer_entry >> 22) & 3); + raux->num_hits = (kmer_entry >> 17) & 0x1F; + // LEP takes up kmerSize-1 bits. Last LEP bit is at position = kmerSize-2. 
+ if (*i <= 64-kmerSize) { + raux->lep[0] |= (lep_data << *i); + } + else if (*i < 64) { + raux->lep[0] |= (lep_data << *i); + raux->lep[1] |= (lep_data >> (64-*i)); + } + else if (*i <= 128-kmerSize) { + raux->lep[1] |= (lep_data << (*i-64)); + } + else if (*i < 128) { + raux->lep[1] |= (lep_data << (*i-64)); + raux->lep[2] |= (lep_data >> (128-*i)); + } + else if (*i <= 192-kmerSize) { + raux->lep[2] |= (lep_data << (*i-128)); + } + else if (*i < 192) { + raux->lep[2] |= (lep_data << (*i-128)); + raux->lep[3] |= (lep_data >> (192-*i)); + } + else if (*i <= 256-kmerSize) { + raux->lep[3] |= (lep_data << (*i-192)); + } + else if (*i < 256) { + raux->lep[3] |= (lep_data << (*i-192)); + raux->lep[4] |= (lep_data >> (256-*i)); + } + else { + raux->lep[4] |= (lep_data << (*i-256)); + } + raux->nextLEPBit = *i + kmerSize - 1; + byte_idx = 0; + // We found an ambiguous base in the kmer. Stop extension at ambiguous base and record LEP + if (idx_first_N != -1) { + if (*i != 0) { + raux->nextLEPBit = *i + idx_first_N - 1; + raux->lep[raux->nextLEPBit >> 6] |= (1ULL << (raux->nextLEPBit & (0x3FULL))); + } + *i += idx_first_N; + return; + } + if (flag) { + raux->nextLEPBit = (raux->l_seq - 1); + *i = raux->l_seq; + raux->lep[raux->nextLEPBit >> 6] |= (1ULL << (raux->nextLEPBit & (0x3FULL))); + return; + } + if (code == INVALID) { + // Do backward extension using LEP + *i += (kmerSize + xmerSize); + } + else if (code == SINGLE_HIT_LEAF) { + *i += (kmerSize + xmerSize); + } + else if (code == INFREQUENT) { // k-mer has fewer than 256 hits + *i += kmerSize; + mlt_data = &iaux->mlt_table[mlt_start_addr]; + if ((raux->num_hits == 0) || (raux->num_hits >= raux->limit)) { + if (*i < raux->l_seq) { + path_v visited; + kv_init(visited); + node_info_t nif; + nif.byte_idx = byte_idx; + nif.num_hits = raux->num_hits; + kv_push(node_info_t, visited, nif); + // + // First 4 bytes of tree store the start of all multi-hit leaves for the k-mer + // + memcpy_bwamem(&raux->mh_start_addr, 4 * 
sizeof(uint8_t), &mlt_data[byte_idx], 4 * sizeof(uint8_t), __FILE__, __LINE__); + byte_idx += 4; + getNextByteIdx_wlimit(raux, mlt_data, &byte_idx, i, mem, &visited, hits); + kv_destroy(visited); + } + else { + raux->lep[raux->nextLEPBit >> 6] |= (1ULL << (raux->nextLEPBit & (0x3FULL))); + raux->nextLEPBit += 1; + } + } + } + else if (code == FREQUENT) { // k-mer has large, dense tree, do an additional x-mer lookup + uint64_t xmer_entry; + uint64_t ptr = 0; + *i += kmerSize; + int k; + uint64_t lepBit = 0; + flag = 0; + mlt_data = &iaux->mlt_table[mlt_start_addr]; + hashval = getHashKey(&raux->read_buf[*i], xmerSize, *i, raux->l_seq, &flag, &idx_first_N); + memcpy_bwamem(&raux->mh_start_addr, 4 * sizeof(uint8_t), &mlt_data[byte_idx], 4 * sizeof(uint8_t), __FILE__, __LINE__); + byte_idx += 4; + memcpy_bwamem(&xmer_entry, 8 * sizeof(uint8_t), &mlt_data[byte_idx + (hashval << 3)], 8 * sizeof(uint8_t), __FILE__, __LINE__); + code = xmer_entry & METADATA_MASK; + lep_data = (xmer_entry >> METADATA_BITWIDTH) & 0xF; + ptr = xmer_entry >> KMER_DATA_BITWIDTH; + // 5 bits to encode hits for each node + raux->num_hits = (xmer_entry >> 17) & 0x1F; + int xmerLen = 0; + if (raux->l_seq - *i > xmerSize) { + xmerLen = xmerSize; + } + else { + xmerLen = raux->l_seq - *i; + } + for (k = 0; k < xmerLen; ++k) { + lepBit = (lep_data >> k) & 1; + raux->lep[raux->nextLEPBit >> 6] |= (lepBit << (raux->nextLEPBit & (0x3FULL))); + raux->nextLEPBit++; + } + // We found an ambiguous base in the kmer. 
Stop extension at ambiguous base and record LEP + if (idx_first_N != -1) { + raux->nextLEPBit = *i + idx_first_N - 1; + raux->lep[raux->nextLEPBit >> 6] |= (1ULL << (raux->nextLEPBit & (0x3FULL))); + *i += idx_first_N; + return; + } + if (flag) { + raux->nextLEPBit = (raux->l_seq - 1); + *i = raux->l_seq; + raux->lep[raux->nextLEPBit >> 6] |= (1ULL << (raux->nextLEPBit & (0x3FULL))); + return; + } + if (code == INVALID) { + *i += xmerSize; + } + else if (code == SINGLE_HIT_LEAF) { + *i += xmerSize; + } + else { + byte_idx = ptr; + *i += xmerSize; + if ((raux->num_hits == 0) || (raux->num_hits >= raux->limit)) { + if (*i < raux->l_seq) { + path_v visited; + kv_init(visited); + node_info_t nif; + nif.byte_idx = byte_idx; + nif.num_hits = raux->num_hits; + kv_push(node_info_t, visited, nif); + getNextByteIdx_wlimit(raux, mlt_data, &byte_idx, i, mem, &visited, hits); + kv_destroy(visited); + } + else { + raux->lep[raux->nextLEPBit >> 6] |= (1ULL << (raux->nextLEPBit & (0x3FULL))); + raux->nextLEPBit += 1; + } + } + } + } +} + +/* + * Main forward search function for LAST heuristic + * + * @param iaux index parameters + * @param raux read parameters + * @param i Index into read buffer + * @param mem maximal-exact-match storage + * @param hits list of hits for read + */ +void rightExtend_last(index_aux_t* iaux, read_aux_t* raux, int* i, mem_t* mem, u64v* hits) { + + uint8_t code; + uint8_t* mlt_data; + uint64_t byte_idx = 0, ref_pos = 0, kmer_entry = 0, start_addr = 0; + int flag = 0; + uint32_t hashval = 0; + int idx_first_N = -1; + hashval = getHashKey(&raux->read_buf[*i], kmerSize, *i, raux->l_seq, &flag, &idx_first_N); + // We found an ambiguous base in the kmer. 
Stop extension + if (idx_first_N != -1) { + *i += (idx_first_N + 1); + return; + } + if (flag) { + *i = raux->l_seq; + return; + } + // index-table lookup + kmer_entry = iaux->kmer_offsets[hashval]; + code = kmer_entry & METADATA_MASK; + start_addr = kmer_entry >> KMER_DATA_BITWIDTH; + uint64_t mlt_start_addr = raux->mlt_start_addr = start_addr; + raux->mh_start_addr = 0; + raux->ptr_width = (((kmer_entry >> 22) & 3) == 0) ? 4 : ((kmer_entry >> 22) & 3); + raux->num_hits = (kmer_entry >> 17) & 0x1F; + byte_idx = 0; + if (code == INVALID) { + // Do backward extension using LEP + *i += kmerSize; + } + else if (code == SINGLE_HIT_LEAF) { + mlt_data = &iaux->mlt_table[mlt_start_addr]; + byte_idx++; + memcpy_bwamem(&ref_pos, 5 * sizeof(uint8_t), &mlt_data[byte_idx], 5 * sizeof(uint8_t), __FILE__, __LINE__); + mem->hitcount += 1; + kv_push(uint64_t, *hits, ref_pos >> 1); + byte_idx += 5; + *i += kmerSize; + } + else if (code == INFREQUENT) { + *i += kmerSize; + mlt_data = &iaux->mlt_table[mlt_start_addr]; + if (*i < raux->l_seq) { // Length <= (kmer + xmer). Need not check num_hits here. + memcpy_bwamem(&raux->mh_start_addr, 4 * sizeof(uint8_t), &mlt_data[byte_idx], 4 * sizeof(uint8_t), __FILE__, __LINE__); + byte_idx += 4; + getNextByteIdx_last(raux, mlt_data, &byte_idx, i, mem, hits); + } + } + else if (code == FREQUENT) { + uint64_t xmer_entry; + uint64_t ptr = 0; + *i += kmerSize; + flag = 0; + mlt_data = &iaux->mlt_table[mlt_start_addr]; + hashval = getHashKey(&raux->read_buf[*i], xmerSize, *i, raux->l_seq, &flag, &idx_first_N); + // We found an ambiguous base in the kmer. 
Stop extension + if (idx_first_N != -1) { + *i += (idx_first_N + 1); + return; + } + if (flag) { + *i = raux->l_seq; + return; + } + memcpy_bwamem(&raux->mh_start_addr, 4 * sizeof(uint8_t), &mlt_data[byte_idx], 4 * sizeof(uint8_t), __FILE__, __LINE__); + byte_idx += 4; + memcpy_bwamem(&xmer_entry, 8 * sizeof(uint8_t), &mlt_data[byte_idx + (hashval << 3)], 8 * sizeof(uint8_t), __FILE__, __LINE__); + code = xmer_entry & METADATA_MASK; + ptr = xmer_entry >> KMER_DATA_BITWIDTH; + raux->num_hits = (xmer_entry >> 17) & 0x1F; + if (code == INVALID) { + *i += xmerSize; + } + else if (code == SINGLE_HIT_LEAF) { + byte_idx = ptr; + byte_idx++; + memcpy_bwamem(&ref_pos, 5 * sizeof(uint8_t), &mlt_data[byte_idx], 5 * sizeof(uint8_t), __FILE__, __LINE__); + mem->hitcount += 1; + kv_push(uint64_t, *hits, ref_pos >> 1); + byte_idx += 5; + *i += xmerSize; + } + else { + byte_idx = ptr; + *i += xmerSize; + if ((raux->num_hits == 0) || (raux->num_hits >= raux->limit) || ((*i - mem->start) < (raux->min_seed_len + 1))) { + if (*i < raux->l_seq) { + getNextByteIdx_last(raux, mlt_data, &byte_idx, i, mem, hits); + } + } + else { // number of hits is less than the limit and seed length >= opt->min_seed_len + leaf_gather(raux, mlt_data, &byte_idx, mem, hits); + } + } + } +} + +/* + * Initialize MEM parameters for backward extension + * + * @param lep LEP bit vector + * @param mem maximal-exact-match + * @param j Index into LEP + * @param seq_len Read length + * @param min_seed_len Minimum seed length + * + * @return mem_valid valid flag to indicate if backward search needs to be performed from given position j + */ +static inline int init_mem(uint64_t* lep, mem_t* mem, int j, int seq_len, int min_seed_len) { + int lep_bit_set = (lep[j >> 6] >> (j & 63)) & 1; + int in_valid_range = (j >= (min_seed_len-1)) ? 
1 : 0; + int mem_valid = (lep_bit_set && in_valid_range); + mem->end = j + 1; // [start, end) indexing + mem->rc_start = seq_len - j - 1; + mem->rc_end = mem->rc_start; + mem->skip_ref_fetch = 0; + mem->forward = 0; + mem->fetch_leaves = 0; + mem->hitbeg = mem->hitcount = 0; + mem->end_correction = 0; + mem->is_multi_hit = 0; + return mem_valid; +} + +/* + * Compute final SMEMs and their hits after considering their overlaps + * + * Leaf expansion is also performed to get the actual length of the MEM. + * Note that leaf nodes store compressed suffixes by including a pointer to the reference genome + * + * @param iaux index related parameters + * @param raux read related parameters + * @param mem maximal-exact-match storage + * @param sh helper data structure to keep track of start and end positions of previously identified MEMs + * @param smems List of SMEMs + * + * @return n number of backward extensions to skip + */ +int check_and_add_smem_prefix_reseed(index_aux_t* iaux, read_aux_t* raux, mem_t* mem, smem_helper_t* sh, mem_v* smems, u64v* hits) { + + mem->start = raux->l_seq - mem->rc_end; // Adjust start position of LMEM + int lmemLen = mem->end - mem->start, rmemLen = -1, next_be_point; + if (mem->hitcount > 0 && !mem->skip_ref_fetch) { + int64_t len; + int64_t start_ref_pos = hits->a[mem->hitbeg] - mem->rc_start; + int64_t end_ref_pos = hits->a[mem->hitbeg]; + // Fetch reference to check for extra matching bps on the right + uint8_t* rseq = get_seq(iaux->bns->l_pac, iaux->pac, start_ref_pos, end_ref_pos, &len, iaux->ref_string, 0); + int m, numMatchingBP = 0; + for (m = 1; m <= len; ++m) { + if (rseq[mem->rc_start - m] == raux->read_buf[mem->rc_start - m]) { + numMatchingBP++; + } + else { + break; + } + } + mem->end += numMatchingBP; + mem->end_correction += numMatchingBP; + + start_ref_pos = hits->a[mem->hitbeg] + lmemLen; + end_ref_pos = start_ref_pos + mem->start; + + // Fetch reference to check for extra matching bps on the left + rseq = 
get_seq(iaux->bns->l_pac, iaux->pac, start_ref_pos, end_ref_pos, &len, iaux->ref_string, 0); + numMatchingBP = 0; + for (m = 0; m < len; ++m) { + if (rseq[m] == raux->read_buf[mem->rc_end + m]) { + numMatchingBP++; + } + else { + break; + } + } + mem->start -= numMatchingBP; + } + // Adjust start position of MEM by extra matching bps + lmemLen = mem->end - mem->start; + next_be_point = mem->end; + if (mem->hitcount == 1) { + if (lmemLen >= raux->min_seed_len) { + kv_push(mem_t, *smems, *mem); + } + else { + next_be_point += (raux->min_seed_len - lmemLen); + } + } + // perform forward extension only for non-empty MEMs from backward extension + else if (mem->fetch_leaves && (mem->start <= (raux->l_seq - raux->min_seed_len))) { + hits->n -= mem->hitcount; + mem->hitbeg = hits->n; + mem->hitcount = 0; + raux->read_buf = raux->unpacked_queue_buf; + rightExtend_fetch_leaves_prefix_reseed(iaux, raux, mem, hits); + raux->read_buf = raux->unpacked_rc_queue_buf; + rmemLen = mem->end - mem->start; + next_be_point = mem->end; + if (mem->hitcount > 0) { + if (mem->is_multi_hit) { + int64_t len; + int64_t start_ref_pos = hits->a[mem->hitbeg] + rmemLen; + int64_t end_ref_pos = hits->a[mem->hitbeg] + raux->l_seq - mem->start; + /// Fetch reference + uint8_t* rseq = get_seq(iaux->bns->l_pac, iaux->pac, start_ref_pos, end_ref_pos, &len, iaux->ref_string, 0); + int m; + int numMatchingBP = 0; + /// Check for matching bases + for (m = 0; m < len; ++m) { + if (rseq[m] == raux->unpacked_queue_buf[mem->end + m]) { + numMatchingBP++; + } + else { + break; + } + } + mem->end += numMatchingBP; + rmemLen = mem->end - mem->start; + next_be_point = mem->end; + } + if (rmemLen >= raux->min_seed_len && mem->end <= sh->mem_end_limit) { + kv_push(mem_t, *smems, *mem); + } + else { + next_be_point += (raux->min_seed_len - rmemLen); + } + } + else { // we don't have min_seed_len match for this start position + assert(rmemLen <= raux->min_seed_len); + next_be_point += (raux->min_seed_len - rmemLen); 
+ } + } + else { + if (lmemLen <= raux->min_seed_len) { + next_be_point += (raux->min_seed_len - lmemLen); + } + } + + return next_be_point; +} + +/* + * Compute final SMEMs and their hits after considering their overlaps + * + * Leaf expansion is also performed to get the actual length of the MEM. + * Note that leaf nodes store compressed suffixes by including a pointer to the reference genome + * + * @param iaux index related parameters + * @param raux read related parameters + * @param mem maximal-exact-match storage + * @param sh helper data structure to keep track of start and end positions of previously identified MEMs + * @param smems list of SMEMs + * @param hits list of hits for read + * + * @return n number of backward extensions to skip + */ +int check_and_add_smem_prefix(index_aux_t* iaux, read_aux_t* raux, mem_t* mem, smem_helper_t* sh, mem_v* smems, u64v* hits) { + + mem->start = raux->l_seq - mem->rc_end; // Adjust start position of LMEM + int lmemLen = mem->end - mem->start, rmemLen = -1, next_be_point; + if (mem->hitcount > 0 && !mem->skip_ref_fetch) { + int64_t len; + int64_t start_ref_pos = hits->a[mem->hitbeg] - mem->rc_start; + int64_t end_ref_pos = hits->a[mem->hitbeg]; + // Fetch reference to check for extra matching bps + uint8_t* rseq = get_seq(iaux->bns->l_pac, iaux->pac, start_ref_pos, end_ref_pos, &len, iaux->ref_string, 0); + int m, numMatchingBP = 0; + for (m = 1; m <= len; ++m) { + if (rseq[mem->rc_start - m] == raux->read_buf[mem->rc_start - m]) { + numMatchingBP++; + } + else { + break; + } + } + mem->end += numMatchingBP; + mem->end_correction += numMatchingBP; + + start_ref_pos = hits->a[mem->hitbeg] + lmemLen; + end_ref_pos = start_ref_pos + mem->start; + // Fetch reference to check for extra matching bps + rseq = get_seq(iaux->bns->l_pac, iaux->pac, start_ref_pos, end_ref_pos, &len, iaux->ref_string, 0); + numMatchingBP = 0; + for (m = 0; m < len; ++m) { + if (rseq[m] == raux->read_buf[mem->rc_end + m]) { + numMatchingBP++; + } 
+ else {
+ break;
+ }
+ }
+ // free(rseq);
+ mem->start -= numMatchingBP;
+ }
+ lmemLen = mem->end - mem->start;
+ next_be_point = mem->end;
+ // skip forward re-traversal for single-hit MEMs
+ if (mem->hitcount == 1) {
+ if (lmemLen >= raux->min_seed_len) {
+ kv_push(mem_t, *smems, *mem);
+ }
+ else {
+ next_be_point += (raux->min_seed_len - lmemLen);
+ }
+ }
+ else if (mem->fetch_leaves && (mem->start <= (raux->l_seq - raux->min_seed_len))) {
+ hits->n -= mem->hitcount;
+ mem->hitbeg = hits->n;
+ mem->hitcount = 0;
+ raux->read_buf = raux->unpacked_queue_buf;
+ rightExtend_fetch_leaves_prefix(iaux, raux, mem, hits);
+ raux->read_buf = raux->unpacked_rc_queue_buf;
+ rmemLen = mem->end - mem->start;
+ next_be_point = mem->end;
+ if (mem->hitcount > 0) {
+ int64_t len;
+ int64_t start_ref_pos = hits->a[mem->hitbeg] + rmemLen;
+ int64_t end_ref_pos = hits->a[mem->hitbeg] + raux->l_seq - mem->start;
+ /// Fetch reference
+ uint8_t* rseq = get_seq(iaux->bns->l_pac, iaux->pac, start_ref_pos, end_ref_pos, &len, iaux->ref_string, 0);
+ int m;
+ int numMatchingBP = 0;
+ /// Check for matching bases
+ for (m = 0; m < len; ++m) {
+ if (rseq[m] == raux->unpacked_queue_buf[mem->end + m]) {
+ numMatchingBP++;
+ }
+ else {
+ break;
+ }
+ }
+ mem->end += numMatchingBP;
+ rmemLen = mem->end - mem->start;
+ next_be_point = mem->end;
+ if (rmemLen >= raux->min_seed_len) {
+ kv_push(mem_t, *smems, *mem);
+ }
+ else {
+ next_be_point += (raux->min_seed_len - rmemLen);
+ }
+ }
+ else { // we don't have min_seed_len match for this start position
+ assert(rmemLen <= raux->min_seed_len);
+ next_be_point += (raux->min_seed_len - rmemLen);
+ }
+ }
+ else {
+ assert(lmemLen <= raux->min_seed_len);
+ next_be_point += (raux->min_seed_len - lmemLen);
+ }
+
+ return next_be_point;
+}
+
+/*
+ * Compute final SMEMs and their hits after considering their overlaps
+ *
+ * Leaf expansion is also performed to get the actual length of the MEM. 
+ * Note that leaf nodes store compressed suffixes by including a pointer to the reference genome + * + * @param iaux index related parameters + * @param raux read related parameters + * @param mem maximal-exact-match storage + * @param sh helper data structure to keep track of start and end positions of previously identified MEMs + * @param smems list of SMEMs + * @param hits list of hits for read + */ +void check_and_add_smem(index_aux_t* iaux, read_aux_t* raux, mem_t* mem, smem_helper_t* sh, mem_v* smems, u64v* hits) { + + mem->start = raux->l_seq - mem->rc_end; // Adjust start position of LMEM + int lmemLen = mem->end - mem->start; + if (mem->hitcount > 0 && !mem->skip_ref_fetch) { + int64_t len; + int64_t start_ref_pos = hits->a[mem->hitbeg] + lmemLen; + int64_t end_ref_pos = start_ref_pos + mem->start; + // Fetch reference to check for extra matching bps + uint8_t* rseq = get_seq(iaux->bns->l_pac, iaux->pac, start_ref_pos, end_ref_pos, &len, iaux->ref_string, 0); + int m, numMatchingBP = 0; + for (m = 0; m < len; ++m) { + if (rseq[m] == raux->read_buf[mem->rc_end + m]) { + numMatchingBP++; + } + else { + break; + } + } + // Adjust start position of MEM by extra matching bps + mem->start -= numMatchingBP; + } + lmemLen = mem->end - mem->start; + if (lmemLen >= raux->min_seed_len) { + // Check if MEM lies completely within previously discovered MEM + if (mem->start < sh->prevMemStart || mem->end > sh->prevMemEnd) { + // Extra work for equivalency with BWA-MEM. 
+ if (mem->fetch_leaves) { + hits->n -= mem->hitcount; + mem->hitbeg = hits->n; + mem->hitcount = 0; + raux->read_buf = raux->unpacked_queue_buf; + rightExtend_fetch_leaves(iaux, raux, mem, hits); + raux->read_buf = raux->unpacked_rc_queue_buf; + } + if (mem->hitcount > 0) { + mem->pt.c_pivot = sh->curr_pivot; + mem->pt.p_pivot = sh->prev_pivot; + mem->pt.pp_pivot = sh->prev_prev_pivot; + kv_push(mem_t, *smems, *mem); + if (mem->start <= (sh->prev_pivot + 1)) { + sh->stop_be = 1; + } + } + sh->prevMemStart = mem->start; + sh->prevMemEnd = mem->end; + } + } +} + +/* + * This function replaces bwt_smem1() and uses ERT to generate SMEMs + * + * @param iaux index related parameters + * @param raux read related parameters + * @param smems list of SMEMs + * @param hits list of hits for read + */ +void get_seeds_prefix(index_aux_t* iaux, read_aux_t* raux, mem_v* smems, u64v* hits) { + + smem_helper_t sh; + memset(&sh, 0, sizeof(smem_helper_t)); + sh.prevMemStart = raux->l_seq; + sh.prevMemEnd = 0; + int i = 0, j = 0; + sh.prev_pivot = -1; + sh.prev_prev_pivot = -1; + memset(raux->lep, 0, 5 * sizeof(uint64_t)); + while (i < raux->l_seq) { // Begin identifying RMEMs + mem_t rm; + memset(&rm, 0, sizeof(mem_t)); + rm.start = i; + rm.forward = 1; + rm.hitbeg = hits->n; + sh.curr_pivot = rm.start; + raux->read_buf = raux->unpacked_queue_buf; + rightExtend(iaux, raux, &i, &rm, hits); //!< Compute LEP. + // Lazy expansion of leaf nodes. 
+ if (rm.hitcount > 0 && !rm.skip_ref_fetch) { + int64_t len; + int64_t start_ref_pos = hits->a[rm.hitbeg] + i - rm.start; + int64_t end_ref_pos = hits->a[rm.hitbeg] + raux->l_seq - rm.start; + // Fetch reference + uint8_t* rseq = get_seq(iaux->bns->l_pac, iaux->pac, start_ref_pos, end_ref_pos, &len, iaux->ref_string, 0); + int m; + int numMatchingBP = 0; + // Check for matching bases + for (m = 0; m < len; ++m) { + if (rseq[m] == raux->unpacked_queue_buf[i+m]) { + numMatchingBP++; + } + else { + raux->lep[(i+m-1) >> 6] |= (1ULL << ((i+m-1) & (0x3FULL))); + break; + } + } + // Last base of RMEM must have LEP bit set + if (m == len) { + raux->lep[(i+m-1) >> 6] |= (1ULL << ((i+m-1) & (0x3FULL))); + } + i += numMatchingBP; + } + rm.end = i; + int rmemLen = rm.end - rm.start; + // No left-extension for position 0 in read + // rm.start is the current pivot + if (rm.start == 0) { + if (rmemLen >= raux->min_seed_len) { + if (rm.hitcount > 0) { + kv_push(mem_t, *smems, rm); + } + } + else { + hits->n -= rm.hitcount; + } + memset(raux->lep, 0, 5 * sizeof(uint64_t)); + } + else { // perform all backward extensions + hits->n -= rm.hitcount; + uint64_t* lep = raux->lep; + int seq_len = raux->l_seq; + int min_seed_len = raux->min_seed_len; + sh.stop_be = 0; + int min_j = (rm.start > min_seed_len) ? 
(rm.start-1) : (min_seed_len-1);
+ int max_j = rm.end - 1;
+ j = min_j;
+ sh.prev_pivot = rm.start;
+ while (j <= max_j) {
+ mem_t m;
+ int be_point;
+ int mem_valid = init_mem(lep, &m, j, seq_len, min_seed_len);
+ m.hitbeg = hits->n;
+ int next_j = j + 1;
+ if (mem_valid) {
+ be_point = j + 1;
+ if (be_point >= min_seed_len) {
+ int rc_i = seq_len - be_point;
+ raux->read_buf = raux->unpacked_rc_queue_buf;
+ leftExtend(iaux, raux, &rc_i, &m, hits);
+ next_j = check_and_add_smem_prefix(iaux, raux, &m, &sh, smems, hits);
+ }
+ }
+ j = next_j;
+ if (m.end > i) {
+ i = m.end;
+ }
+ }
+ }
+ raux->read_buf = raux->unpacked_queue_buf;
+ // Skip all ambiguous bases
+ while (i < raux->l_seq) {
+ if (raux->read_buf[i] == 4) {
+ ++i;
+ }
+ else {
+ break;
+ }
+ }
+ // Check if there are other ambiguous bases within min_seed_len bases of the start of the MEM
+ while ((i < raux->l_seq) && (i - rm.start) < raux->min_seed_len) {
+ if (raux->read_buf[i] == 4) {
+ ++i;
+ break;
+ }
+ ++i;
+ }
+ sh.prev_prev_pivot = sh.prev_pivot;
+ sh.prev_pivot = rm.start;
+ memset(raux->lep, 0, 5 * sizeof(uint64_t));
+ break; // zzh
+ }
+#ifdef PRINT_SMEM
+ ks_introsort(mem_smem_sort_lt_ert, smems->n, smems->a); // Sort SMEMs based on start pos in read. For DEBUG. 
+ for (i = 0; i < smems->n; ++i) { + // printf("[SMEM]:%d,%d\n", smems->a[i].start, smems->a[i].end); + int idx; + for (idx = 0; idx < smems->a[i].hitcount; ++idx) { + if (smems->a[i].forward || smems->a[i].fetch_leaves) { + printf("[SMEM]:%d,%d,%lu\n", smems->a[i].start, smems->a[i].end, hits->a[smems->a[i].hitbeg + idx]); + } + else { + printf("[SMEM]:%d,%d,%lu\n", smems->a[i].start, smems->a[i].end, (iaux->bns->l_pac << 1) - hits->a[smems->a[i].hitbeg + idx] - (smems->a[i].end - smems->a[i].start - smems->a[i].end_correction)); + } + } + } +#endif +} + +/* + * This function replaces bwt_smem1() and uses ERT to generate SMEMs + * + * @param iaux index related parameters + * @param raux read related parameters + * @param smems list of SMEMs + * @param hits list of hits for read + */ +void get_seeds(index_aux_t* iaux, read_aux_t* raux, mem_v* smems, u64v* hits) { + + smem_helper_t sh; + memset(&sh, 0, sizeof(smem_helper_t)); + sh.prevMemStart = raux->l_seq; + sh.prevMemEnd = 0; + int i = 0, j = 0; + sh.prev_pivot = -1; + sh.prev_prev_pivot = -1; + memset(raux->lep, 0, 5 * sizeof(uint64_t)); + while (i < raux->l_seq) { // Begin identifying RMEMs + mem_t rm; + memset(&rm, 0, sizeof(mem_t)); + rm.start = i; + rm.forward = 1; + rm.hitbeg = hits->n; + sh.curr_pivot = rm.start; + raux->read_buf = raux->unpacked_queue_buf; + rightExtend(iaux, raux, &i, &rm, hits); // Compute LEP. + // Lazy expansion of leaf nodes. 
+ if (rm.hitcount > 0 && !rm.skip_ref_fetch) { + int64_t len; + int64_t start_ref_pos = hits->a[rm.hitbeg] + i - rm.start; + int64_t end_ref_pos = hits->a[rm.hitbeg] + raux->l_seq - rm.start; + // Fetch reference + uint8_t* rseq = get_seq(iaux->bns->l_pac, iaux->pac, start_ref_pos, end_ref_pos, &len, iaux->ref_string, 0); + int m; + int numMatchingBP = 0; + // Check for matching bases + for (m = 0; m < len; ++m) { + if (rseq[m] == raux->unpacked_queue_buf[i+m]) { + numMatchingBP++; + } + else { + raux->lep[(i+m-1) >> 6] |= (1ULL << ((i+m-1) & (0x3FULL))); + break; + } + } + // Last base of RMEM must have LEP bit set + if (m == len) { + raux->lep[(i+m-1) >> 6] |= (1ULL << ((i+m-1) & (0x3FULL))); + } + i += numMatchingBP; + } + rm.end = i; + int rmemLen = rm.end - rm.start; + // No left-extension for position 0 in read + // rm.start is the current pivot + if (rm.start == 0) { + if (rmemLen >= raux->min_seed_len) { + if (rm.hitcount > 0) { + rm.pt.c_pivot = sh.curr_pivot; + rm.pt.p_pivot = sh.prev_pivot; + rm.pt.pp_pivot = sh.prev_prev_pivot; + kv_push(mem_t, *smems, rm); + } + } + else { + hits->n -= rm.hitcount; + } + memset(raux->lep, 0, 5 * sizeof(uint64_t)); + } + else { + hits->n -= rm.hitcount; + uint64_t* lep = raux->lep; + int seq_len = raux->l_seq; + int min_seed_len = raux->min_seed_len; + j = rm.end-1; + sh.stop_be = 0; + int min_j = (rm.start > min_seed_len) ? 
(rm.start-1) : (min_seed_len-1);
+ while (j >= min_j) {
+ mem_t m;
+ int be_point;
+ int mem_valid = init_mem(lep, &m, j, seq_len, min_seed_len);
+ m.hitbeg = hits->n;
+ if (mem_valid) {
+ be_point = j + 1;
+ if (be_point >= min_seed_len) {
+ int rc_i = seq_len - be_point;
+ raux->read_buf = raux->unpacked_rc_queue_buf;
+ leftExtend(iaux, raux, &rc_i, &m, hits);
+ check_and_add_smem(iaux, raux, &m, &sh, smems, hits);
+ if (sh.stop_be) break;
+ }
+ }
+ j -= 1;
+ }
+ }
+ raux->read_buf = raux->unpacked_queue_buf;
+ // Skip all ambiguous bases
+ while (i < raux->l_seq) {
+ if (raux->read_buf[i] == 4) {
+ ++i;
+ }
+ else {
+ break;
+ }
+ }
+ // Check if there are other ambiguous bases within min_seed_len bases of the start of the MEM
+ while ((i < raux->l_seq) && (i - rm.start) < raux->min_seed_len) {
+ if (raux->read_buf[i] == 4) {
+ ++i;
+ break;
+ }
+ ++i;
+ }
+ sh.prev_prev_pivot = sh.prev_pivot;
+ sh.prev_pivot = rm.start;
+ memset(raux->lep, 0, 5 * sizeof(uint64_t));
+ break; // zzh
+ }
+#ifdef PRINT_SMEM
+ ks_introsort(mem_smem_sort_lt_ert, smems->n, smems->a); // Sort SMEMs based on start pos in read. For DEBUG. 
/*
 * This function performs reseeding of SMEMs (prefix variant: forward
 * iteration over backward-extension points instead of the backward scan
 * used by reseed()).
 *
 * From `start`, a right-maximal match (RMEM) is grown with
 * rightExtend_wlimit() (traversal stops when the hit count drops below
 * `limit`), then backward extensions are run on the reverse-complemented
 * read to enumerate SMEMs covering the pivot.
 *
 * @param iaux  index related parameters
 * @param raux  read related parameters
 * @param smems list of SMEMs (appended to)
 * @param start pivot position in read
 * @param limit hit threshold below which tree traversal must stop
 * @param pt    track pivot information to reduce work done during reseeding
 * @param hits  list of hits for every read (appended to; rolled back for rejected MEMs)
 */
void reseed_prefix(index_aux_t* iaux, read_aux_t* raux, mem_v* smems, int start, int limit, pivot_t* pt, u64v* hits) {

    smem_helper_t sh;
    memset(&sh, 0, sizeof(smem_helper_t));
    sh.prevMemStart = raux->l_seq;
    sh.prevMemEnd = 0;
    int i = start, j = 0;
#ifdef PRINT_SMEM
    int old_n = smems->n;            // remember where this call's SMEMs begin (debug printing only)
#endif
    memset(raux->lep, 0, 5 * sizeof(uint64_t)); // clear LEP bit-vector (5x64 bits; supports up to 320bp reads per header FIXME)
    mem_t rm;
    memset(&rm, 0, sizeof(mem_t));
    rm.start = i;
    rm.forward = 1;                  // this MEM is found by forward (right) extension
    rm.hitbeg = hits->n;
    // Choose previous pivot from history depending on which side of the
    // current pivot we restarted on.
    sh.prev_pivot = (rm.start >= pt->c_pivot) ? pt->p_pivot : pt->pp_pivot;
    raux->read_buf = raux->unpacked_queue_buf;
    raux->limit = limit;
    rightExtend_wlimit(iaux, raux, &i, &rm, hits); // Compute LEP.
    // Lazy expansion of leaf nodes: traversal stopped at a compressed leaf,
    // so finish the match by comparing the read tail against the reference.
    if (rm.hitcount > 0 && !rm.skip_ref_fetch) {
        int64_t len;
        int64_t start_ref_pos = hits->a[rm.hitbeg] + i - rm.start;
        int64_t end_ref_pos = hits->a[rm.hitbeg] + raux->l_seq - rm.start;
        // Fetch reference
        uint8_t* rseq = get_seq(iaux->bns->l_pac, iaux->pac, start_ref_pos, end_ref_pos, &len, iaux->ref_string, 0);
        int m;
        int numMatchingBP = 0;
        // Check for matching bases; on the first mismatch, set the LEP bit
        // for the last matching position.
        for (m = 0; m < len; ++m) {
            if (rseq[m] == raux->unpacked_queue_buf[i+m]) {
                numMatchingBP++;
            }
            else {
                raux->lep[(i+m-1) >> 6] |= (1ULL << ((i+m-1) & (0x3FULL)));
                break;
            }
        }
        // Last base of RMEM must have LEP bit set
        if (m == len) {
            raux->lep[(i+m-1) >> 6] |= (1ULL << ((i+m-1) & (0x3FULL)));
        }
        i += numMatchingBP;
    }
    rm.end = i;
    int rmemLen = rm.end - rm.start;
    if (rm.start == 0) {
        // Pivot at read start: no left-extension possible; keep the RMEM
        // if it is long enough and has hits, otherwise roll back its hits.
        if (rmemLen >= raux->min_seed_len) {
            if (rm.hitcount > 0) {
                kv_push(mem_t, *smems, rm);
            }
        }
        else {
            hits->n -= rm.hitcount;
        }
        memset(raux->lep, 0, 5 * sizeof(uint64_t));
    }
    // Begin left-extension, i.e., right extension on reverse complemented read
    else {
        hits->n -= rm.hitcount; // RMEM hits are provisional; SMEM hits are re-added below
        uint64_t* lep = raux->lep;
        int seq_len = raux->l_seq;
        int min_seed_len = raux->min_seed_len;
        sh.stop_be = 0;
        int min_j = (rm.start > min_seed_len) ? (rm.start-1) : (min_seed_len-1);
        int max_j = rm.end - 1;
        j = min_j;
        sh.prev_pivot = rm.start;
        sh.mem_end_limit = rm.end;
        // Walk backward-extension points left-to-right; the helper may skip
        // ahead by returning a larger next_j.
        while (j <= max_j) {
            mem_t m;
            int be_point;
            int mem_valid = init_mem(lep, &m, j, seq_len, min_seed_len);
            m.hitbeg = hits->n;
            int next_j = j + 1;
            if (mem_valid) {
                be_point = j + 1;
                if (be_point >= min_seed_len) {
                    int rc_i = seq_len - be_point; // position mirrored onto the RC read
                    raux->read_buf = raux->unpacked_rc_queue_buf;
                    leftExtend_wlimit(iaux, raux, &rc_i, &m, hits);
                    next_j = check_and_add_smem_prefix_reseed(iaux, raux, &m, &sh, smems, hits);
                }
            }
            j = next_j;
        }
    }
#ifdef PRINT_SMEM
    ks_introsort(mem_smem_sort_lt_ert, smems->n - old_n, &smems->a[old_n]); // Debug: Sort SMEMs based on start pos in read.
    for (i = old_n; i < smems->n; ++i) {
        int idx;
        for (idx = 0; idx < smems->a[i].hitcount; ++idx) {
            if (smems->a[i].forward || smems->a[i].fetch_leaves) {
                printf("[Reseed]:%d,%d,%lu\n", smems->a[i].start, smems->a[i].end, hits->a[smems->a[i].hitbeg + idx]);
            }
            else {
                // Backward-search hit: normalize position from the RC strand.
                printf("[Reseed]:%d,%d,%lu\n", smems->a[i].start, smems->a[i].end, (iaux->bns->l_pac << 1) - hits->a[smems->a[i].hitbeg + idx] - (smems->a[i].end - smems->a[i].start - smems->a[i].end_correction));
            }
        }
    }
#endif
}
/*
 * This function performs reseeding of SMEMs.
 *
 * From `start`, a right-maximal match (RMEM) is grown with
 * rightExtend_wlimit() (traversal stops when the hit count drops below
 * `limit`); backward-extension points are then scanned right-to-left on the
 * reverse-complemented read, stopping early once no new SMEM can be found.
 *
 * @param iaux  index related parameters
 * @param raux  read related parameters
 * @param smems list of SMEMs and their hits (appended to)
 * @param start pivot position in read
 * @param limit hit threshold below which tree traversal must stop
 * @param pt    track pivot information to reduce work done during reseeding
 * @param hits  list of hits for read (appended to; rolled back for rejected MEMs)
 */
void reseed(index_aux_t* iaux, read_aux_t* raux, mem_v* smems, int start, int limit, pivot_t* pt, u64v* hits) {

    smem_helper_t sh;
    memset(&sh, 0, sizeof(smem_helper_t));
    sh.prevMemStart = raux->l_seq;
    sh.prevMemEnd = 0;
    int i = start, j = 0;
#ifdef PRINT_SMEM
    int old_n = smems->n;            // remember where this call's SMEMs begin (debug printing only)
#endif
    memset(raux->lep, 0, 5 * sizeof(uint64_t)); // clear LEP bit-vector (5x64 bits)
    mem_t rm;
    memset(&rm, 0, sizeof(mem_t));
    rm.start = i;
    rm.forward = 1;                  // this MEM is found by forward (right) extension
    rm.hitbeg = hits->n;
    // Choose previous pivot from history depending on which side of the
    // current pivot we restarted on.
    sh.prev_pivot = (rm.start >= pt->c_pivot) ? pt->p_pivot : pt->pp_pivot;
    raux->read_buf = raux->unpacked_queue_buf;
    raux->limit = limit;
    rightExtend_wlimit(iaux, raux, &i, &rm, hits); //!< Compute LEP.
    // Lazy expansion of leaf nodes: traversal stopped at a compressed leaf,
    // so finish the match by comparing the read tail against the reference.
    if (rm.hitcount > 0 && !rm.skip_ref_fetch) {
        int64_t len;
        int64_t start_ref_pos = hits->a[rm.hitbeg] + i - rm.start;
        int64_t end_ref_pos = hits->a[rm.hitbeg] + raux->l_seq - rm.start;
        // Fetch reference
        uint8_t* rseq = get_seq(iaux->bns->l_pac, iaux->pac, start_ref_pos, end_ref_pos, &len, iaux->ref_string, 0);
        int m;
        int numMatchingBP = 0;
        // Check for matching bases; on the first mismatch, set the LEP bit
        // for the last matching position.
        for (m = 0; m < len; ++m) {
            if (rseq[m] == raux->unpacked_queue_buf[i+m]) {
                numMatchingBP++;
            }
            else {
                raux->lep[(i+m-1) >> 6] |= (1ULL << ((i+m-1) & (0x3FULL)));
                break;
            }
        }
        // Last base of RMEM must have LEP bit set
        if (m == len) {
            raux->lep[(i+m-1) >> 6] |= (1ULL << ((i+m-1) & (0x3FULL)));
        }
        i += numMatchingBP;
    }
    rm.end = i;
    int rmemLen = rm.end - rm.start;
    if (rm.start == 0) {
        // Pivot at read start: no left-extension possible; keep the RMEM
        // if it is long enough and has hits, otherwise roll back its hits.
        if (rmemLen >= raux->min_seed_len) {
            if (rm.hitcount > 0) {
                kv_push(mem_t, *smems, rm);
            }
        }
        else {
            hits->n -= rm.hitcount;
        }
        memset(raux->lep, 0, 5 * sizeof(uint64_t));
    }
    // Begin left-extension, i.e., right extension on reverse complemented read
    else {
        hits->n -= rm.hitcount; // RMEM hits are provisional; SMEM hits are re-added below
        uint64_t* lep = raux->lep;
        int seq_len = raux->l_seq;
        int min_seed_len = raux->min_seed_len;
        j = rm.end-1;
        sh.stop_be = 0;
        int min_j = (rm.start > min_seed_len) ? (rm.start-1) : (min_seed_len-1);
        // Scan backward-extension points right-to-left; stop early when the
        // helper signals that no new SMEM can be found for this pivot.
        while (j >= min_j) {
            mem_t m;
            int be_point;
            int mem_valid = init_mem(lep, &m, j, seq_len, min_seed_len);
            m.hitbeg = hits->n;
            if (mem_valid) {
                be_point = j + 1;
                if (be_point >= min_seed_len) {
                    int rc_i = seq_len - be_point; // position mirrored onto the RC read
                    raux->read_buf = raux->unpacked_rc_queue_buf;
                    leftExtend_wlimit(iaux, raux, &rc_i, &m, hits);
                    check_and_add_smem(iaux, raux, &m, &sh, smems, hits);
                    if (sh.stop_be) break;
                }
            }
            j -= 1;
        }
    }
#ifdef PRINT_SMEM
    ks_introsort(mem_smem_sort_lt_ert, smems->n - old_n, &smems->a[old_n]); // Debug: Sort SMEMs based on start pos in read.
    for (i = old_n; i < smems->n; ++i) {
        int idx;
        for (idx = 0; idx < smems->a[i].hitcount; ++idx) {
            if (smems->a[i].forward || smems->a[i].fetch_leaves) {
                printf("[Reseed]:%d,%d,%lu\n", smems->a[i].start, smems->a[i].end, hits->a[smems->a[i].hitbeg + idx]);
            }
            else {
                // Backward-search hit: normalize position from the RC strand.
                printf("[Reseed]:%d,%d,%lu\n", smems->a[i].start, smems->a[i].end, (iaux->bns->l_pac << 1) - hits->a[smems->a[i].hitbeg + idx] - (smems->a[i].end - smems->a[i].start));
            }
        }
    }
#endif
}
/*
 * This function performs the LAST heuristic.
 *
 * Scans the read left-to-right, repeatedly growing right-maximal matches
 * with rightExtend_last(); a seed is kept only when it is at least
 * minSeedLen long and its hit count is below `limit`. Unlike the SMEM
 * routines, no backward extension is performed.
 *
 * @param iaux  index related parameters
 * @param raux  read related parameters
 * @param smems list of SMEMs (appended to)
 * @param limit hit threshold above which tree traversal must stop
 * @param hits  list of hits for read (appended to; rolled back for rejected seeds)
 */
void last(index_aux_t* iaux, read_aux_t* raux, mem_v* smems, int limit, u64v* hits) {

    int i = 0;
#ifdef PRINT_SMEM
    int old_n = smems->n;            // remember where this call's seeds begin (debug printing only)
#endif
    // LAST exits seeding only when seed length >= 20
    // NOTE(review): uint8_t silently truncates if min_seed_len >= 255 -- confirm min_seed_len stays small.
    const uint8_t minSeedLen = raux->min_seed_len + 1;
    raux->limit = limit;
    while (i < raux->l_seq) { // Begin identifying RMEMs
        mem_t rm;
        rm.start = i; rm.end = 0;
        rm.forward = 1;              // forward (right) extension only
        rm.skip_ref_fetch = 0;
        rm.hitbeg = hits->n;
        rm.hitcount = 0;
        raux->read_buf = raux->unpacked_queue_buf;
        rightExtend_last(iaux, raux, &i, &rm, hits);
        // Lazy expansion of leaf nodes: traversal stopped at a compressed
        // leaf, so finish the match against the reference directly.
        if (rm.hitcount > 0 && !rm.skip_ref_fetch) {
            int64_t len;
            int64_t start_ref_pos = hits->a[rm.hitbeg] + i - rm.start;
            int64_t end_ref_pos = hits->a[rm.hitbeg] + raux->l_seq - rm.start;
            // Fetch reference
            uint8_t* rseq = get_seq(iaux->bns->l_pac, iaux->pac, start_ref_pos, end_ref_pos, &len, iaux->ref_string, 0);
            int m, numMatchingBP = 0;
            // Check for matching bases. Keep matching while the seed is
            // still short or the hit count is still at/above the limit.
            for (m = 0; m < len; ++m) {
                int seedLen = (i + m) - rm.start;
                int match_next_bp = ((seedLen < minSeedLen) || (rm.hitcount >= raux->limit)) ? 1 : 0;
                if (match_next_bp) {
                    if ((rseq[m] == raux->unpacked_queue_buf[i+m])) {
                        numMatchingBP++;
                    }
                    else {
                        ++i; // Increment i on every mismatch for LAST to match BWA-MEM
                        hits->n -= rm.hitcount; // discard this seed's hits
                        rm.hitcount = 0;
                        break;
                    }
                }
                else {
                    break;
                }
            }
            i += numMatchingBP;
        }
        rm.end = i;
        int rmemLen = rm.end - rm.start;
        if (rmemLen >= minSeedLen) {
            // Keep only seeds with a hit count strictly below the limit.
            if (rm.hitcount > 0 && rm.hitcount < raux->limit) {
                kv_push(mem_t, *smems, rm);
            }
            else {
                hits->n -= rm.hitcount;
            }
        }
        else {
            hits->n -= rm.hitcount;
        }
        int foundN = 0;
        // Skip all ambiguous bases
        assert(i > 0);
        if (raux->read_buf[i-1] == 4) { // Found an 'N' during lookup
            foundN = 1;
        }
        if (!foundN) {
            // Check if there are other ambiguous bases within min_seed_len bases of the start of the MEM
            while ((i < raux->l_seq) && (i - rm.start) < minSeedLen) {
                if (raux->read_buf[i] == 4) {
                    ++i;
                    break;
                }
                ++i;
            }
        }
    }
#ifdef PRINT_SMEM
    ks_introsort(mem_smem_sort_lt_ert, smems->n - old_n, &smems->a[old_n]); // Debug: Sort SMEMs based on start pos in read.
    for (i = old_n; i < smems->n; ++i) {
        int idx;
        for (idx = 0; idx < smems->a[i].hitcount; ++idx) {
            printf("[LAST]:%d,%d,%lu\n", smems->a[i].start, smems->a[i].end, hits->a[smems->a[i].hitbeg + idx]);
        }
    }
#endif
}
[start, end) + int rc_start; // MEM start position in reverse complemented (RC) read (used for backward search) + int rc_end; // MEM end position in reverse complemented (RC) read (used for backward search) + int skip_ref_fetch; // Skip reference fetch when leaf node need not be decompressed + int fetch_leaves; // Gather all leaves for MEM + int hitbeg; // Index into hit array + int hitcount; // Count of hits + int end_correction; // Amount by which MEM has extended beyond backward search start position in read + int is_multi_hit; + pivot_t pt; +} mem_t; + +typedef kvec_t(mem_t) mem_v; + +/** + * Index-related auxiliary data structures + */ +typedef struct +{ + uint64_t *kmer_offsets; // K-mer table + uint8_t *mlt_table; // Multi-level ERT + const bwt_t *bwt; // FM-index + const bntseq_t *bns; // Input reads sequences + const uint8_t *pac; // Reference genome (2-bit encoded) + uint8_t *ref_string; +} index_aux_t; + +/** + * 'Read' auxiliary data structures + */ +typedef struct +{ + int min_seed_len; // Minimum length of seed + int l_seq; // Read length + int ptr_width; // Size of pointers to child nodes in ERT + int num_hits; // Number of hits for each node in the ERT + int limit; // Number of hits after which extension must be stopped + uint64_t lep[5]; // FIXME: Can support up to 320bp + uint64_t nextLEPBit; // Index into the LEP bit-vector + uint64_t mlt_start_addr; // Start address of multi-level ERT + uint64_t mh_start_addr; // Start address of multi-hits for each k-mer + char *read_name; // Read name + uint8_t *unpacked_queue_buf; // Read sequence (2-bit encoded) + uint8_t *unpacked_rc_queue_buf; // Reverse complemented read (2-bit encoded) + uint8_t *read_buf; // == queue_buf (forward) and == rc_queue_buf (backward) +} read_aux_t; + +/** + * SMEM helper data structure + */ +typedef struct +{ + int prevMemStart; // Start position of previous MEM in the read + int prevMemEnd; // End position of previous MEM in the read + int curr_pivot; // Pivot used for 
forward/backward search in iteration i + int prev_pivot; // Pivot used in the previous iteration (i-1) + int prev_prev_pivot; // Pivot used in iteration i-2 (useful for reseeding) + int stop_be; // Stop backward search early if no new SMEMs can be found for pivot + int mem_end_limit; +} smem_helper_t; + +void get_seeds(index_aux_t *iaux, read_aux_t *raux, mem_v *smems, u64v *hits); + +void get_seeds_prefix(index_aux_t *iaux, read_aux_t *raux, mem_v *smems, u64v *hits); + +void reseed(index_aux_t *iaux, read_aux_t *raux, mem_v *smems, int start, int limit, pivot_t *pt, u64v *hits); + +void reseed_prefix(index_aux_t *iaux, read_aux_t *raux, mem_v *smems, int start, int limit, pivot_t *pt, u64v *hits); + +void last(index_aux_t *iaux, read_aux_t *raux, mem_v *smems, int limit, u64v *hits); + +#endif diff --git a/fastmap.c b/fastmap.c index dc32f68..82fcb42 100644 --- a/fastmap.c +++ b/fastmap.c @@ -78,6 +78,7 @@ typedef struct { long read_idx; long calc_idx; long write_idx; + int useERT; } ktp_aux_t; // read @@ -370,6 +371,7 @@ int main_mem(int argc, char *argv[]) void *ko = 0, *ko2 = 0; mem_pestat_t pes[4]; ktp_aux_t aux; + int useERT = 0; memset(&aux, 0, sizeof(ktp_aux_t)); memset(pes, 0, 4 * sizeof(mem_pestat_t)); @@ -377,7 +379,7 @@ int main_mem(int argc, char *argv[]) aux.opt = opt = mem_opt_init(); memset(&opt0, 0, sizeof(mem_opt_t)); - while ((c = getopt(argc, argv, "512qpaMCSPVYjuk:c:v:s:r:t:b:R:A:B:O:E:U:w:L:d:T:Q:D:m:I:N:o:f:W:x:G:h:y:K:X:H:F:z:")) >= 0) { + while ((c = getopt(argc, argv, "512qpaMCSPVYjuk:c:v:s:r:t:b:R:A:B:O:E:U:w:L:d:T:Q:D:m:I:N:o:f:W:x:G:h:y:K:X:H:F:z:Z")) >= 0) { if (c == 'k') opt->min_seed_len = atoi(optarg), opt0.min_seed_len = 1; else if (c == '1') no_mt_io = 1; else if (c == '2') no_mt_io = 2; @@ -478,6 +480,9 @@ int main_mem(int argc, char *argv[]) fprintf(stderr, "[M::%s] mean insert size: %.3f, stddev: %.3f, max: %d, min: %d\n", __func__, pes[1].avg, pes[1].std, pes[1].high, pes[1].low); } + else if (c == 'Z') { + useERT = 1; + } 
else return 1; } @@ -548,6 +553,7 @@ int main_mem(int argc, char *argv[]) fprintf(stderr, " (4 sigma from the mean if absent) and min of the insert size distribution.\n"); fprintf(stderr, " FR orientation only. [inferred]\n"); fprintf(stderr, " -u output XB instead of XA; XB is XA with the alignment score and mapping quality added.\n"); + fprintf(stderr, " -Z Use ERT index for seeding\n"); fprintf(stderr, "\n"); fprintf(stderr, "Note: Please read the man page for detailed description of the command line and options.\n"); fprintf(stderr, "\n"); @@ -589,9 +595,16 @@ int main_mem(int argc, char *argv[]) PROF_END(gprof[G_PREPARE], prepare); PROF_START(idx); - aux.idx = bwa_idx_load_from_shm(argv[optind]); + if (useERT) aux.idx = bwa_ertidx_load_from_shm(argv[optind]); + else aux.idx = bwa_fmtidx_load_from_shm(argv[optind]); if (aux.idx == 0) { - if ((aux.idx = bwa_idx_load(argv[optind], BWA_IDX_ALL)) == 0) return 1; // FIXME: memory leak + if (!useERT) { + if ((aux.idx = bwa_idx_load(argv[optind], BWA_IDX_BNS | BWA_IDX_PAC | BWA_IDX_FMT)) == 0) return 1; // FIXME: memory leak + } + else { + if ((aux.idx = bwa_ertidx_load_from_disk(argv[optind])) == 0) return 1; // FIXME: memory leak + } + } else if (bwa_verbose >= 3) fprintf(stderr, "[M::%s] load the bwa index from shared memory\n", __func__); if (ignore_alt) @@ -623,7 +636,7 @@ int main_mem(int argc, char *argv[]) } //fprintf(stderr, "%ld %ld %ld %ld %ld\n", aux.idx->fmt->L2[0], aux.idx->fmt->L2[1], aux.idx->fmt->L2[2], aux.idx->fmt->L2[3], aux.idx->fmt->L2[4]); - //exit(0); + aux.useERT = useERT; aux.w = init_mem_worker(opt, aux.idx->fmt, aux.idx->bns, aux.idx->pac); aux.data = calloc(2, sizeof(ktp_data_t)); @@ -635,8 +648,10 @@ int main_mem(int argc, char *argv[]) aux.wbuf = malloc(aux.wbuf_size); PROF_START(pipeline); + if (no_mt_io == 2) new_pipeline(&aux); else kt_pipeline(no_mt_io? 
1 : 2, process, &aux, 3); + PROF_END(gprof[G_PIPELINE], pipeline); // free(hdr_line); diff --git a/ksw_align_avx2.c b/ksw_align_avx2.c index 67ae098..7ccdcaa 100644 --- a/ksw_align_avx2.c +++ b/ksw_align_avx2.c @@ -8,7 +8,7 @@ #include "utils.h" #include "ksw.h" -static const kswr_t g_defr = { 0, -1, -1, -1, -1, -1, -1 }; +// static const kswr_t g_defr = { 0, -1, -1, -1, -1, -1, -1 }; typedef struct _kswq_t { int qlen, slen; diff --git a/main.c b/main.c index 718e7c0..ca98d99 100644 --- a/main.c +++ b/main.c @@ -40,6 +40,7 @@ int bwa_bwt2sa(int argc, char *argv[]); int bwa_bwt2bytesa(int argc, char *argv[]); int bwa_bwt2fmt(int argc, char *argv[]); int bwa_build_kmer(int argc, char *argv[]); +int bwa_bwt2ert(int argc, char *argv[]); int bwa_index(int argc, char *argv[]); int bwt_bwtgen_main(int argc, char *argv[]); @@ -82,6 +83,7 @@ static int usage() fprintf(stderr, " bwt2bytesa generate SA(using byte array) from BWT and Occ\n"); fprintf(stderr, " bwt2fmt generate FMT-Index from BWT\n"); fprintf(stderr, " buildkmer generate kmer index from FMT\n"); + fprintf(stderr, " bwt2ert generate ert index from BWT\n"); fprintf(stderr, "\n"); fprintf(stderr, "Note: To use BWA, you need to first index the genome with `bwa index'.\n" @@ -110,6 +112,7 @@ int main(int argc, char *argv[]) else if (strcmp(argv[1], "bwt2bytesa") == 0) ret = bwa_bwt2bytesa(argc-1, argv+1); else if (strcmp(argv[1], "bwt2fmt") == 0) ret = bwa_bwt2fmt(argc-1, argv+1); else if (strcmp(argv[1], "buildkmer") == 0) ret = bwa_build_kmer(argc - 1, argv + 1); + else if (strcmp(argv[1], "bwt2ert") == 0) ret = bwa_bwt2ert(argc-1, argv+1); else if (strcmp(argv[1], "index") == 0) ret = bwa_index(argc-1, argv+1); else if (strcmp(argv[1], "aln") == 0) ret = bwa_aln(argc-1, argv+1); else if (strcmp(argv[1], "samse") == 0) ret = bwa_sai2sam_se(argc-1, argv+1); diff --git a/run.sh b/run.sh index 594530e..60d5088 100755 --- a/run.sh +++ b/run.sh @@ -44,7 +44,7 @@ out=~/data1/fmt-out.sam #out=/dev/null time ./fastbwa mem 
/*
 * Log a formatted message (with trailing newline) to `fd` and flush.
 * Wrapped in do { } while (0) so the macro behaves as a single statement
 * inside unbraced if/else bodies (CERT PRE10-C).
 */
#define log_file(fd, M, ...) \
	do { \
		fprintf(fd, M "\n", ##__VA_ARGS__); \
		fflush(fd); \
	} while (0)

/*
 * Bounds-checked memcpy used throughout bwa-mem: copies `smax` bytes from
 * `src` into `dest`, aborting the process if the destination capacity
 * `dmax` is smaller than `smax`.
 *
 * The copy is performed in chunks of at most 256MB per memcpy() call.
 *
 * @param dest       destination buffer
 * @param dmax       destination capacity in bytes
 * @param src        source buffer
 * @param smax       number of bytes to copy
 * @param file_name  caller's file name (for the error message)
 * @param line_num   caller's line number (for the error message)
 * @return 0 on success; exits the process on overflow.
 */
int memcpy_bwamem(void *dest, size_t dmax, const void *src, size_t smax, char *file_name, int line_num)
{
#define RSIZE_MAX_MEM (256UL << 20) /* 256MB per-chunk copy limit */
	if (dmax < smax)
	{
		/* %zu matches size_t; %ld was undefined behavior on platforms where size_t != long */
		fprintf(stderr, "[%s: %d] src size is larger than dest size. (src: %zu; dest: %zu)\n", file_name, line_num, smax, dmax);
		exit(EXIT_FAILURE);
	}
	/* size_t counter avoids signed/unsigned comparison against smax */
	size_t bytes_copied;
	for (bytes_copied = 0; bytes_copied < smax; bytes_copied += RSIZE_MAX_MEM)
	{
		size_t bytes_remaining = smax - bytes_copied;
		size_t bytes_to_copy = (bytes_remaining > RSIZE_MAX_MEM) ? RSIZE_MAX_MEM : bytes_remaining;
		memcpy((char *)dest + bytes_copied, (const char *)src + bytes_copied, bytes_to_copy);
	}
	return 0;
#undef RSIZE_MAX_MEM
}