diff --git a/Makefile b/Makefile index ed85639..726cdf2 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,7 @@ WRAP_MALLOC=-DUSE_MALLOC_WRAPPERS AR= ar DFLAGS= -DHAVE_PTHREAD $(WRAP_MALLOC) LOBJS= utils.o kthread.o kstring.o ksw.o bwt.o bntseq.o bwa.o bwamem.o bwamem_pair.o bwamem_extra.o malloc_wrap.o -AOBJS= QSufSort.o bwt_gen.o bwashm.o bwase.o bwaseqio.o bwtgap.o bwtaln.o bamlite.o \ +AOBJS= QSufSort.o bwt_gen.o rope.o rle.o bwashm.o bwase.o bwaseqio.o bwtgap.o bwtaln.o bamlite.o \ is.o bwtindex.o bwape.o kopen.o pemerge.o maxk.o \ bwtsw2_core.o bwtsw2_main.o bwtsw2_aux.o bwt_lite.o \ bwtsw2_chain.o fastmap.o bwtsw2_pair.o @@ -63,7 +63,7 @@ bwt_gen.o: QSufSort.h malloc_wrap.h bwt_lite.o: bwt_lite.h malloc_wrap.h bwtaln.o: bwtaln.h bwt.h bwtgap.h utils.h bwa.h bntseq.h malloc_wrap.h bwtgap.o: bwtgap.h bwt.h bwtaln.h malloc_wrap.h -bwtindex.o: bntseq.h bwt.h utils.h malloc_wrap.h +bwtindex.o: bntseq.h bwt.h utils.h rle.h rope.h malloc_wrap.h bwtsw2_aux.o: bntseq.h bwt_lite.h utils.h bwtsw2.h bwt.h kstring.h bwtsw2_aux.o: malloc_wrap.h bwa.h ksw.h kseq.h ksort.h bwtsw2_chain.o: bwtsw2.h bntseq.h bwt_lite.h bwt.h malloc_wrap.h ksort.h @@ -82,4 +82,6 @@ main.o: kstring.h malloc_wrap.h utils.h malloc_wrap.o: malloc_wrap.h maxk.o: bwa.h bntseq.h bwt.h bwamem.h kseq.h malloc_wrap.h pemerge.o: ksw.h kseq.h malloc_wrap.h kstring.h bwa.h bntseq.h bwt.h utils.h +rle.o: rle.h +rope.o: rle.h rope.h utils.o: utils.h ksort.h malloc_wrap.h kseq.h diff --git a/bwa.1 b/bwa.1 index e30f382..994f96a 100644 --- a/bwa.1 +++ b/bwa.1 @@ -60,11 +60,12 @@ Index database sequences in the FASTA format. Prefix of the output database [same as db filename] .TP .BI -a \ STR -Algorithm for constructing BWT index. BWA implements two algorithms for BWT +Algorithm for constructing BWT index. BWA implements three algorithms for BWT construction: -.B is +.BR is , +.B bwtsw and -.BR bwtsw . +.BR rb2 . The first algorithm is a little faster for small database but requires large RAM and does not work for databases with total length longer than 2GB. The second algorithm is adapted from the BWT-SW source code. It in theory works diff --git a/bwamem.c b/bwamem.c index b32c166..99a6d88 100644 --- a/bwamem.c +++ b/bwamem.c @@ -114,7 +114,7 @@ static void smem_aux_destroy(smem_aux_t *a) static void mem_collect_intv(const mem_opt_t *opt, const bwt_t *bwt, int len, const uint8_t *seq, smem_aux_t *a) { int i, k, x = 0, old_n; - int start_width = (opt->flag & MEM_F_SELF_OVLP)? 2 : 1; + int start_width = 1; int split_len = (int)(opt->min_seed_len * opt->split_factor + .499); a->mem.n = 0; // first pass: find all SMEMs @@ -488,13 +488,6 @@ int mem_sort_dedup_patch(const mem_opt_t *opt, const bntseq_t *bns, const uint8_ return m; } -int mem_test_and_remove_exact(const mem_opt_t *opt, int n, mem_alnreg_t *a, int qlen) -{ - if (!(opt->flag & MEM_F_SELF_OVLP) || n == 0 || a->truesc != qlen * opt->a) return n; - memmove(a, a + 1, (n - 1) * sizeof(mem_alnreg_t)); - return n - 1; -} - typedef kvec_t(int) int_v; static void mem_mark_primary_se_core(const mem_opt_t *opt, int n, mem_alnreg_t *a, int_v *z) @@ -1046,8 +1039,6 @@ mem_alnreg_v mem_align1_core(const mem_opt_t *opt, const bwt_t *bwt, const bntse } free(chn.a); regs.n = mem_sort_dedup_patch(opt, bns, pac, (uint8_t*)seq, regs.n, regs.a); - if (opt->flag & MEM_F_SELF_OVLP) - regs.n = mem_test_and_remove_exact(opt, regs.n, regs.a, l_seq); if (bwa_verbose >= 4) { err_printf("* %ld chains remain after removing duplicated chains\n", regs.n); for (i = 0; i < regs.n; ++i) { @@ -1168,12 +1159,8 @@ static void worker2(void *data, int i, int tid) worker_t *w = (worker_t*)data; if (!(w->opt->flag&MEM_F_PE)) { if (bwa_verbose >= 4) printf("=====> Finalizing read '%s' <=====\n", w->seqs[i].name); - if (w->opt->flag & MEM_F_ALN_REG) { - mem_reg2ovlp(w->opt, w->bns, w->pac, &w->seqs[i], &w->regs[i]); - } else { - mem_mark_primary_se(w->opt, w->regs[i].n, w->regs[i].a, w->n_processed + i); - mem_reg2sam(w->opt, w->bns, w->pac, &w->seqs[i], &w->regs[i], 0, 0); - } + mem_mark_primary_se(w->opt, w->regs[i].n, w->regs[i].a, w->n_processed + i); + mem_reg2sam(w->opt, w->bns, w->pac, &w->seqs[i], &w->regs[i], 0, 0); free(w->regs[i].a); } else { if (bwa_verbose >= 4) printf("=====> Finalizing read pair '%s' <=====\n", w->seqs[i<<1|0].name); diff --git a/bwamem.h b/bwamem.h index 8ffe729..730b603 100644 --- a/bwamem.h +++ b/bwamem.h @@ -16,8 +16,6 @@ typedef struct __smem_i smem_i; #define MEM_F_ALL 0x8 #define MEM_F_NO_MULTI 0x10 #define MEM_F_NO_RESCUE 0x20 -#define MEM_F_SELF_OVLP 0x40 -#define MEM_F_ALN_REG 0x80 #define MEM_F_REF_HDR 0x100 #define MEM_F_SOFTCLIP 0x200 #define MEM_F_SMARTPE 0x400 diff --git a/bwamem_extra.c b/bwamem_extra.c index 02e817a..d2e37f4 100644 --- a/bwamem_extra.c +++ b/bwamem_extra.c @@ -87,31 +87,6 @@ mem_alnreg_v mem_align1(const mem_opt_t *opt, const bwt_t *bwt, const bntseq_t * return ar; } -void mem_reg2ovlp(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, bseq1_t *s, mem_alnreg_v *a) -{ - int i; - kstring_t str = {0,0,0}; - for (i = 0; i < a->n; ++i) { - const mem_alnreg_t *p = &a->a[i]; - int is_rev, rid, qb = p->qb, qe = p->qe; - int64_t pos, rb = p->rb, re = p->re; - pos = bns_depos(bns, rb < bns->l_pac? rb : re - 1, &is_rev); - rid = bns_pos2rid(bns, pos); - assert(rid == p->rid); - pos -= bns->anns[rid].offset; - kputs(s->name, &str); kputc('\t', &str); - kputw(s->l_seq, &str); kputc('\t', &str); - if (is_rev) qb ^= qe, qe ^= qb, qb ^= qe; // swap - kputw(qb, &str); kputc('\t', &str); kputw(qe, &str); kputc('\t', &str); - kputs(bns->anns[rid].name, &str); kputc('\t', &str); - kputw(bns->anns[rid].len, &str); kputc('\t', &str); - kputw(pos, &str); kputc('\t', &str); kputw(pos + (re - rb), &str); kputc('\t', &str); - ksprintf(&str, "%.3f", (double)p->truesc / opt->a / (qe - qb > re - rb? qe - qb : re - rb)); - kputc('\n', &str); - } - s->sam = str.s; -} - static inline int get_pri_idx(double XA_drop_ratio, const mem_alnreg_t *a, int i) { int k = a[i].secondary_all; diff --git a/bwtindex.c b/bwtindex.c index 2bfd667..826bb5a 100644 --- a/bwtindex.c +++ b/bwtindex.c @@ -34,6 +34,8 @@ #include "bntseq.h" #include "bwt.h" #include "utils.h" +#include "rle.h" +#include "rope.h" #ifdef _DIVBWT #include "divsufsort.h" @@ -63,7 +65,7 @@ bwt_t *bwt_pac2bwt(const char *fn_pac, int use_is) { bwt_t *bwt; ubyte_t *buf, *buf2; - int i, pac_size; + int64_t i, pac_size; FILE *fp; // initialization @@ -90,11 +92,31 @@ bwt_t *bwt_pac2bwt(const char *fn_pac, int use_is) if (use_is) { bwt->primary = is_bwt(buf, bwt->seq_len); } else { -#ifdef _DIVBWT - bwt->primary = divbwt(buf, buf, 0, bwt->seq_len); -#else - err_fatal_simple("libdivsufsort is not compiled in."); -#endif + rope_t *r; + int64_t x; + rpitr_t itr; + const uint8_t *blk; + + r = rope_init(ROPE_DEF_MAX_NODES, ROPE_DEF_BLOCK_LEN); + for (i = bwt->seq_len - 1, x = 0; i >= 0; --i) { + int c = buf[i] + 1; + x = rope_insert_run(r, x, c, 1, 0) + 1; + while (--c >= 0) x += r->c[c]; + } + bwt->primary = x; + rope_itr_first(r, &itr); + x = 0; + while ((blk = rope_itr_next_block(&itr)) != 0) { + const uint8_t *q = blk + 2, *end = blk + 2 + *rle_nptr(blk); + while (q < end) { + int c = 0; + int64_t l; + rle_dec1(q, c, l); + for (i = 0; i < l; ++i) + buf[x++] = c - 1; + } + } + rope_destroy(r); } bwt->bwt = (u_int32_t*)calloc(bwt->bwt_size, 4); for (i = 0; i < bwt->seq_len; ++i) @@ -196,7 +218,7 @@ int bwa_index(int argc, char *argv[]) // the "index" command while ((c = getopt(argc, argv, "6a:p:b:")) >= 0) { switch (c) { case 'a': // if -a is not set, algo_type will be determined later - if (strcmp(optarg, "div") == 0) algo_type = 1; + if (strcmp(optarg, "rb2") == 0) algo_type = 1; else if (strcmp(optarg, "bwtsw") == 0) algo_type = 2; else if (strcmp(optarg, "is") == 0) algo_type = 3; else err_fatal(__func__, "unknown algorithm: '%s'.", optarg); @@ -216,7 +238,7 @@ int bwa_index(int argc, char *argv[]) // the "index" command if (optind + 1 > argc) { fprintf(stderr, "\n"); fprintf(stderr, "Usage: bwa index [options] \n\n"); - fprintf(stderr, "Options: -a STR BWT construction algorithm: bwtsw or is [auto]\n"); + fprintf(stderr, "Options: -a STR BWT construction algorithm: bwtsw, is or rb2 [auto]\n"); fprintf(stderr, " -p STR prefix of the index [same as fasta name]\n"); fprintf(stderr, " -b INT block size for the bwtsw algorithm (effective with -a bwtsw) [%d]\n", block_size); fprintf(stderr, " -6 index files named as .64.* instead of .* \n"); diff --git a/fastmap.c b/fastmap.c index f8266b6..1e49ccb 100644 --- a/fastmap.c +++ b/fastmap.c @@ -130,7 +130,7 @@ int main_mem(int argc, char *argv[]) aux.opt = opt = mem_opt_init(); memset(&opt0, 0, sizeof(mem_opt_t)); - while ((c = getopt(argc, argv, "1epaFMCSPVYjk:c:v:s:r:t:R:A:B:O:E:U:w:L:d:T:Q:D:m:I:N:W:x:G:h:y:K:X:H:")) >= 0) { + while ((c = getopt(argc, argv, "1paMCSPVYjk:c:v:s:r:t:R:A:B:O:E:U:w:L:d:T:Q:D:m:I:N:W:x:G:h:y:K:X:H:")) >= 0) { if (c == 'k') opt->min_seed_len = atoi(optarg), opt0.min_seed_len = 1; else if (c == '1') no_mt_io = 1; else if (c == 'x') mode = optarg; @@ -145,8 +145,6 @@ int main_mem(int argc, char *argv[]) else if (c == 'p') opt->flag |= MEM_F_PE | MEM_F_SMARTPE; else if (c == 'M') opt->flag |= MEM_F_NO_MULTI; else if (c == 'S') opt->flag |= MEM_F_NO_RESCUE; - else if (c == 'e') opt->flag |= MEM_F_SELF_OVLP; - else if (c == 'F') opt->flag |= MEM_F_ALN_REG; else if (c == 'Y') opt->flag |= MEM_F_SOFTCLIP; else if (c == 'V') opt->flag |= MEM_F_REF_HDR; else if (c == 'c') opt->max_occ = atoi(optarg), opt0.max_occ = 1; @@ -251,7 +249,6 @@ int main_mem(int argc, char *argv[]) fprintf(stderr, " -m INT perform at most INT rounds of mate rescues for each read [%d]\n", opt->max_matesw); fprintf(stderr, " -S skip mate rescue\n"); fprintf(stderr, " -P skip pairing; mate rescue performed unless -S also in use\n"); - fprintf(stderr, " -e discard full-length exact matches\n"); fprintf(stderr, "\nScoring options:\n\n"); fprintf(stderr, " -A INT score for a sequence match, which scales options -TdBOELU unless overridden [%d]\n", opt->a); fprintf(stderr, " -B INT penalty for a mismatch [%d]\n", opt->b); @@ -263,7 +260,6 @@ int main_mem(int argc, char *argv[]) fprintf(stderr, " pacbio: -k17 -W40 -r10 -A1 -B1 -O1 -E1 -L0 (PacBio reads to ref)\n"); fprintf(stderr, " ont2d: -k14 -W20 -r10 -A1 -B1 -O1 -E1 -L0 (Oxford Nanopore 2D-reads to ref)\n"); fprintf(stderr, " intractg: -B9 -O16 -L5 (intra-species contigs to ref)\n"); -// fprintf(stderr, " pbread: -k13 -W40 -c1000 -r10 -A1 -B1 -O1 -E1 -N25 -FeaD.001\n"); fprintf(stderr, "\nInput/output options:\n\n"); fprintf(stderr, " -p smart pairing (ignoring in2.fq)\n"); fprintf(stderr, " -R STR read group header line such as '@RG\\tID:foo\\tSM:bar' [null]\n"); @@ -296,21 +292,14 @@ int main_mem(int argc, char *argv[]) if (!opt0.b) opt->b = 9; if (!opt0.pen_clip5) opt->pen_clip5 = 5; if (!opt0.pen_clip3) opt->pen_clip3 = 5; - } else if (strcmp(mode, "pacbio") == 0 || strcmp(mode, "pbref") == 0 || strcmp(mode, "pbread") == 0 || strcmp(mode, "ont2d") == 0) { + } else if (strcmp(mode, "pacbio") == 0 || strcmp(mode, "pbref") == 0 || strcmp(mode, "ont2d") == 0) { if (!opt0.o_del) opt->o_del = 1; if (!opt0.e_del) opt->e_del = 1; if (!opt0.o_ins) opt->o_ins = 1; if (!opt0.e_ins) opt->e_ins = 1; if (!opt0.b) opt->b = 1; if (opt0.split_factor == 0.) opt->split_factor = 10.; - if (strcmp(mode, "pbread") == 0) { // pacbio read-to-read setting; NOT working well! - opt->flag |= MEM_F_ALL | MEM_F_SELF_OVLP | MEM_F_ALN_REG; - if (!opt0.min_chain_weight) opt->min_chain_weight = 40; - if (!opt0.max_occ) opt->max_occ = 1000; - if (!opt0.min_seed_len) opt->min_seed_len = 13; - if (!opt0.max_chain_extend) opt->max_chain_extend = 25; - if (opt0.drop_ratio == 0.) opt->drop_ratio = .001; - } else if (strcmp(mode, "ont2d") == 0) { + if (strcmp(mode, "ont2d") == 0) { if (!opt0.min_chain_weight) opt->min_chain_weight = 20; if (!opt0.min_seed_len) opt->min_seed_len = 14; if (!opt0.pen_clip5) opt->pen_clip5 = 0; @@ -359,8 +348,7 @@ int main_mem(int argc, char *argv[]) opt->flag |= MEM_F_PE; } } - if (!(opt->flag & MEM_F_ALN_REG)) - bwa_print_sam_hdr(aux.idx->bns, hdr_line); + bwa_print_sam_hdr(aux.idx->bns, hdr_line); aux.actual_chunk_size = fixed_chunk_size > 0? fixed_chunk_size : opt->chunk_size * opt->n_threads; kt_pipeline(no_mt_io? 1 : 2, process, &aux, 3); free(hdr_line); diff --git a/kthread.c b/kthread.c index 97359c0..1b13494 100644 --- a/kthread.c +++ b/kthread.c @@ -130,14 +130,14 @@ void kt_pipeline(int n_threads, void *(*func)(void*, int, void*), void *shared_d pthread_mutex_init(&aux.mutex, 0); pthread_cond_init(&aux.cv, 0); - aux.workers = alloca(n_threads * sizeof(ktp_worker_t)); + aux.workers = (ktp_worker_t*)alloca(n_threads * sizeof(ktp_worker_t)); for (i = 0; i < n_threads; ++i) { ktp_worker_t *w = &aux.workers[i]; w->step = 0; w->pl = &aux; w->data = 0; w->index = aux.index++; } - tid = alloca(n_threads * sizeof(pthread_t)); + tid = (pthread_t*)alloca(n_threads * sizeof(pthread_t)); for (i = 0; i < n_threads; ++i) pthread_create(&tid[i], 0, ktp_worker, &aux.workers[i]); for (i = 0; i < n_threads; ++i) pthread_join(tid[i], 0); diff --git a/maxk.c b/maxk.c index 24b9d54..fee5765 100644 --- a/maxk.c +++ b/maxk.c @@ -3,6 +3,7 @@ #include #include #include +#include #include "bwa.h" #include "bwamem.h" #include "kseq.h" diff --git a/rle.c b/rle.c new file mode 100644 index 0000000..221e1cd --- /dev/null +++ b/rle.c @@ -0,0 +1,191 @@ +#include +#include +#include +#include +#include "rle.h" + +const uint8_t rle_auxtab[8] = { 0x01, 0x11, 0x21, 0x31, 0x03, 0x13, 0x07, 0x17 }; + +// insert symbol $a after $x symbols in $str; marginal counts added to $cnt; returns the size increase +int rle_insert_cached(uint8_t *block, int64_t x, int a, int64_t rl, int64_t cnt[6], const int64_t ec[6], int *beg, int64_t bc[6]) +{ + uint16_t *nptr = (uint16_t*)block; + int diff; + + block += 2; // skip the first 2 counting bytes + if (*nptr == 0) { + memset(cnt, 0, 48); + diff = rle_enc1(block, a, rl); + } else { + uint8_t *p, *end = block + *nptr, *q; + int64_t pre, z, l = 0, tot, beg_l; + int c = -1, n_bytes = 0, n_bytes2, t = 0; + uint8_t tmp[24]; + beg_l = bc[0] + bc[1] + bc[2] + bc[3] + bc[4] + bc[5]; + tot = ec[0] + ec[1] + ec[2] + ec[3] + ec[4] + ec[5]; + if (x < beg_l) { + beg_l = 0, *beg = 0; + memset(bc, 0, 48); + } + if (x == beg_l) { + p = q = block + (*beg); z = beg_l; + memcpy(cnt, bc, 48); + } else if (x - beg_l <= ((tot-beg_l)>>1) + ((tot-beg_l)>>3)) { // forward + z = beg_l; p = block + (*beg); + memcpy(cnt, bc, 48); + while (z < x) { + rle_dec1(p, c, l); + z += l; cnt[c] += l; + } + for (q = p - 1; *q>>6 == 2; --q); + } else { // backward + memcpy(cnt, ec, 48); + z = tot; p = end; + while (z >= x) { + --p; + if (*p>>6 != 2) { + l |= *p>>7? (int64_t)rle_auxtab[*p>>3&7]>>4 << t : *p>>3; + z -= l; cnt[*p&7] -= l; + l = 0; t = 0; + } else { + l |= (*p&0x3fL) << t; + t += 6; + } + } + q = p; + rle_dec1(p, c, l); + z += l; cnt[c] += l; + } + *beg = q - block; + memcpy(bc, cnt, 48); + bc[c] -= l; + n_bytes = p - q; + if (x == z && a != c && p < end) { // then try the next run + int tc; + int64_t tl; + q = p; + rle_dec1(q, tc, tl); + if (a == tc) + c = tc, n_bytes = q - p, l = tl, z += l, p = q, cnt[tc] += tl; + } + if (z != x) cnt[c] -= z - x; + pre = x - (z - l); p -= n_bytes; + if (a == c) { // insert to the same run + n_bytes2 = rle_enc1(tmp, c, l + rl); + } else if (x == z) { // at the end; append to the existing run + p += n_bytes; n_bytes = 0; + n_bytes2 = rle_enc1(tmp, a, rl); + } else { // break the current run + n_bytes2 = rle_enc1(tmp, c, pre); + n_bytes2 += rle_enc1(tmp + n_bytes2, a, rl); + n_bytes2 += rle_enc1(tmp + n_bytes2, c, l - pre); + } + if (n_bytes != n_bytes2 && end != p + n_bytes) // size changed + memmove(p + n_bytes2, p + n_bytes, end - p - n_bytes); + memcpy(p, tmp, n_bytes2); + diff = n_bytes2 - n_bytes; + } + return (*nptr += diff); +} + +int rle_insert(uint8_t *block, int64_t x, int a, int64_t rl, int64_t cnt[6], const int64_t ec[6]) +{ + int beg = 0; + int64_t bc[6]; + memset(bc, 0, 48); + return rle_insert_cached(block, x, a, rl, cnt, ec, &beg, bc); +} + +void rle_split(uint8_t *block, uint8_t *new_block) +{ + int n = *(uint16_t*)block; + uint8_t *end = block + 2 + n, *q = block + 2 + (n>>1); + while (*q>>6 == 2) --q; + memcpy(new_block + 2, q, end - q); + *(uint16_t*)new_block = end - q; + *(uint16_t*)block = q - block - 2; +} + +void rle_count(const uint8_t *block, int64_t cnt[6]) +{ + const uint8_t *q = block + 2, *end = q + *(uint16_t*)block; + while (q < end) { + int c; + int64_t l; + rle_dec1(q, c, l); + cnt[c] += l; + } +} + +void rle_print(const uint8_t *block, int expand) +{ + const uint16_t *p = (const uint16_t*)block; + const uint8_t *q = block + 2, *end = block + 2 + *p; + while (q < end) { + int c; + int64_t l, x; + rle_dec1(q, c, l); + if (expand) for (x = 0; x < l; ++x) putchar("$ACGTN"[c]); + else printf("%c%ld", "$ACGTN"[c], (long)l); + } + putchar('\n'); +} + +void rle_rank2a(const uint8_t *block, int64_t x, int64_t y, int64_t *cx, int64_t *cy, const int64_t ec[6]) +{ + int a; + int64_t tot, cnt[6]; + const uint8_t *p; + + y = y >= x? y : x; + tot = ec[0] + ec[1] + ec[2] + ec[3] + ec[4] + ec[5]; + if (tot == 0) return; + if (x <= (tot - y) + (tot>>3)) { + int c = 0; + int64_t l, z = 0; + memset(cnt, 0, 48); + p = block + 2; + while (z < x) { + rle_dec1(p, c, l); + z += l; cnt[c] += l; + } + for (a = 0; a != 6; ++a) cx[a] += cnt[a]; + cx[c] -= z - x; + if (cy) { + while (z < y) { + rle_dec1(p, c, l); + z += l; cnt[c] += l; + } + for (a = 0; a != 6; ++a) cy[a] += cnt[a]; + cy[c] -= z - y; + } + } else { +#define move_backward(_x) \ + while (z >= (_x)) { \ + --p; \ + if (*p>>6 != 2) { \ + l |= *p>>7? (int64_t)rle_auxtab[*p>>3&7]>>4 << t : *p>>3; \ + z -= l; cnt[*p&7] -= l; \ + l = 0; t = 0; \ + } else { \ + l |= (*p&0x3fL) << t; \ + t += 6; \ + } \ + } \ + + int t = 0; + int64_t l = 0, z = tot; + memcpy(cnt, ec, 48); + p = block + 2 + *(const uint16_t*)block; + if (cy) { + move_backward(y) + for (a = 0; a != 6; ++a) cy[a] += cnt[a]; + cy[*p&7] += y - z; + } + move_backward(x) + for (a = 0; a != 6; ++a) cx[a] += cnt[a]; + cx[*p&7] += x - z; + +#undef move_backward + } +} diff --git a/rle.h b/rle.h new file mode 100644 index 0000000..0d59484 --- /dev/null +++ b/rle.h @@ -0,0 +1,77 @@ +#ifndef RLE6_H_ +#define RLE6_H_ + +#include + +#ifdef __GNUC__ +#define LIKELY(x) __builtin_expect((x),1) +#else +#define LIKELY(x) (x) +#endif +#ifdef __cplusplus + +extern "C" { +#endif + + int rle_insert_cached(uint8_t *block, int64_t x, int a, int64_t rl, int64_t cnt[6], const int64_t ec[6], int *beg, int64_t bc[6]); + int rle_insert(uint8_t *block, int64_t x, int a, int64_t rl, int64_t cnt[6], const int64_t end_cnt[6]); + void rle_split(uint8_t *block, uint8_t *new_block); + void rle_count(const uint8_t *block, int64_t cnt[6]); + void rle_rank2a(const uint8_t *block, int64_t x, int64_t y, int64_t *cx, int64_t *cy, const int64_t ec[6]); + #define rle_rank1a(block, x, cx, ec) rle_rank2a(block, x, -1, cx, 0, ec) + + void rle_print(const uint8_t *block, int expand); + +#ifdef __cplusplus +} +#endif + +/****************** + *** 43+3 codec *** + ******************/ + +const uint8_t rle_auxtab[8]; + +#define RLE_MIN_SPACE 18 +#define rle_nptr(block) ((uint16_t*)(block)) + +// decode one run (c,l) and move the pointer p +#define rle_dec1(p, c, l) do { \ + (c) = *(p) & 7; \ + if (LIKELY((*(p)&0x80) == 0)) { \ + (l) = *(p)++ >> 3; \ + } else if (LIKELY(*(p)>>5 == 6)) { \ + (l) = (*(p)&0x18L)<<3L | ((p)[1]&0x3fL); \ + (p) += 2; \ + } else { \ + int n = ((*(p)&0x10) >> 2) + 4; \ + (l) = *(p)++ >> 3 & 1; \ + while (--n) (l) = ((l)<<6) | (*(p)++&0x3fL); \ + } \ + } while (0) + +static inline int rle_enc1(uint8_t *p, int c, int64_t l) +{ + if (l < 1LL<<4) { + *p = l << 3 | c; + return 1; + } else if (l < 1LL<<8) { + *p = 0xC0 | l >> 6 << 3 | c; + p[1] = 0x80 | (l & 0x3f); + return 2; + } else if (l < 1LL<<19) { + *p = 0xE0 | l >> 18 << 3 | c; + p[1] = 0x80 | (l >> 12 & 0x3f); + p[2] = 0x80 | (l >> 6 & 0x3f); + p[3] = 0x80 | (l & 0x3f); + return 4; + } else { + int i, shift = 36; + *p = 0xF0 | l >> 42 << 3 | c; + for (i = 1; i < 8; ++i, shift -= 6) + p[i] = 0x80 | (l>>shift & 0x3f); + return 8; + } +} + +#endif diff --git a/rope.c b/rope.c new file mode 100644 index 0000000..3d85981 --- /dev/null +++ b/rope.c @@ -0,0 +1,318 @@ +#include +#include +#include +#include +#include +#include "rle.h" +#include "rope.h" + +/******************* + *** Memory Pool *** + *******************/ + +#define MP_CHUNK_SIZE 0x100000 // 1MB per chunk + +typedef struct { // memory pool for fast and compact memory allocation (no free) + int size, i, n_elems; + int64_t top, max; + uint8_t **mem; +} mempool_t; + +static mempool_t *mp_init(int size) +{ + mempool_t *mp; + mp = calloc(1, sizeof(mempool_t)); + mp->size = size; + mp->i = mp->n_elems = MP_CHUNK_SIZE / size; + mp->top = -1; + return mp; +} + +static void mp_destroy(mempool_t *mp) +{ + int64_t i; + for (i = 0; i <= mp->top; ++i) free(mp->mem[i]); + free(mp->mem); free(mp); +} + +static inline void *mp_alloc(mempool_t *mp) +{ + if (mp->i == mp->n_elems) { + if (++mp->top == mp->max) { + mp->max = mp->max? mp->max<<1 : 1; + mp->mem = realloc(mp->mem, sizeof(void*) * mp->max); + } + mp->mem[mp->top] = calloc(mp->n_elems, mp->size); + mp->i = 0; + } + return mp->mem[mp->top] + (mp->i++) * mp->size; +} + +/*************** + *** B+ rope *** + ***************/ + +rope_t *rope_init(int max_nodes, int block_len) +{ + rope_t *rope; + rope = calloc(1, sizeof(rope_t)); + if (block_len < 32) block_len = 32; + rope->max_nodes = (max_nodes+ 1)>>1<<1; + rope->block_len = (block_len + 7) >> 3 << 3; + rope->node = mp_init(sizeof(rpnode_t) * rope->max_nodes); + rope->leaf = mp_init(rope->block_len); + rope->root = mp_alloc(rope->node); + rope->root->n = 1; + rope->root->is_bottom = 1; + rope->root->p = mp_alloc(rope->leaf); + return rope; +} + +void rope_destroy(rope_t *rope) +{ + mp_destroy(rope->node); + mp_destroy(rope->leaf); + free(rope); +} + +static inline rpnode_t *split_node(rope_t *rope, rpnode_t *u, rpnode_t *v) +{ // split $v's child. $u is the first node in the bucket. $v and $u are in the same bucket. IMPORTANT: there is always enough room in $u + int j, i = v - u; + rpnode_t *w; // $w is the sibling of $v + if (u == 0) { // only happens at the root; add a new root + u = v = mp_alloc(rope->node); + v->n = 1; v->p = rope->root; // the new root has the old root as the only child + memcpy(v->c, rope->c, 48); + for (j = 0; j < 6; ++j) v->l += v->c[j]; + rope->root = v; + } + if (i != u->n - 1) // then make room for a new node + memmove(v + 2, v + 1, sizeof(rpnode_t) * (u->n - i - 1)); + ++u->n; w = v + 1; + memset(w, 0, sizeof(rpnode_t)); + w->p = mp_alloc(u->is_bottom? rope->leaf : rope->node); + if (u->is_bottom) { // we are at the bottom level; $v->p is a string instead of a node + uint8_t *p = (uint8_t*)v->p, *q = (uint8_t*)w->p; + rle_split(p, q); + rle_count(q, w->c); + } else { // $v->p is a node, not a string + rpnode_t *p = v->p, *q = w->p; // $v and $w are siblings and thus $p and $q are cousins + p->n -= rope->max_nodes>>1; + memcpy(q, p + p->n, sizeof(rpnode_t) * (rope->max_nodes>>1)); + q->n = rope->max_nodes>>1; // NB: this line must below memcpy() as $q->n and $q->is_bottom are modified by memcpy() + q->is_bottom = p->is_bottom; + for (i = 0; i < q->n; ++i) + for (j = 0; j < 6; ++j) + w->c[j] += q[i].c[j]; + } + for (j = 0; j < 6; ++j) // compute $w->l and update $v->c + w->l += w->c[j], v->c[j] -= w->c[j]; + v->l -= w->l; // update $v->c + return v; +} + +int64_t rope_insert_run(rope_t *rope, int64_t x, int a, int64_t rl, rpcache_t *cache) +{ // insert $a after $x symbols in $rope and the returns rank(a, x) + rpnode_t *u = 0, *v = 0, *p = rope->root; // $v is the parent of $p; $u and $v are at the same level and $u is the first node in the bucket + int64_t y = 0, z = 0, cnt[6]; + int n_runs; + do { // top-down update. Searching and node splitting are done together in one pass. + if (p->n == rope->max_nodes) { // node is full; split + v = split_node(rope, u, v); // $v points to the parent of $p; when a new root is added, $v points to the root + if (y + v->l < x) // if $v is not long enough after the split, we need to move both $p and its parent $v + y += v->l, z += v->c[a], ++v, p = v->p; + } + u = p; + if (v && x - y > v->l>>1) { // then search backwardly for the right node to descend + p += p->n - 1; y += v->l; z += v->c[a]; + for (; y >= x; --p) y -= p->l, z -= p->c[a]; + ++p; + } else for (; y + p->l < x; ++p) y += p->l, z += p->c[a]; // then search forwardly + assert(p - u < u->n); + if (v) v->c[a] += rl, v->l += rl; // we should not change p->c[a] because this may cause troubles when p's child is split + v = p; p = p->p; // descend + } while (!u->is_bottom); + rope->c[a] += rl; // $rope->c should be updated after the loop as adding a new root needs the old $rope->c counts + if (cache) { + if (cache->p != (uint8_t*)p) memset(cache, 0, sizeof(rpcache_t)); + n_runs = rle_insert_cached((uint8_t*)p, x - y, a, rl, cnt, v->c, &cache->beg, cache->bc); + cache->p = (uint8_t*)p; + } else n_runs = rle_insert((uint8_t*)p, x - y, a, rl, cnt, v->c); + z += cnt[a]; + v->c[a] += rl; v->l += rl; // this should be after rle_insert(); otherwise rle_insert() won't work + if (n_runs + RLE_MIN_SPACE > rope->block_len) { + split_node(rope, u, v); + if (cache) memset(cache, 0, sizeof(rpcache_t)); + } + return z; +} + +static rpnode_t *rope_count_to_leaf(const rope_t *rope, int64_t x, int64_t cx[6], int64_t *rest) +{ + rpnode_t *u, *v = 0, *p = rope->root; + int64_t y = 0; + int a; + + memset(cx, 0, 48); + do { + u = p; + if (v && x - y > v->l>>1) { + p += p->n - 1; y += v->l; + for (a = 0; a != 6; ++a) cx[a] += v->c[a]; + for (; y >= x; --p) { + y -= p->l; + for (a = 0; a != 6; ++a) cx[a] -= p->c[a]; + } + ++p; + } else { + for (; y + p->l < x; ++p) { + y += p->l; + for (a = 0; a != 6; ++a) cx[a] += p->c[a]; + } + } + v = p; p = p->p; + } while (!u->is_bottom); + *rest = x - y; + return v; +} + +void rope_rank2a(const rope_t *rope, int64_t x, int64_t y, int64_t *cx, int64_t *cy) +{ + rpnode_t *v; + int64_t rest; + v = rope_count_to_leaf(rope, x, cx, &rest); + if (y < x || cy == 0) { + rle_rank1a((const uint8_t*)v->p, rest, cx, v->c); + } else if (rest + (y - x) <= v->l) { + memcpy(cy, cx, 48); + rle_rank2a((const uint8_t*)v->p, rest, rest + (y - x), cx, cy, v->c); + } else { + rle_rank1a((const uint8_t*)v->p, rest, cx, v->c); + v = rope_count_to_leaf(rope, y, cy, &rest); + rle_rank1a((const uint8_t*)v->p, rest, cy, v->c); + } +} + +/********************* + *** Rope iterator *** + *********************/ + +void rope_itr_first(const rope_t *rope, rpitr_t *i) +{ + memset(i, 0, sizeof(rpitr_t)); + i->rope = rope; + for (i->pa[i->d] = rope->root; !i->pa[i->d]->is_bottom;) // descend to the leftmost leaf + ++i->d, i->pa[i->d] = i->pa[i->d - 1]->p; +} + +const uint8_t *rope_itr_next_block(rpitr_t *i) +{ + const uint8_t *ret; + assert(i->d < ROPE_MAX_DEPTH); // a B+ tree should not be that tall + if (i->d < 0) return 0; + ret = (uint8_t*)i->pa[i->d][i->ia[i->d]].p; + while (i->d >= 0 && ++i->ia[i->d] == i->pa[i->d]->n) i->ia[i->d--] = 0; // backtracking + if (i->d >= 0) + while (!i->pa[i->d]->is_bottom) // descend to the leftmost leaf + ++i->d, i->pa[i->d] = i->pa[i->d - 1][i->ia[i->d - 1]].p; + return ret; +} + +/*********** + *** I/O *** + ***********/ + +void rope_print_node(const rpnode_t *p) +{ + if (p->is_bottom) { + int i; + putchar('('); + for (i = 0; i < p->n; ++i) { + uint8_t *block = (uint8_t*)p[i].p; + const uint8_t *q = block + 2, *end = block + 2 + *rle_nptr(block); + if (i) putchar(','); + while (q < end) { + int c = 0; + int64_t j, l; + rle_dec1(q, c, l); + for (j = 0; j < l; ++j) putchar("$ACGTN"[c]); + } + } + putchar(')'); + } else { + int i; + putchar('('); + for (i = 0; i < p->n; ++i) { + if (i) putchar(','); + rope_print_node(p[i].p); + } + putchar(')'); + } +} + +void rope_dump_node(const rpnode_t *p, FILE *fp) +{ + int16_t i, n = p->n; + uint8_t is_bottom = p->is_bottom; + fwrite(&is_bottom, 1, 1, fp); + fwrite(&n, 2, 1, fp); + if (is_bottom) { + for (i = 0; i < n; ++i) { + fwrite(p[i].c, 8, 6, fp); + fwrite(p[i].p, 1, *rle_nptr(p[i].p) + 2, fp); + } + } else { + for (i = 0; i < p->n; ++i) + rope_dump_node(p[i].p, fp); + } +} + +void rope_dump(const rope_t *r, FILE *fp) +{ + fwrite(&r->max_nodes, 4, 1, fp); + fwrite(&r->block_len, 4, 1, fp); + rope_dump_node(r->root, fp); +} + +rpnode_t *rope_restore_node(const rope_t *r, FILE *fp, int64_t c[6]) +{ + uint8_t is_bottom, a; + int16_t i, n; + rpnode_t *p; + fread(&is_bottom, 1, 1, fp); + fread(&n, 2, 1, fp); + p = mp_alloc(r->node); + p->is_bottom = is_bottom, p->n = n; + if (is_bottom) { + for (i = 0; i < n; ++i) { + uint16_t *q; + p[i].p = mp_alloc(r->leaf); + q = rle_nptr(p[i].p); + fread(p[i].c, 8, 6, fp); + fread(q, 2, 1, fp); + fread(q + 1, 1, *q, fp); + } + } else { + for (i = 0; i < n; ++i) + p[i].p = rope_restore_node(r, fp, p[i].c); + } + memset(c, 0, 48); + for (i = 0; i < n; ++i) { + p[i].l = 0; + for (a = 0; a < 6; ++a) + c[a] += p[i].c[a], p[i].l += p[i].c[a]; + } + return p; +} + +rope_t *rope_restore(FILE *fp) +{ + rope_t *r; + r = calloc(1, sizeof(rope_t)); + fread(&r->max_nodes, 4, 1, fp); + fread(&r->block_len, 4, 1, fp); + r->node = mp_init(sizeof(rpnode_t) * r->max_nodes); + r->leaf = mp_init(r->block_len); + r->root = rope_restore_node(r, fp, r->c); + return r; +} diff --git a/rope.h b/rope.h new file mode 100644 index 0000000..843a408 --- /dev/null +++ b/rope.h @@ -0,0 +1,58 @@ +#ifndef ROPE_H_ +#define ROPE_H_ + +#include +#include + +#define ROPE_MAX_DEPTH 80 +#define ROPE_DEF_MAX_NODES 64 +#define ROPE_DEF_BLOCK_LEN 512 + +typedef struct rpnode_s { + struct rpnode_s *p; // child; at the bottom level, $p points to a string with the first 2 bytes giving the number of runs (#runs) + uint64_t l:54, n:9, is_bottom:1; // $n and $is_bottom are only set for the first node in a bucket + int64_t c[6]; // marginal counts +} rpnode_t; + +typedef struct { + int32_t max_nodes, block_len; // both MUST BE even numbers + int64_t c[6]; // marginal counts + rpnode_t *root; + void *node, *leaf; // memory pool +} rope_t; + +typedef struct { + const rope_t *rope; // the rope + const rpnode_t *pa[ROPE_MAX_DEPTH]; // parent nodes + int ia[ROPE_MAX_DEPTH]; // index in the parent nodes + int d; // the current depth in the B+-tree +} rpitr_t; + +typedef struct { + int beg; + int64_t bc[6]; + uint8_t *p; +} rpcache_t; + +#ifdef __cplusplus +extern "C" { +#endif + + rope_t *rope_init(int max_nodes, int block_len); + void rope_destroy(rope_t *rope); + int64_t rope_insert_run(rope_t *rope, int64_t x, int a, int64_t rl, rpcache_t *cache); + void rope_rank2a(const rope_t *rope, int64_t x, int64_t y, int64_t *cx, int64_t *cy); + #define rope_rank1a(rope, x, cx) rope_rank2a(rope, x, -1, cx, 0) + + void rope_itr_first(const rope_t *rope, rpitr_t *i); + const uint8_t *rope_itr_next_block(rpitr_t *i); + + void rope_print_node(const rpnode_t *p); + void rope_dump(const rope_t *r, FILE *fp); + rope_t *rope_restore(FILE *fp); + +#ifdef __cplusplus +} +#endif + +#endif