From be2020c4af30f9dee417762d3dae8eb00a0309a3 Mon Sep 17 00:00:00 2001 From: Heng Li Date: Tue, 26 Jan 2016 13:46:08 -0500 Subject: [PATCH] ropebwt2 for bwt construction --- Makefile | 2 +- bwa.1 | 7 +- bwtindex.c | 36 ++++-- rle.c | 191 ++++++++++++++++++++++++++++++++ rle.h | 77 +++++++++++++ rope.c | 318 +++++++++++++++++++++++++++++++++++++++++++++++++++++ rope.h | 58 ++++++++++ 7 files changed, 678 insertions(+), 11 deletions(-) create mode 100644 rle.c create mode 100644 rle.h create mode 100644 rope.c create mode 100644 rope.h diff --git a/Makefile b/Makefile index ed85639..40a93ad 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,7 @@ WRAP_MALLOC=-DUSE_MALLOC_WRAPPERS AR= ar DFLAGS= -DHAVE_PTHREAD $(WRAP_MALLOC) LOBJS= utils.o kthread.o kstring.o ksw.o bwt.o bntseq.o bwa.o bwamem.o bwamem_pair.o bwamem_extra.o malloc_wrap.o -AOBJS= QSufSort.o bwt_gen.o bwashm.o bwase.o bwaseqio.o bwtgap.o bwtaln.o bamlite.o \ +AOBJS= QSufSort.o bwt_gen.o rope.o rle.o bwashm.o bwase.o bwaseqio.o bwtgap.o bwtaln.o bamlite.o \ is.o bwtindex.o bwape.o kopen.o pemerge.o maxk.o \ bwtsw2_core.o bwtsw2_main.o bwtsw2_aux.o bwt_lite.o \ bwtsw2_chain.o fastmap.o bwtsw2_pair.o diff --git a/bwa.1 b/bwa.1 index e30f382..994f96a 100644 --- a/bwa.1 +++ b/bwa.1 @@ -60,11 +60,12 @@ Index database sequences in the FASTA format. Prefix of the output database [same as db filename] .TP .BI -a \ STR -Algorithm for constructing BWT index. BWA implements two algorithms for BWT +Algorithm for constructing BWT index. BWA implements three algorithms for BWT construction: -.B is +.BR is , +.B bwtsw and -.BR bwtsw . +.BR rb2 . The first algorithm is a little faster for small database but requires large RAM and does not work for databases with total length longer than 2GB. The second algorithm is adapted from the BWT-SW source code. It in theory works diff --git a/bwtindex.c b/bwtindex.c index 2bfd667..ce49112 100644 --- a/bwtindex.c +++ b/bwtindex.c @@ -34,6 +34,8 @@ #include "bntseq.h" #include "bwt.h" #include "utils.h" +#include "rle.h" +#include "rope.h" #ifdef _DIVBWT #include "divsufsort.h" @@ -90,11 +92,31 @@ bwt_t *bwt_pac2bwt(const char *fn_pac, int use_is) if (use_is) { bwt->primary = is_bwt(buf, bwt->seq_len); } else { -#ifdef _DIVBWT - bwt->primary = divbwt(buf, buf, 0, bwt->seq_len); -#else - err_fatal_simple("libdivsufsort is not compiled in."); -#endif + rope_t *r; + int64_t x; + rpitr_t itr; + const uint8_t *blk; + + r = rope_init(ROPE_DEF_MAX_NODES, ROPE_DEF_BLOCK_LEN); + for (i = bwt->seq_len - 1, x = 0; i >= 0; --i) { + int c = buf[i] + 1; + x = rope_insert_run(r, x, c, 1, 0) + 1; + while (--c >= 0) x += r->c[c]; + } + bwt->primary = x; + rope_itr_first(r, &itr); + x = 0; + while ((blk = rope_itr_next_block(&itr)) != 0) { + const uint8_t *q = blk + 2, *end = blk + 2 + *rle_nptr(blk); + while (q < end) { + int c = 0; + int64_t l; + rle_dec1(q, c, l); + for (i = 0; i < l; ++i) + buf[x++] = c - 1; + } + } + rope_destroy(r); } bwt->bwt = (u_int32_t*)calloc(bwt->bwt_size, 4); for (i = 0; i < bwt->seq_len; ++i) @@ -196,7 +218,7 @@ int bwa_index(int argc, char *argv[]) // the "index" command while ((c = getopt(argc, argv, "6a:p:b:")) >= 0) { switch (c) { case 'a': // if -a is not set, algo_type will be determined later - if (strcmp(optarg, "div") == 0) algo_type = 1; + if (strcmp(optarg, "rb2") == 0) algo_type = 1; else if (strcmp(optarg, "bwtsw") == 0) algo_type = 2; else if (strcmp(optarg, "is") == 0) algo_type = 3; else err_fatal(__func__, "unknown algorithm: '%s'.", optarg); @@ -216,7 +238,7 @@ int bwa_index(int argc, char *argv[]) // the "index" command if (optind + 1 > argc) { fprintf(stderr, "\n"); fprintf(stderr, "Usage: bwa index [options] \n\n"); - fprintf(stderr, "Options: -a STR BWT construction algorithm: bwtsw or is [auto]\n"); + fprintf(stderr, "Options: -a STR BWT construction algorithm: bwtsw, is or rb2 [auto]\n"); fprintf(stderr, " -p STR prefix of the index [same as fasta name]\n"); fprintf(stderr, " -b INT block size for the bwtsw algorithm (effective with -a bwtsw) [%d]\n", block_size); fprintf(stderr, " -6 index files named as .64.* instead of .* \n"); diff --git a/rle.c b/rle.c new file mode 100644 index 0000000..221e1cd --- /dev/null +++ b/rle.c @@ -0,0 +1,191 @@ +#include +#include +#include +#include +#include "rle.h" + +const uint8_t rle_auxtab[8] = { 0x01, 0x11, 0x21, 0x31, 0x03, 0x13, 0x07, 0x17 }; + +// insert symbol $a after $x symbols in $str; marginal counts added to $cnt; returns the size increase +int rle_insert_cached(uint8_t *block, int64_t x, int a, int64_t rl, int64_t cnt[6], const int64_t ec[6], int *beg, int64_t bc[6]) +{ + uint16_t *nptr = (uint16_t*)block; + int diff; + + block += 2; // skip the first 2 counting bytes + if (*nptr == 0) { + memset(cnt, 0, 48); + diff = rle_enc1(block, a, rl); + } else { + uint8_t *p, *end = block + *nptr, *q; + int64_t pre, z, l = 0, tot, beg_l; + int c = -1, n_bytes = 0, n_bytes2, t = 0; + uint8_t tmp[24]; + beg_l = bc[0] + bc[1] + bc[2] + bc[3] + bc[4] + bc[5]; + tot = ec[0] + ec[1] + ec[2] + ec[3] + ec[4] + ec[5]; + if (x < beg_l) { + beg_l = 0, *beg = 0; + memset(bc, 0, 48); + } + if (x == beg_l) { + p = q = block + (*beg); z = beg_l; + memcpy(cnt, bc, 48); + } else if (x - beg_l <= ((tot-beg_l)>>1) + ((tot-beg_l)>>3)) { // forward + z = beg_l; p = block + (*beg); + memcpy(cnt, bc, 48); + while (z < x) { + rle_dec1(p, c, l); + z += l; cnt[c] += l; + } + for (q = p - 1; *q>>6 == 2; --q); + } else { // backward + memcpy(cnt, ec, 48); + z = tot; p = end; + while (z >= x) { + --p; + if (*p>>6 != 2) { + l |= *p>>7? (int64_t)rle_auxtab[*p>>3&7]>>4 << t : *p>>3; + z -= l; cnt[*p&7] -= l; + l = 0; t = 0; + } else { + l |= (*p&0x3fL) << t; + t += 6; + } + } + q = p; + rle_dec1(p, c, l); + z += l; cnt[c] += l; + } + *beg = q - block; + memcpy(bc, cnt, 48); + bc[c] -= l; + n_bytes = p - q; + if (x == z && a != c && p < end) { // then try the next run + int tc; + int64_t tl; + q = p; + rle_dec1(q, tc, tl); + if (a == tc) + c = tc, n_bytes = q - p, l = tl, z += l, p = q, cnt[tc] += tl; + } + if (z != x) cnt[c] -= z - x; + pre = x - (z - l); p -= n_bytes; + if (a == c) { // insert to the same run + n_bytes2 = rle_enc1(tmp, c, l + rl); + } else if (x == z) { // at the end; append to the existing run + p += n_bytes; n_bytes = 0; + n_bytes2 = rle_enc1(tmp, a, rl); + } else { // break the current run + n_bytes2 = rle_enc1(tmp, c, pre); + n_bytes2 += rle_enc1(tmp + n_bytes2, a, rl); + n_bytes2 += rle_enc1(tmp + n_bytes2, c, l - pre); + } + if (n_bytes != n_bytes2 && end != p + n_bytes) // size changed + memmove(p + n_bytes2, p + n_bytes, end - p - n_bytes); + memcpy(p, tmp, n_bytes2); + diff = n_bytes2 - n_bytes; + } + return (*nptr += diff); +} + +int rle_insert(uint8_t *block, int64_t x, int a, int64_t rl, int64_t cnt[6], const int64_t ec[6]) +{ + int beg = 0; + int64_t bc[6]; + memset(bc, 0, 48); + return rle_insert_cached(block, x, a, rl, cnt, ec, &beg, bc); +} + +void rle_split(uint8_t *block, uint8_t *new_block) +{ + int n = *(uint16_t*)block; + uint8_t *end = block + 2 + n, *q = block + 2 + (n>>1); + while (*q>>6 == 2) --q; + memcpy(new_block + 2, q, end - q); + *(uint16_t*)new_block = end - q; + *(uint16_t*)block = q - block - 2; +} + +void rle_count(const uint8_t *block, int64_t cnt[6]) +{ + const uint8_t *q = block + 2, *end = q + *(uint16_t*)block; + while (q < end) { + int c; + int64_t l; + rle_dec1(q, c, l); + cnt[c] += l; + } +} + +void rle_print(const uint8_t *block, int expand) +{ + const uint16_t *p = (const uint16_t*)block; + const uint8_t *q = block + 2, *end = block + 2 + *p; + while (q < end) { + int c; + int64_t l, x; + rle_dec1(q, c, l); + if (expand) for (x = 0; x < l; ++x) putchar("$ACGTN"[c]); + else printf("%c%ld", "$ACGTN"[c], (long)l); + } + putchar('\n'); +} + +void rle_rank2a(const uint8_t *block, int64_t x, int64_t y, int64_t *cx, int64_t *cy, const int64_t ec[6]) +{ + int a; + int64_t tot, cnt[6]; + const uint8_t *p; + + y = y >= x? y : x; + tot = ec[0] + ec[1] + ec[2] + ec[3] + ec[4] + ec[5]; + if (tot == 0) return; + if (x <= (tot - y) + (tot>>3)) { + int c = 0; + int64_t l, z = 0; + memset(cnt, 0, 48); + p = block + 2; + while (z < x) { + rle_dec1(p, c, l); + z += l; cnt[c] += l; + } + for (a = 0; a != 6; ++a) cx[a] += cnt[a]; + cx[c] -= z - x; + if (cy) { + while (z < y) { + rle_dec1(p, c, l); + z += l; cnt[c] += l; + } + for (a = 0; a != 6; ++a) cy[a] += cnt[a]; + cy[c] -= z - y; + } + } else { +#define move_backward(_x) \ + while (z >= (_x)) { \ + --p; \ + if (*p>>6 != 2) { \ + l |= *p>>7? (int64_t)rle_auxtab[*p>>3&7]>>4 << t : *p>>3; \ + z -= l; cnt[*p&7] -= l; \ + l = 0; t = 0; \ + } else { \ + l |= (*p&0x3fL) << t; \ + t += 6; \ + } \ + } \ + + int t = 0; + int64_t l = 0, z = tot; + memcpy(cnt, ec, 48); + p = block + 2 + *(const uint16_t*)block; + if (cy) { + move_backward(y) + for (a = 0; a != 6; ++a) cy[a] += cnt[a]; + cy[*p&7] += y - z; + } + move_backward(x) + for (a = 0; a != 6; ++a) cx[a] += cnt[a]; + cx[*p&7] += x - z; + +#undef move_backward + } +} diff --git a/rle.h b/rle.h new file mode 100644 index 0000000..0d59484 --- /dev/null +++ b/rle.h @@ -0,0 +1,77 @@ +#ifndef RLE6_H_ +#define RLE6_H_ + +#include + +#ifdef __GNUC__ +#define LIKELY(x) __builtin_expect((x),1) +#else +#define LIKELY(x) (x) +#endif +#ifdef __cplusplus + +extern "C" { +#endif + + int rle_insert_cached(uint8_t *block, int64_t x, int a, int64_t rl, int64_t cnt[6], const int64_t ec[6], int *beg, int64_t bc[6]); + int rle_insert(uint8_t *block, int64_t x, int a, int64_t rl, int64_t cnt[6], const int64_t end_cnt[6]); + void rle_split(uint8_t *block, uint8_t *new_block); + void rle_count(const uint8_t *block, int64_t cnt[6]); + void rle_rank2a(const uint8_t *block, int64_t x, int64_t y, int64_t *cx, int64_t *cy, const int64_t ec[6]); + #define rle_rank1a(block, x, cx, ec) rle_rank2a(block, x, -1, cx, 0, ec) + + void rle_print(const uint8_t *block, int expand); + +#ifdef __cplusplus +} +#endif + +/****************** + *** 43+3 codec *** + ******************/ + +const uint8_t rle_auxtab[8]; + +#define RLE_MIN_SPACE 18 +#define rle_nptr(block) ((uint16_t*)(block)) + +// decode one run (c,l) and move the pointer p +#define rle_dec1(p, c, l) do { \ + (c) = *(p) & 7; \ + if (LIKELY((*(p)&0x80) == 0)) { \ + (l) = *(p)++ >> 3; \ + } else if (LIKELY(*(p)>>5 == 6)) { \ + (l) = (*(p)&0x18L)<<3L | ((p)[1]&0x3fL); \ + (p) += 2; \ + } else { \ + int n = ((*(p)&0x10) >> 2) + 4; \ + (l) = *(p)++ >> 3 & 1; \ + while (--n) (l) = ((l)<<6) | (*(p)++&0x3fL); \ + } \ + } while (0) + +static inline int rle_enc1(uint8_t *p, int c, int64_t l) +{ + if (l < 1LL<<4) { + *p = l << 3 | c; + return 1; + } else if (l < 1LL<<8) { + *p = 0xC0 | l >> 6 << 3 | c; + p[1] = 0x80 | (l & 0x3f); + return 2; + } else if (l < 1LL<<19) { + *p = 0xE0 | l >> 18 << 3 | c; + p[1] = 0x80 | (l >> 12 & 0x3f); + p[2] = 0x80 | (l >> 6 & 0x3f); + p[3] = 0x80 | (l & 0x3f); + return 4; + } else { + int i, shift = 36; + *p = 0xF0 | l >> 42 << 3 | c; + for (i = 1; i < 8; ++i, shift -= 6) + p[i] = 0x80 | (l>>shift & 0x3f); + return 8; + } +} + +#endif diff --git a/rope.c b/rope.c new file mode 100644 index 0000000..3d85981 --- /dev/null +++ b/rope.c @@ -0,0 +1,318 @@ +#include +#include +#include +#include +#include +#include "rle.h" +#include "rope.h" + +/******************* + *** Memory Pool *** + *******************/ + +#define MP_CHUNK_SIZE 0x100000 // 1MB per chunk + +typedef struct { // memory pool for fast and compact memory allocation (no free) + int size, i, n_elems; + int64_t top, max; + uint8_t **mem; +} mempool_t; + +static mempool_t *mp_init(int size) +{ + mempool_t *mp; + mp = calloc(1, sizeof(mempool_t)); + mp->size = size; + mp->i = mp->n_elems = MP_CHUNK_SIZE / size; + mp->top = -1; + return mp; +} + +static void mp_destroy(mempool_t *mp) +{ + int64_t i; + for (i = 0; i <= mp->top; ++i) free(mp->mem[i]); + free(mp->mem); free(mp); +} + +static inline void *mp_alloc(mempool_t *mp) +{ + if (mp->i == mp->n_elems) { + if (++mp->top == mp->max) { + mp->max = mp->max? mp->max<<1 : 1; + mp->mem = realloc(mp->mem, sizeof(void*) * mp->max); + } + mp->mem[mp->top] = calloc(mp->n_elems, mp->size); + mp->i = 0; + } + return mp->mem[mp->top] + (mp->i++) * mp->size; +} + +/*************** + *** B+ rope *** + ***************/ + +rope_t *rope_init(int max_nodes, int block_len) +{ + rope_t *rope; + rope = calloc(1, sizeof(rope_t)); + if (block_len < 32) block_len = 32; + rope->max_nodes = (max_nodes+ 1)>>1<<1; + rope->block_len = (block_len + 7) >> 3 << 3; + rope->node = mp_init(sizeof(rpnode_t) * rope->max_nodes); + rope->leaf = mp_init(rope->block_len); + rope->root = mp_alloc(rope->node); + rope->root->n = 1; + rope->root->is_bottom = 1; + rope->root->p = mp_alloc(rope->leaf); + return rope; +} + +void rope_destroy(rope_t *rope) +{ + mp_destroy(rope->node); + mp_destroy(rope->leaf); + free(rope); +} + +static inline rpnode_t *split_node(rope_t *rope, rpnode_t *u, rpnode_t *v) +{ // split $v's child. $u is the first node in the bucket. $v and $u are in the same bucket. IMPORTANT: there is always enough room in $u + int j, i = v - u; + rpnode_t *w; // $w is the sibling of $v + if (u == 0) { // only happens at the root; add a new root + u = v = mp_alloc(rope->node); + v->n = 1; v->p = rope->root; // the new root has the old root as the only child + memcpy(v->c, rope->c, 48); + for (j = 0; j < 6; ++j) v->l += v->c[j]; + rope->root = v; + } + if (i != u->n - 1) // then make room for a new node + memmove(v + 2, v + 1, sizeof(rpnode_t) * (u->n - i - 1)); + ++u->n; w = v + 1; + memset(w, 0, sizeof(rpnode_t)); + w->p = mp_alloc(u->is_bottom? rope->leaf : rope->node); + if (u->is_bottom) { // we are at the bottom level; $v->p is a string instead of a node + uint8_t *p = (uint8_t*)v->p, *q = (uint8_t*)w->p; + rle_split(p, q); + rle_count(q, w->c); + } else { // $v->p is a node, not a string + rpnode_t *p = v->p, *q = w->p; // $v and $w are siblings and thus $p and $q are cousins + p->n -= rope->max_nodes>>1; + memcpy(q, p + p->n, sizeof(rpnode_t) * (rope->max_nodes>>1)); + q->n = rope->max_nodes>>1; // NB: this line must below memcpy() as $q->n and $q->is_bottom are modified by memcpy() + q->is_bottom = p->is_bottom; + for (i = 0; i < q->n; ++i) + for (j = 0; j < 6; ++j) + w->c[j] += q[i].c[j]; + } + for (j = 0; j < 6; ++j) // compute $w->l and update $v->c + w->l += w->c[j], v->c[j] -= w->c[j]; + v->l -= w->l; // update $v->c + return v; +} + +int64_t rope_insert_run(rope_t *rope, int64_t x, int a, int64_t rl, rpcache_t *cache) +{ // insert $a after $x symbols in $rope and the returns rank(a, x) + rpnode_t *u = 0, *v = 0, *p = rope->root; // $v is the parent of $p; $u and $v are at the same level and $u is the first node in the bucket + int64_t y = 0, z = 0, cnt[6]; + int n_runs; + do { // top-down update. Searching and node splitting are done together in one pass. + if (p->n == rope->max_nodes) { // node is full; split + v = split_node(rope, u, v); // $v points to the parent of $p; when a new root is added, $v points to the root + if (y + v->l < x) // if $v is not long enough after the split, we need to move both $p and its parent $v + y += v->l, z += v->c[a], ++v, p = v->p; + } + u = p; + if (v && x - y > v->l>>1) { // then search backwardly for the right node to descend + p += p->n - 1; y += v->l; z += v->c[a]; + for (; y >= x; --p) y -= p->l, z -= p->c[a]; + ++p; + } else for (; y + p->l < x; ++p) y += p->l, z += p->c[a]; // then search forwardly + assert(p - u < u->n); + if (v) v->c[a] += rl, v->l += rl; // we should not change p->c[a] because this may cause troubles when p's child is split + v = p; p = p->p; // descend + } while (!u->is_bottom); + rope->c[a] += rl; // $rope->c should be updated after the loop as adding a new root needs the old $rope->c counts + if (cache) { + if (cache->p != (uint8_t*)p) memset(cache, 0, sizeof(rpcache_t)); + n_runs = rle_insert_cached((uint8_t*)p, x - y, a, rl, cnt, v->c, &cache->beg, cache->bc); + cache->p = (uint8_t*)p; + } else n_runs = rle_insert((uint8_t*)p, x - y, a, rl, cnt, v->c); + z += cnt[a]; + v->c[a] += rl; v->l += rl; // this should be after rle_insert(); otherwise rle_insert() won't work + if (n_runs + RLE_MIN_SPACE > rope->block_len) { + split_node(rope, u, v); + if (cache) memset(cache, 0, sizeof(rpcache_t)); + } + return z; +} + +static rpnode_t *rope_count_to_leaf(const rope_t *rope, int64_t x, int64_t cx[6], int64_t *rest) +{ + rpnode_t *u, *v = 0, *p = rope->root; + int64_t y = 0; + int a; + + memset(cx, 0, 48); + do { + u = p; + if (v && x - y > v->l>>1) { + p += p->n - 1; y += v->l; + for (a = 0; a != 6; ++a) cx[a] += v->c[a]; + for (; y >= x; --p) { + y -= p->l; + for (a = 0; a != 6; ++a) cx[a] -= p->c[a]; + } + ++p; + } else { + for (; y + p->l < x; ++p) { + y += p->l; + for (a = 0; a != 6; ++a) cx[a] += p->c[a]; + } + } + v = p; p = p->p; + } while (!u->is_bottom); + *rest = x - y; + return v; +} + +void rope_rank2a(const rope_t *rope, int64_t x, int64_t y, int64_t *cx, int64_t *cy) +{ + rpnode_t *v; + int64_t rest; + v = rope_count_to_leaf(rope, x, cx, &rest); + if (y < x || cy == 0) { + rle_rank1a((const uint8_t*)v->p, rest, cx, v->c); + } else if (rest + (y - x) <= v->l) { + memcpy(cy, cx, 48); + rle_rank2a((const uint8_t*)v->p, rest, rest + (y - x), cx, cy, v->c); + } else { + rle_rank1a((const uint8_t*)v->p, rest, cx, v->c); + v = rope_count_to_leaf(rope, y, cy, &rest); + rle_rank1a((const uint8_t*)v->p, rest, cy, v->c); + } +} + +/********************* + *** Rope iterator *** + *********************/ + +void rope_itr_first(const rope_t *rope, rpitr_t *i) +{ + memset(i, 0, sizeof(rpitr_t)); + i->rope = rope; + for (i->pa[i->d] = rope->root; !i->pa[i->d]->is_bottom;) // descend to the leftmost leaf + ++i->d, i->pa[i->d] = i->pa[i->d - 1]->p; +} + +const uint8_t *rope_itr_next_block(rpitr_t *i) +{ + const uint8_t *ret; + assert(i->d < ROPE_MAX_DEPTH); // a B+ tree should not be that tall + if (i->d < 0) return 0; + ret = (uint8_t*)i->pa[i->d][i->ia[i->d]].p; + while (i->d >= 0 && ++i->ia[i->d] == i->pa[i->d]->n) i->ia[i->d--] = 0; // backtracking + if (i->d >= 0) + while (!i->pa[i->d]->is_bottom) // descend to the leftmost leaf + ++i->d, i->pa[i->d] = i->pa[i->d - 1][i->ia[i->d - 1]].p; + return ret; +} + +/*********** + *** I/O *** + ***********/ + +void rope_print_node(const rpnode_t *p) +{ + if (p->is_bottom) { + int i; + putchar('('); + for (i = 0; i < p->n; ++i) { + uint8_t *block = (uint8_t*)p[i].p; + const uint8_t *q = block + 2, *end = block + 2 + *rle_nptr(block); + if (i) putchar(','); + while (q < end) { + int c = 0; + int64_t j, l; + rle_dec1(q, c, l); + for (j = 0; j < l; ++j) putchar("$ACGTN"[c]); + } + } + putchar(')'); + } else { + int i; + putchar('('); + for (i = 0; i < p->n; ++i) { + if (i) putchar(','); + rope_print_node(p[i].p); + } + putchar(')'); + } +} + +void rope_dump_node(const rpnode_t *p, FILE *fp) +{ + int16_t i, n = p->n; + uint8_t is_bottom = p->is_bottom; + fwrite(&is_bottom, 1, 1, fp); + fwrite(&n, 2, 1, fp); + if (is_bottom) { + for (i = 0; i < n; ++i) { + fwrite(p[i].c, 8, 6, fp); + fwrite(p[i].p, 1, *rle_nptr(p[i].p) + 2, fp); + } + } else { + for (i = 0; i < p->n; ++i) + rope_dump_node(p[i].p, fp); + } +} + +void rope_dump(const rope_t *r, FILE *fp) +{ + fwrite(&r->max_nodes, 4, 1, fp); + fwrite(&r->block_len, 4, 1, fp); + rope_dump_node(r->root, fp); +} + +rpnode_t *rope_restore_node(const rope_t *r, FILE *fp, int64_t c[6]) +{ + uint8_t is_bottom, a; + int16_t i, n; + rpnode_t *p; + fread(&is_bottom, 1, 1, fp); + fread(&n, 2, 1, fp); + p = mp_alloc(r->node); + p->is_bottom = is_bottom, p->n = n; + if (is_bottom) { + for (i = 0; i < n; ++i) { + uint16_t *q; + p[i].p = mp_alloc(r->leaf); + q = rle_nptr(p[i].p); + fread(p[i].c, 8, 6, fp); + fread(q, 2, 1, fp); + fread(q + 1, 1, *q, fp); + } + } else { + for (i = 0; i < n; ++i) + p[i].p = rope_restore_node(r, fp, p[i].c); + } + memset(c, 0, 48); + for (i = 0; i < n; ++i) { + p[i].l = 0; + for (a = 0; a < 6; ++a) + c[a] += p[i].c[a], p[i].l += p[i].c[a]; + } + return p; +} + +rope_t *rope_restore(FILE *fp) +{ + rope_t *r; + r = calloc(1, sizeof(rope_t)); + fread(&r->max_nodes, 4, 1, fp); + fread(&r->block_len, 4, 1, fp); + r->node = mp_init(sizeof(rpnode_t) * r->max_nodes); + r->leaf = mp_init(r->block_len); + r->root = rope_restore_node(r, fp, r->c); + return r; +} diff --git a/rope.h b/rope.h new file mode 100644 index 0000000..843a408 --- /dev/null +++ b/rope.h @@ -0,0 +1,58 @@ +#ifndef ROPE_H_ +#define ROPE_H_ + +#include +#include + +#define ROPE_MAX_DEPTH 80 +#define ROPE_DEF_MAX_NODES 64 +#define ROPE_DEF_BLOCK_LEN 512 + +typedef struct rpnode_s { + struct rpnode_s *p; // child; at the bottom level, $p points to a string with the first 2 bytes giving the number of runs (#runs) + uint64_t l:54, n:9, is_bottom:1; // $n and $is_bottom are only set for the first node in a bucket + int64_t c[6]; // marginal counts +} rpnode_t; + +typedef struct { + int32_t max_nodes, block_len; // both MUST BE even numbers + int64_t c[6]; // marginal counts + rpnode_t *root; + void *node, *leaf; // memory pool +} rope_t; + +typedef struct { + const rope_t *rope; // the rope + const rpnode_t *pa[ROPE_MAX_DEPTH]; // parent nodes + int ia[ROPE_MAX_DEPTH]; // index in the parent nodes + int d; // the current depth in the B+-tree +} rpitr_t; + +typedef struct { + int beg; + int64_t bc[6]; + uint8_t *p; +} rpcache_t; + +#ifdef __cplusplus +extern "C" { +#endif + + rope_t *rope_init(int max_nodes, int block_len); + void rope_destroy(rope_t *rope); + int64_t rope_insert_run(rope_t *rope, int64_t x, int a, int64_t rl, rpcache_t *cache); + void rope_rank2a(const rope_t *rope, int64_t x, int64_t y, int64_t *cx, int64_t *cy); + #define rope_rank1a(rope, x, cx) rope_rank2a(rope, x, -1, cx, 0) + + void rope_itr_first(const rope_t *rope, rpitr_t *i); + const uint8_t *rope_itr_next_block(rpitr_t *i); + + void rope_print_node(const rpnode_t *p); + void rope_dump(const rope_t *r, FILE *fp); + rope_t *rope_restore(FILE *fp); + +#ifdef __cplusplus +} +#endif + +#endif