diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..a957929 --- /dev/null +++ b/Makefile @@ -0,0 +1,41 @@ +CC= gcc +CFLAGS= -g -Wall -O2 -Wc++-compat -Wno-unused-function +CPPFLAGS= +INCLUDES= -I. +OBJS= kalloc.o kthread.o bseq.o sketch.o sdust.o index.o +PROG= minimap2 +PROG_EXTRA= sdust +LIBS= -lm -lz -lpthread + +.SUFFIXES:.c .o + +.c.o: + $(CC) -c $(CFLAGS) $(CPPFLAGS) $(INCLUDES) $< -o $@ + +all:$(PROG) + +extra:all $(PROG_EXTRA) + +minimap2:libminimap2.a + $(CC) $(CFLAGS) $< -o $@ -L. -lminimap2 $(LIBS) + +libminimap2.a:$(OBJS) + $(AR) -csru $@ $(OBJS) + +sdust:sdust.c kalloc.o kdq.h kvec.h kseq.h sdust.h + $(CC) -D_SDUST_MAIN $(CFLAGS) $< kalloc.o -o $@ -lz + +clean: + rm -fr gmon.out *.o a.out $(PROG) $(PROG_EXTRA) *~ *.a *.dSYM session* + +depend: + (LC_ALL=C; export LC_ALL; makedepend -Y -- $(CFLAGS) $(DFLAGS) -- *.c) + +# DO NOT DELETE + +bseq.o: bseq.h kseq.h +index.o: kthread.h bseq.h minimap.h kvec.h kalloc.h khash.h +kalloc.o: kalloc.h +misc.o: minimap.h ksort.h +sdust.o: kalloc.h kdq.h kvec.h sdust.h +sketch.o: kvec.h kalloc.h minimap.h diff --git a/bseq.c b/bseq.c index 3cde6c8..3179fa1 100644 --- a/bseq.c +++ b/bseq.c @@ -7,8 +7,6 @@ #include "kseq.h" KSEQ_INIT(gzFile, gzread) -extern unsigned char seq_nt4_table[256]; - struct bseq_file_s { int is_eof; gzFile fp; diff --git a/bseq.h b/bseq.h index daf1fc5..0595d98 100644 --- a/bseq.h +++ b/bseq.h @@ -16,4 +16,6 @@ void bseq_close(bseq_file_t *fp); bseq1_t *bseq_read(bseq_file_t *fp, int chunk_size, int *n_); int bseq_eof(bseq_file_t *fp); +extern unsigned char seq_nt4_table[256]; + #endif diff --git a/index.c b/index.c new file mode 100644 index 0000000..52af8e4 --- /dev/null +++ b/index.c @@ -0,0 +1,355 @@ +#include +#include +#include +#include "kthread.h" +#include "bseq.h" +#include "minimap.h" +#include "kvec.h" +#include "khash.h" + +#define idx_hash(a) ((a)>>1) +#define idx_eq(a, b) ((a)>>1 == (b)>>1) +KHASH_INIT(idx, uint64_t, uint64_t, 1, idx_hash, idx_eq) +typedef 
khash_t(idx) idxhash_t;
+
+#define kroundup64(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, (x)|=(x)>>32, ++(x))
+
+mm_idx_t *mm_idx_init(int w, int k, int b)
+{
+	mm_idx_t *mi;
+	if (k*2 < b) b = k * 2;
+	if (w < 1) w = 1;
+	mi = (mm_idx_t*)calloc(1, sizeof(mm_idx_t));
+	mi->w = w, mi->k = k, mi->b = b;
+	mi->B = (mm_idx_bucket_t*)calloc(1<<b, sizeof(mm_idx_bucket_t));
+	return mi;
+}
+
+void mm_idx_destroy(mm_idx_t *mi)
+{
+	uint32_t i;
+	if (mi == 0) return;
+	for (i = 0; i < 1U<<mi->b; ++i) {
+		free(mi->B[i].p);
+		free(mi->B[i].a.a);
+		kh_destroy(idx, (idxhash_t*)mi->B[i].h);
+	}
+	for (i = 0; i < mi->n_seq; ++i)
+		free(mi->seq[i].name);
+	// NOTE(review): mi->S (packed sequence, realloc'd in worker_pipeline) is not freed here — confirm against upstream
+	free(mi->seq); free(mi->B); free(mi);
+}
+
+const uint64_t *mm_idx_get(const mm_idx_t *mi, uint64_t minier, int *n)
+{
+	int mask = (1<<mi->b) - 1;
+	khint_t k;
+	mm_idx_bucket_t *b = &mi->B[minier&mask];
+	idxhash_t *h = (idxhash_t*)b->h;
+	*n = 0;
+	if (h == 0) return 0;
+	k = kh_get(idx, h, minier>>mi->b<<1);
+	if (k == kh_end(h)) return 0;
+	if (kh_key(h, k)&1) {
+		*n = 1;
+		return &kh_val(h, k);
+	} else {
+		*n = (uint32_t)kh_val(h, k);
+		return &b->p[kh_val(h, k)>>32];
+	}
+}
+
+uint32_t mm_idx_cal_max_occ(const mm_idx_t *mi, float f)
+{
+	int i;
+	size_t n = 0;
+	uint32_t thres;
+	khint_t *a, k;
+	if (f <= 0.) return UINT32_MAX;
+	for (i = 0; i < 1<<mi->b; ++i)
+		if (mi->B[i].h) n += kh_size((idxhash_t*)mi->B[i].h);
+	a = (uint32_t*)malloc(n * 4);
+	for (i = n = 0; i < 1<<mi->b; ++i) {
+		idxhash_t *h = (idxhash_t*)mi->B[i].h;
+		if (h == 0) continue;
+		for (k = 0; k < kh_end(h); ++k) {
+			if (!kh_exist(h, k)) continue;
+			a[n++] = kh_key(h, k)&1? 1 : (uint32_t)kh_val(h, k);
+		}
+	}
+	thres = ks_ksmall_uint32_t(n, a, (uint32_t)((1.
- f) * n)) + 1;
+	free(a);
+	return thres;
+}
+
+/*********************************
+ * Sort and generate hash tables *
+ *********************************/
+
+static void worker_post(void *g, long i, int tid)
+{
+	int j, start_a, start_p, n, n_keys;
+	idxhash_t *h;
+	mm_idx_t *mi = (mm_idx_t*)g;
+	mm_idx_bucket_t *b = &mi->B[i];
+	if (b->a.n == 0) return;
+
+	// sort by minimizer
+	radix_sort_128x(b->a.a, b->a.a + b->a.n);
+
+	// count and preallocate
+	for (j = 1, n = 1, n_keys = 0, b->n = 0; j <= b->a.n; ++j) {
+		if (j == b->a.n || b->a.a[j].x>>8 != b->a.a[j-1].x>>8) {
+			++n_keys;
+			if (n > 1) b->n += n;
+			n = 1;
+		} else ++n;
+	}
+	h = kh_init(idx);
+	kh_resize(idx, h, n_keys);
+	b->p = (uint64_t*)calloc(b->n, 8);
+
+	// create the hash table
+	for (j = 1, n = 1, start_a = start_p = 0; j <= b->a.n; ++j) {
+		if (j == b->a.n || b->a.a[j].x>>8 != b->a.a[j-1].x>>8) {
+			khint_t itr;
+			int absent;
+			mm128_t *p = &b->a.a[j-1];
+			itr = kh_put(idx, h, p->x>>8>>mi->b<<1, &absent);
+			assert(absent && j - start_a == n);
+			if (n == 1) {
+				kh_key(h, itr) |= 1;
+				kh_val(h, itr) = p->y;
+			} else {
+				int k;
+				for (k = 0; k < n; ++k)
+					b->p[start_p + k] = b->a.a[start_a + k].y;
+				kh_val(h, itr) = (uint64_t)start_p<<32 | n;
+				start_p += n;
+			}
+			start_a = j, n = 1;
+		} else ++n;
+	}
+	b->h = h;
+	assert(b->n == start_p);
+
+	// deallocate and clear b->a
+	free(b->a.a);
+	b->a.n = b->a.m = 0, b->a.a = 0;
+}
+
+static void mm_idx_post(mm_idx_t *mi, int n_threads)
+{
+	kt_for(n_threads, worker_post, mi, 1<<mi->b);
+}
+
+/******************
+ * Generate index *
+ ******************/
+
+#include <string.h> /* NOTE(review): include targets lost to tag-stripping; reconstructed — confirm */
+#include <zlib.h>
+#include "bseq.h"
+
+typedef struct {
+	int mini_batch_size, keep_name, is_hpc;
+	uint64_t batch_size;
+	bseq_file_t *fp;
+	mm_idx_t *mi;
+} pipeline_t;
+
+typedef struct {
+    int n_seq;
+	bseq1_t *seq;
+	mm128_v a;
+} step_t;
+
+static void mm_idx_add(mm_idx_t *mi, int n, const mm128_t *a)
+{
+	int i, mask = (1<<mi->b) - 1;
+	for (i = 0; i < n; ++i) {
+		mm128_v *p = &mi->B[a[i].x&mask].a;
+		
kv_push(mm128_t, 0, *p, a[i]);
+	}
+}
+
+static void *worker_pipeline(void *shared, int step, void *in)
+{
+	int i;
+	pipeline_t *p = (pipeline_t*)shared;
+	if (step == 0) { // step 0: read sequences
+		step_t *s;
+		if (p->mi->sum_len > p->batch_size) return 0;
+		s = (step_t*)calloc(1, sizeof(step_t));
+		s->seq = bseq_read(p->fp, p->mini_batch_size, &s->n_seq); // read a mini-batch
+		if (s->seq) {
+			uint32_t old_m, m;
+			uint64_t sum_len, old_max_len, max_len;
+			assert((uint64_t)p->mi->n_seq + s->n_seq <= UINT32_MAX); // to prevent integer overflow
+			// make room for p->mi->seq
+			old_m = p->mi->n_seq, m = p->mi->n_seq + s->n_seq;
+			kroundup32(m); kroundup32(old_m);
+			if (old_m != m)
+				p->mi->seq = (mm_idx_seq_t*)realloc(p->mi->seq, m * sizeof(mm_idx_seq_t));
+			// make room for p->mi->S
+			for (i = 0, sum_len = 0; i < s->n_seq; ++i) sum_len += s->seq[i].l_seq;
+			old_max_len = (p->mi->sum_len + 7) / 8;
+			max_len = (p->mi->sum_len + sum_len + 7) / 8;
+			kroundup64(old_max_len); kroundup64(max_len);
+			if (old_max_len != max_len) {
+				p->mi->S = (uint32_t*)realloc(p->mi->S, max_len * 4);
+				memset(&p->mi->S[old_max_len], 0, 4 * (max_len - old_max_len));
+			}
+			// populate p->mi->seq
+			for (i = 0; i < s->n_seq; ++i) {
+				mm_idx_seq_t *seq = &p->mi->seq[p->mi->n_seq]; // fixed: no ++ here — n_seq is incremented once, at the rid assignment below
+				uint32_t j;
+				if (p->keep_name) {
+					assert(strlen(s->seq[i].name) <= 254); // a long query name breaks BAM
+					seq->name = strdup(s->seq[i].name);
+				} else seq->name = 0;
+				seq->len = s->seq[i].l_seq;
+				seq->offset = p->mi->sum_len;
+				// copy the sequence
+				for (j = 0; j < seq->len; ++j) { // TODO: this is not the fastest way, but let's first see if speed matters here
+					uint64_t o = p->mi->sum_len + j;
+					int c = seq_nt4_table[(uint8_t)s->seq[i].seq[j]];
+					mm_seq4_set(p->mi->S, o, c);
+				}
+				// update p->mi->{sum_len,n_seq}
+				p->mi->sum_len += seq->len;
+				s->seq[i].rid = p->mi->n_seq++;
+			}
+			return s;
+		} else free(s);
+	} else if (step == 1) { // step 1: compute sketch
+		step_t *s = (step_t*)in;
+		for (i = 0; i <
s->n_seq; ++i) { + bseq1_t *t = &s->seq[i]; + mm_sketch(0, t->seq, t->l_seq, p->mi->w, p->mi->k, t->rid, p->is_hpc, &s->a); + free(t->seq); free(t->name); + } + free(s->seq); s->seq = 0; + return s; + } else if (step == 2) { // dispatch sketch to buckets + step_t *s = (step_t*)in; + mm_idx_add(p->mi, s->a.n, s->a.a); + free(s->a.a); free(s); + } + return 0; +} + +mm_idx_t *mm_idx_gen(bseq_file_t *fp, int w, int k, int b, int is_hpc, int mini_batch_size, int n_threads, uint64_t batch_size, int keep_name) +{ + pipeline_t pl; + memset(&pl, 0, sizeof(pipeline_t)); + pl.mini_batch_size = mini_batch_size; + pl.keep_name = keep_name; + pl.is_hpc = is_hpc; + pl.batch_size = batch_size; + pl.fp = fp; + if (pl.fp == 0) return 0; + pl.mi = mm_idx_init(w, k, b); + + kt_pipeline(n_threads < 3? n_threads : 3, worker_pipeline, &pl, 3); + if (mm_verbose >= 3) + fprintf(stderr, "[M::%s::%.3f*%.2f] collected minimizers\n", __func__, realtime() - mm_realtime0, cputime() / (realtime() - mm_realtime0)); + + mm_idx_post(pl.mi, n_threads); + if (mm_verbose >= 3) + fprintf(stderr, "[M::%s::%.3f*%.2f] sorted minimizers\n", __func__, realtime() - mm_realtime0, cputime() / (realtime() - mm_realtime0)); + + return pl.mi; +} + +mm_idx_t *mm_idx_build(const char *fn, int w, int k, int is_hpc, int n_threads) // a simpler interface +{ + bseq_file_t *fp; + mm_idx_t *mi; + fp = bseq_open(fn); + if (fp == 0) return 0; + mi = mm_idx_gen(fp, w, k, MM_IDX_DEF_B, is_hpc, 1<<18, n_threads, UINT64_MAX, 1); + bseq_close(fp); + return mi; +} + +/************* + * index I/O * + *************/ + +#define MM_IDX_MAGIC "MMI\2" + +void mm_idx_dump(FILE *fp, const mm_idx_t *mi) +{ + uint32_t x[4]; + int i; + x[0] = mi->w, x[1] = mi->k, x[2] = mi->b, x[3] = mi->n_seq; + fwrite(MM_IDX_MAGIC, 1, 4, fp); + fwrite(x, 4, 4, fp); + for (i = 0; i < mi->n_seq; ++i) { + uint8_t l; + l = strlen(mi->seq[i].name); + fwrite(&l, 1, 1, fp); + fwrite(mi->seq[i].name, 1, l, fp); + fwrite(&mi->seq[i].len, 4, 1, fp); + } + for (i = 
0; i < 1<<mi->b; ++i) {
+		mm_idx_bucket_t *b = &mi->B[i];
+		khint_t k;
+		idxhash_t *h = (idxhash_t*)b->h;
+		uint32_t size = h? h->size : 0;
+		fwrite(&b->n, 4, 1, fp);
+		fwrite(b->p, 8, b->n, fp);
+		fwrite(&size, 4, 1, fp);
+		if (size == 0) continue;
+		for (k = 0; k < kh_end(h); ++k) {
+			uint64_t x[2];
+			if (!kh_exist(h, k)) continue;
+			x[0] = kh_key(h, k), x[1] = kh_val(h, k);
+			fwrite(x, 8, 2, fp);
+		}
+	}
+}
+
+mm_idx_t *mm_idx_load(FILE *fp)
+{
+	int i;
+	char magic[4];
+	uint32_t x[4];
+	mm_idx_t *mi;
+	if (fread(magic, 1, 4, fp) != 4) return 0;
+	if (strncmp(magic, MM_IDX_MAGIC, 4) != 0) return 0;
+	if (fread(x, 4, 4, fp) != 4) return 0; // fixed: fread returns the number of items read (at most 4); "!= 6" made loading always fail
+	mi = mm_idx_init(x[0], x[1], x[2]);
+	mi->n_seq = x[3];
+	mi->seq = (mm_idx_seq_t*)calloc(mi->n_seq, sizeof(mm_idx_seq_t));
+	for (i = 0; i < mi->n_seq; ++i) {
+		uint8_t l;
+		fread(&l, 1, 1, fp);
+		mi->seq[i].name = (char*)malloc(l + 1);
+		fread(mi->seq[i].name, 1, l, fp);
+		mi->seq[i].name[l] = 0;
+		fread(&mi->seq[i].len, 4, 1, fp);
+	}
+	for (i = 0; i < 1<<mi->b; ++i) {
+		mm_idx_bucket_t *b = &mi->B[i];
+		uint32_t j, size;
+		khint_t k;
+		idxhash_t *h;
+		fread(&b->n, 4, 1, fp);
+		b->p = (uint64_t*)malloc(b->n * 8);
+		fread(b->p, 8, b->n, fp);
+		fread(&size, 4, 1, fp);
+		if (size == 0) continue;
+		b->h = h = kh_init(idx);
+		kh_resize(idx, h, size);
+		for (j = 0; j < size; ++j) {
+			uint64_t x[2];
+			int absent;
+			fread(x, 8, 2, fp);
+			k = kh_put(idx, h, x[0], &absent);
+			assert(absent);
+			kh_val(h, k) = x[1];
+		}
+	}
+	return mi;
+}
diff --git a/khash.h b/khash.h
new file mode 100644
index 0000000..6373a93
--- /dev/null
+++ b/khash.h
@@ -0,0 +1,615 @@
+/* The MIT License
+
+   Copyright (c) 2008, 2009, 2011 by Attractive Chaos
+
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   "Software"), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell
copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be + included in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE. +*/ + +/* + An example: + +#include "khash.h" +KHASH_MAP_INIT_INT(32, char) +int main() { + int ret, is_missing; + khiter_t k; + khash_t(32) *h = kh_init(32); + k = kh_put(32, h, 5, &ret); + kh_value(h, k) = 10; + k = kh_get(32, h, 10); + is_missing = (k == kh_end(h)); + k = kh_get(32, h, 5); + kh_del(32, h, k); + for (k = kh_begin(h); k != kh_end(h); ++k) + if (kh_exist(h, k)) kh_value(h, k) = 1; + kh_destroy(32, h); + return 0; +} +*/ + +/* + 2013-05-02 (0.2.8): + + * Use quadratic probing. When the capacity is power of 2, stepping function + i*(i+1)/2 guarantees to traverse each bucket. It is better than double + hashing on cache performance and is more robust than linear probing. + + In theory, double hashing should be more robust than quadratic probing. + However, my implementation is probably not for large hash tables, because + the second hash function is closely tied to the first hash function, + which reduce the effectiveness of double hashing. + + Reference: http://research.cs.vt.edu/AVresearch/hashing/quadratic.php + + 2011-12-29 (0.2.7): + + * Minor code clean up; no actual effect. + + 2011-09-16 (0.2.6): + + * The capacity is a power of 2. This seems to dramatically improve the + speed for simple keys. 
Thank Zilong Tan for the suggestion. Reference: + + - http://code.google.com/p/ulib/ + - http://nothings.org/computer/judy/ + + * Allow to optionally use linear probing which usually has better + performance for random input. Double hashing is still the default as it + is more robust to certain non-random input. + + * Added Wang's integer hash function (not used by default). This hash + function is more robust to certain non-random input. + + 2011-02-14 (0.2.5): + + * Allow to declare global functions. + + 2009-09-26 (0.2.4): + + * Improve portability + + 2008-09-19 (0.2.3): + + * Corrected the example + * Improved interfaces + + 2008-09-11 (0.2.2): + + * Improved speed a little in kh_put() + + 2008-09-10 (0.2.1): + + * Added kh_clear() + * Fixed a compiling error + + 2008-09-02 (0.2.0): + + * Changed to token concatenation which increases flexibility. + + 2008-08-31 (0.1.2): + + * Fixed a bug in kh_get(), which has not been tested previously. + + 2008-08-31 (0.1.1): + + * Added destructor +*/ + + +#ifndef __AC_KHASH_H +#define __AC_KHASH_H + +/*! + @header + + Generic hash table library. 
+ */

+#define AC_VERSION_KHASH_H "0.2.8"
+
+#include <stdlib.h> /* NOTE(review): include targets lost to tag-stripping; reconstructed from upstream klib khash.h */
+#include <string.h>
+#include <limits.h>
+#include "kalloc.h"
+
+/* compiler specific configuration */
+
+#if UINT_MAX == 0xffffffffu
+typedef unsigned int khint32_t;
+#elif ULONG_MAX == 0xffffffffu
+typedef unsigned long khint32_t;
+#endif
+
+#if ULONG_MAX == ULLONG_MAX
+typedef unsigned long khint64_t;
+#else
+typedef unsigned long long khint64_t;
+#endif
+
+#ifndef kh_inline
+#ifdef _MSC_VER
+#define kh_inline __inline
+#else
+#define kh_inline inline
+#endif
+#endif /* kh_inline */
+
+#ifndef klib_unused
+#if (defined __clang__ && __clang_major__ >= 3) || (defined __GNUC__ && __GNUC__ >= 3)
+#define klib_unused __attribute__ ((__unused__))
+#else
+#define klib_unused
+#endif
+#endif /* klib_unused */
+
+typedef khint32_t khint_t;
+typedef khint_t khiter_t;
+
+#define __ac_isempty(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&2)
+#define __ac_isdel(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&1)
+#define __ac_iseither(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&3)
+#define __ac_set_isdel_false(flag, i) (flag[i>>4]&=~(1ul<<((i&0xfU)<<1)))
+#define __ac_set_isempty_false(flag, i) (flag[i>>4]&=~(2ul<<((i&0xfU)<<1)))
+#define __ac_set_isboth_false(flag, i) (flag[i>>4]&=~(3ul<<((i&0xfU)<<1)))
+#define __ac_set_isdel_true(flag, i) (flag[i>>4]|=1ul<<((i&0xfU)<<1))
+
+#define __ac_fsize(m) ((m) < 16?
1 : (m)>>4) + +#ifndef kroundup32 +#define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x)) +#endif + +static const double __ac_HASH_UPPER = 0.77; + +#define __KHASH_TYPE(name, khkey_t, khval_t) \ + typedef struct kh_##name##_s { \ + khint_t n_buckets, size, n_occupied, upper_bound; \ + khint32_t *flags; \ + khkey_t *keys; \ + khval_t *vals; \ + } kh_##name##_t; + +#define __KHASH_PROTOTYPES(name, khkey_t, khval_t) \ + extern kh_##name##_t *kh_init_##name(void); \ + extern void kh_destroy_##name(kh_##name##_t *h); \ + extern void kh_clear_##name(kh_##name##_t *h); \ + extern khint_t kh_get_##name(const kh_##name##_t *h, khkey_t key); \ + extern int kh_resize_##name(kh_##name##_t *h, khint_t new_n_buckets); \ + extern khint_t kh_put_##name(kh_##name##_t *h, khkey_t key, int *ret); \ + extern void kh_del_##name(kh_##name##_t *h, khint_t x); + +#define __KHASH_IMPL(name, SCOPE, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \ + SCOPE kh_##name##_t *kh_init_##name(void) { \ + return (kh_##name##_t*)kcalloc(0, 1, sizeof(kh_##name##_t)); \ + } \ + SCOPE void kh_destroy_##name(kh_##name##_t *h) \ + { \ + if (h) { \ + kfree(0, (void *)h->keys); kfree(0, h->flags); \ + kfree(0, (void *)h->vals); \ + kfree(0, h); \ + } \ + } \ + SCOPE void kh_clear_##name(kh_##name##_t *h) \ + { \ + if (h && h->flags) { \ + memset(h->flags, 0xaa, __ac_fsize(h->n_buckets) * sizeof(khint32_t)); \ + h->size = h->n_occupied = 0; \ + } \ + } \ + SCOPE khint_t kh_get_##name(const kh_##name##_t *h, khkey_t key) \ + { \ + if (h->n_buckets) { \ + khint_t k, i, last, mask, step = 0; \ + mask = h->n_buckets - 1; \ + k = __hash_func(key); i = k & mask; \ + last = i; \ + while (!__ac_isempty(h->flags, i) && (__ac_isdel(h->flags, i) || !__hash_equal(h->keys[i], key))) { \ + i = (i + (++step)) & mask; \ + if (i == last) return h->n_buckets; \ + } \ + return __ac_iseither(h->flags, i)? 
h->n_buckets : i; \ + } else return 0; \ + } \ + SCOPE int kh_resize_##name(kh_##name##_t *h, khint_t new_n_buckets) \ + { /* This function uses 0.25*n_buckets bytes of working space instead of [sizeof(key_t+val_t)+.25]*n_buckets. */ \ + khint32_t *new_flags = 0; \ + khint_t j = 1; \ + { \ + kroundup32(new_n_buckets); \ + if (new_n_buckets < 4) new_n_buckets = 4; \ + if (h->size >= (khint_t)(new_n_buckets * __ac_HASH_UPPER + 0.5)) j = 0; /* requested size is too small */ \ + else { /* hash table size to be changed (shrink or expand); rehash */ \ + new_flags = (khint32_t*)kmalloc(0, __ac_fsize(new_n_buckets) * sizeof(khint32_t)); \ + if (!new_flags) return -1; \ + memset(new_flags, 0xaa, __ac_fsize(new_n_buckets) * sizeof(khint32_t)); \ + if (h->n_buckets < new_n_buckets) { /* expand */ \ + khkey_t *new_keys = (khkey_t*)krealloc(0, (void *)h->keys, new_n_buckets * sizeof(khkey_t)); \ + if (!new_keys) { kfree(0, new_flags); return -1; } \ + h->keys = new_keys; \ + if (kh_is_map) { \ + khval_t *new_vals = (khval_t*)krealloc(0, (void *)h->vals, new_n_buckets * sizeof(khval_t)); \ + if (!new_vals) { kfree(0, new_flags); return -1; } \ + h->vals = new_vals; \ + } \ + } /* otherwise shrink */ \ + } \ + } \ + if (j) { /* rehashing is needed */ \ + for (j = 0; j != h->n_buckets; ++j) { \ + if (__ac_iseither(h->flags, j) == 0) { \ + khkey_t key = h->keys[j]; \ + khval_t val; \ + khint_t new_mask; \ + new_mask = new_n_buckets - 1; \ + if (kh_is_map) val = h->vals[j]; \ + __ac_set_isdel_true(h->flags, j); \ + while (1) { /* kick-out process; sort of like in Cuckoo hashing */ \ + khint_t k, i, step = 0; \ + k = __hash_func(key); \ + i = k & new_mask; \ + while (!__ac_isempty(new_flags, i)) i = (i + (++step)) & new_mask; \ + __ac_set_isempty_false(new_flags, i); \ + if (i < h->n_buckets && __ac_iseither(h->flags, i) == 0) { /* kick out the existing element */ \ + { khkey_t tmp = h->keys[i]; h->keys[i] = key; key = tmp; } \ + if (kh_is_map) { khval_t tmp = h->vals[i]; h->vals[i] 
= val; val = tmp; } \ + __ac_set_isdel_true(h->flags, i); /* mark it as deleted in the old hash table */ \ + } else { /* write the element and jump out of the loop */ \ + h->keys[i] = key; \ + if (kh_is_map) h->vals[i] = val; \ + break; \ + } \ + } \ + } \ + } \ + if (h->n_buckets > new_n_buckets) { /* shrink the hash table */ \ + h->keys = (khkey_t*)krealloc(0, (void *)h->keys, new_n_buckets * sizeof(khkey_t)); \ + if (kh_is_map) h->vals = (khval_t*)krealloc(0, (void *)h->vals, new_n_buckets * sizeof(khval_t)); \ + } \ + kfree(0, h->flags); /* free the working space */ \ + h->flags = new_flags; \ + h->n_buckets = new_n_buckets; \ + h->n_occupied = h->size; \ + h->upper_bound = (khint_t)(h->n_buckets * __ac_HASH_UPPER + 0.5); \ + } \ + return 0; \ + } \ + SCOPE khint_t kh_put_##name(kh_##name##_t *h, khkey_t key, int *ret) \ + { \ + khint_t x; \ + if (h->n_occupied >= h->upper_bound) { /* update the hash table */ \ + if (h->n_buckets > (h->size<<1)) { \ + if (kh_resize_##name(h, h->n_buckets - 1) < 0) { /* clear "deleted" elements */ \ + *ret = -1; return h->n_buckets; \ + } \ + } else if (kh_resize_##name(h, h->n_buckets + 1) < 0) { /* expand the hash table */ \ + *ret = -1; return h->n_buckets; \ + } \ + } /* TODO: to implement automatically shrinking; resize() already support shrinking */ \ + { \ + khint_t k, i, site, last, mask = h->n_buckets - 1, step = 0; \ + x = site = h->n_buckets; k = __hash_func(key); i = k & mask; \ + if (__ac_isempty(h->flags, i)) x = i; /* for speed up */ \ + else { \ + last = i; \ + while (!__ac_isempty(h->flags, i) && (__ac_isdel(h->flags, i) || !__hash_equal(h->keys[i], key))) { \ + if (__ac_isdel(h->flags, i)) site = i; \ + i = (i + (++step)) & mask; \ + if (i == last) { x = site; break; } \ + } \ + if (x == h->n_buckets) { \ + if (__ac_isempty(h->flags, i) && site != h->n_buckets) x = site; \ + else x = i; \ + } \ + } \ + } \ + if (__ac_isempty(h->flags, x)) { /* not present at all */ \ + h->keys[x] = key; \ + 
__ac_set_isboth_false(h->flags, x); \ + ++h->size; ++h->n_occupied; \ + *ret = 1; \ + } else if (__ac_isdel(h->flags, x)) { /* deleted */ \ + h->keys[x] = key; \ + __ac_set_isboth_false(h->flags, x); \ + ++h->size; \ + *ret = 2; \ + } else *ret = 0; /* Don't touch h->keys[x] if present and not deleted */ \ + return x; \ + } \ + SCOPE void kh_del_##name(kh_##name##_t *h, khint_t x) \ + { \ + if (x != h->n_buckets && !__ac_iseither(h->flags, x)) { \ + __ac_set_isdel_true(h->flags, x); \ + --h->size; \ + } \ + } + +#define KHASH_DECLARE(name, khkey_t, khval_t) \ + __KHASH_TYPE(name, khkey_t, khval_t) \ + __KHASH_PROTOTYPES(name, khkey_t, khval_t) + +#define KHASH_INIT2(name, SCOPE, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \ + __KHASH_TYPE(name, khkey_t, khval_t) \ + __KHASH_IMPL(name, SCOPE, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) + +#define KHASH_INIT(name, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \ + KHASH_INIT2(name, static kh_inline klib_unused, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) + +/* --- BEGIN OF HASH FUNCTIONS --- */ + +/*! @function + @abstract Integer hash function + @param key The integer [khint32_t] + @return The hash value [khint_t] + */ +#define kh_int_hash_func(key) (khint32_t)(key) +/*! @function + @abstract Integer comparison function + */ +#define kh_int_hash_equal(a, b) ((a) == (b)) +/*! @function + @abstract 64-bit integer hash function + @param key The integer [khint64_t] + @return The hash value [khint_t] + */ +#define kh_int64_hash_func(key) (khint32_t)((key)>>33^(key)^(key)<<11) +/*! @function + @abstract 64-bit integer comparison function + */ +#define kh_int64_hash_equal(a, b) ((a) == (b)) +/*! 
@function + @abstract const char* hash function + @param s Pointer to a null terminated string + @return The hash value + */ +static kh_inline khint_t __ac_X31_hash_string(const char *s) +{ + khint_t h = (khint_t)*s; + if (h) for (++s ; *s; ++s) h = (h << 5) - h + (khint_t)*s; + return h; +} +/*! @function + @abstract Another interface to const char* hash function + @param key Pointer to a null terminated string [const char*] + @return The hash value [khint_t] + */ +#define kh_str_hash_func(key) __ac_X31_hash_string(key) +/*! @function + @abstract Const char* comparison function + */ +#define kh_str_hash_equal(a, b) (strcmp(a, b) == 0) + +static kh_inline khint_t __ac_Wang_hash(khint_t key) +{ + key += ~(key << 15); + key ^= (key >> 10); + key += (key << 3); + key ^= (key >> 6); + key += ~(key << 11); + key ^= (key >> 16); + return key; +} +#define kh_int_hash_func2(key) __ac_Wang_hash((khint_t)key) + +/* --- END OF HASH FUNCTIONS --- */ + +/* Other convenient macros... */ + +/*! + @abstract Type of the hash table. + @param name Name of the hash table [symbol] + */ +#define khash_t(name) kh_##name##_t + +/*! @function + @abstract Initiate a hash table. + @param name Name of the hash table [symbol] + @return Pointer to the hash table [khash_t(name)*] + */ +#define kh_init(name) kh_init_##name() + +/*! @function + @abstract Destroy a hash table. + @param name Name of the hash table [symbol] + @param h Pointer to the hash table [khash_t(name)*] + */ +#define kh_destroy(name, h) kh_destroy_##name(h) + +/*! @function + @abstract Reset a hash table without deallocating memory. + @param name Name of the hash table [symbol] + @param h Pointer to the hash table [khash_t(name)*] + */ +#define kh_clear(name, h) kh_clear_##name(h) + +/*! @function + @abstract Resize a hash table. + @param name Name of the hash table [symbol] + @param h Pointer to the hash table [khash_t(name)*] + @param s New size [khint_t] + */ +#define kh_resize(name, h, s) kh_resize_##name(h, s) + +/*! 
@function + @abstract Insert a key to the hash table. + @param name Name of the hash table [symbol] + @param h Pointer to the hash table [khash_t(name)*] + @param k Key [type of keys] + @param r Extra return code: -1 if the operation failed; + 0 if the key is present in the hash table; + 1 if the bucket is empty (never used); 2 if the element in + the bucket has been deleted [int*] + @return Iterator to the inserted element [khint_t] + */ +#define kh_put(name, h, k, r) kh_put_##name(h, k, r) + +/*! @function + @abstract Retrieve a key from the hash table. + @param name Name of the hash table [symbol] + @param h Pointer to the hash table [khash_t(name)*] + @param k Key [type of keys] + @return Iterator to the found element, or kh_end(h) if the element is absent [khint_t] + */ +#define kh_get(name, h, k) kh_get_##name(h, k) + +/*! @function + @abstract Remove a key from the hash table. + @param name Name of the hash table [symbol] + @param h Pointer to the hash table [khash_t(name)*] + @param k Iterator to the element to be deleted [khint_t] + */ +#define kh_del(name, h, k) kh_del_##name(h, k) + +/*! @function + @abstract Test whether a bucket contains data. + @param h Pointer to the hash table [khash_t(name)*] + @param x Iterator to the bucket [khint_t] + @return 1 if containing data; 0 otherwise [int] + */ +#define kh_exist(h, x) (!__ac_iseither((h)->flags, (x))) + +/*! @function + @abstract Get key given an iterator + @param h Pointer to the hash table [khash_t(name)*] + @param x Iterator to the bucket [khint_t] + @return Key [type of keys] + */ +#define kh_key(h, x) ((h)->keys[x]) + +/*! @function + @abstract Get value given an iterator + @param h Pointer to the hash table [khash_t(name)*] + @param x Iterator to the bucket [khint_t] + @return Value [type of values] + @discussion For hash sets, calling this results in segfault. + */ +#define kh_val(h, x) ((h)->vals[x]) + +/*! 
@function + @abstract Alias of kh_val() + */ +#define kh_value(h, x) ((h)->vals[x]) + +/*! @function + @abstract Get the start iterator + @param h Pointer to the hash table [khash_t(name)*] + @return The start iterator [khint_t] + */ +#define kh_begin(h) (khint_t)(0) + +/*! @function + @abstract Get the end iterator + @param h Pointer to the hash table [khash_t(name)*] + @return The end iterator [khint_t] + */ +#define kh_end(h) ((h)->n_buckets) + +/*! @function + @abstract Get the number of elements in the hash table + @param h Pointer to the hash table [khash_t(name)*] + @return Number of elements in the hash table [khint_t] + */ +#define kh_size(h) ((h)->size) + +/*! @function + @abstract Get the number of buckets in the hash table + @param h Pointer to the hash table [khash_t(name)*] + @return Number of buckets in the hash table [khint_t] + */ +#define kh_n_buckets(h) ((h)->n_buckets) + +/*! @function + @abstract Iterate over the entries in the hash table + @param h Pointer to the hash table [khash_t(name)*] + @param kvar Variable to which key will be assigned + @param vvar Variable to which value will be assigned + @param code Block of code to execute + */ +#define kh_foreach(h, kvar, vvar, code) { khint_t __i; \ + for (__i = kh_begin(h); __i != kh_end(h); ++__i) { \ + if (!kh_exist(h,__i)) continue; \ + (kvar) = kh_key(h,__i); \ + (vvar) = kh_val(h,__i); \ + code; \ + } } + +/*! @function + @abstract Iterate over the values in the hash table + @param h Pointer to the hash table [khash_t(name)*] + @param vvar Variable to which value will be assigned + @param code Block of code to execute + */ +#define kh_foreach_value(h, vvar, code) { khint_t __i; \ + for (__i = kh_begin(h); __i != kh_end(h); ++__i) { \ + if (!kh_exist(h,__i)) continue; \ + (vvar) = kh_val(h,__i); \ + code; \ + } } + +/* More conenient interfaces */ + +/*! 
@function + @abstract Instantiate a hash set containing integer keys + @param name Name of the hash table [symbol] + */ +#define KHASH_SET_INIT_INT(name) \ + KHASH_INIT(name, khint32_t, char, 0, kh_int_hash_func, kh_int_hash_equal) + +/*! @function + @abstract Instantiate a hash map containing integer keys + @param name Name of the hash table [symbol] + @param khval_t Type of values [type] + */ +#define KHASH_MAP_INIT_INT(name, khval_t) \ + KHASH_INIT(name, khint32_t, khval_t, 1, kh_int_hash_func, kh_int_hash_equal) + +/*! @function + @abstract Instantiate a hash map containing 64-bit integer keys + @param name Name of the hash table [symbol] + */ +#define KHASH_SET_INIT_INT64(name) \ + KHASH_INIT(name, khint64_t, char, 0, kh_int64_hash_func, kh_int64_hash_equal) + +/*! @function + @abstract Instantiate a hash map containing 64-bit integer keys + @param name Name of the hash table [symbol] + @param khval_t Type of values [type] + */ +#define KHASH_MAP_INIT_INT64(name, khval_t) \ + KHASH_INIT(name, khint64_t, khval_t, 1, kh_int64_hash_func, kh_int64_hash_equal) + +typedef const char *kh_cstr_t; +/*! @function + @abstract Instantiate a hash map containing const char* keys + @param name Name of the hash table [symbol] + */ +#define KHASH_SET_INIT_STR(name) \ + KHASH_INIT(name, kh_cstr_t, char, 0, kh_str_hash_func, kh_str_hash_equal) + +/*! 
@function + @abstract Instantiate a hash map containing const char* keys + @param name Name of the hash table [symbol] + @param khval_t Type of values [type] + */ +#define KHASH_MAP_INIT_STR(name, khval_t) \ + KHASH_INIT(name, kh_cstr_t, khval_t, 1, kh_str_hash_func, kh_str_hash_equal) + +#endif /* __AC_KHASH_H */ diff --git a/ksort.h b/ksort.h new file mode 100644 index 0000000..b2ffc1a --- /dev/null +++ b/ksort.h @@ -0,0 +1,159 @@ +/* The MIT License + + Copyright (c) 2008, 2011 Attractive Chaos + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be + included in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE. 
+*/ + +// This is a simplified version of ksort.h + +#ifndef AC_KSORT_H +#define AC_KSORT_H + +#include +#include + +typedef struct { + void *left, *right; + int depth; +} ks_isort_stack_t; + +#define KSORT_SWAP(type_t, a, b) { register type_t t=(a); (a)=(b); (b)=t; } + +#define KSORT_INIT(name, type_t, __sort_lt) \ + size_t ks_lis_##name(size_t n, const type_t *a, size_t *b, size_t *_p) \ + { /* translated from: http://www.algorithmist.com/index.php/Longest_Increasing_Subsequence.cpp */ \ + size_t i, u, v, *top = b, *p; \ + if (n == 0) return 0; \ + p = _p? _p : (size_t*)calloc(n, sizeof(size_t)); \ + *top++ = 0; \ + for (i = 1; i < n; i++) { \ + if (__sort_lt(a[*(top-1)], a[i])) { \ + p[i] = *(top-1); \ + *top++ = i; \ + continue; \ + } \ + for (u = 0, v = top - b - 1; u < v;) { \ + size_t c = (u + v) >> 1; \ + if (__sort_lt(a[b[c]], a[i])) u = c + 1; \ + else v = c; \ + } \ + if (__sort_lt(a[i], a[b[u]])) { \ + if (u > 0) p[i] = b[u-1]; \ + b[u] = i; \ + } \ + } \ + for (u = top - b, v = *(top-1); u--; v = p[v]) b[u] = v; \ + if (!_p) free(p); \ + return top - b; \ + } \ + type_t ks_ksmall_##name(size_t n, type_t arr[], size_t kk) \ + { \ + type_t *low, *high, *k, *ll, *hh, *mid; \ + low = arr; high = arr + n - 1; k = arr + kk; \ + for (;;) { \ + if (high <= low) return *k; \ + if (high == low + 1) { \ + if (__sort_lt(*high, *low)) KSORT_SWAP(type_t, *low, *high); \ + return *k; \ + } \ + mid = low + (high - low) / 2; \ + if (__sort_lt(*high, *mid)) KSORT_SWAP(type_t, *mid, *high); \ + if (__sort_lt(*high, *low)) KSORT_SWAP(type_t, *low, *high); \ + if (__sort_lt(*low, *mid)) KSORT_SWAP(type_t, *mid, *low); \ + KSORT_SWAP(type_t, *mid, *(low+1)); \ + ll = low + 1; hh = high; \ + for (;;) { \ + do ++ll; while (__sort_lt(*ll, *low)); \ + do --hh; while (__sort_lt(*low, *hh)); \ + if (hh < ll) break; \ + KSORT_SWAP(type_t, *ll, *hh); \ + } \ + KSORT_SWAP(type_t, *low, *hh); \ + if (hh <= k) low = ll; \ + if (hh >= k) high = hh - 1; \ + } \ + } \ + +#define 
ks_ksmall(name, n, a, k) ks_ksmall_##name(n, a, k) + +#define ks_lt_generic(a, b) ((a) < (b)) +#define ks_lt_str(a, b) (strcmp((a), (b)) < 0) + +typedef const char *ksstr_t; + +#define KSORT_INIT_GENERIC(type_t) KSORT_INIT(type_t, type_t, ks_lt_generic) +#define KSORT_INIT_STR KSORT_INIT(str, ksstr_t, ks_lt_str) + +#define RS_MIN_SIZE 64 + +#define KRADIX_SORT_INIT(name, rstype_t, rskey, sizeof_key) \ + typedef struct { \ + rstype_t *b, *e; \ + } rsbucket_##name##_t; \ + void rs_insertsort_##name(rstype_t *beg, rstype_t *end) \ + { \ + rstype_t *i; \ + for (i = beg + 1; i < end; ++i) \ + if (rskey(*i) < rskey(*(i - 1))) { \ + rstype_t *j, tmp = *i; \ + for (j = i; j > beg && rskey(tmp) < rskey(*(j-1)); --j) \ + *j = *(j - 1); \ + *j = tmp; \ + } \ + } \ + void rs_sort_##name(rstype_t *beg, rstype_t *end, int n_bits, int s) \ + { \ + rstype_t *i; \ + int size = 1<b = k->e = beg; \ + for (i = beg; i != end; ++i) ++b[rskey(*i)>>s&m].e; \ + for (k = b + 1; k != be; ++k) \ + k->e += (k-1)->e - beg, k->b = (k-1)->e; \ + for (k = b; k != be;) { \ + if (k->b != k->e) { \ + rsbucket_##name##_t *l; \ + if ((l = b + (rskey(*k->b)>>s&m)) != k) { \ + rstype_t tmp = *k->b, swap; \ + do { \ + swap = tmp; tmp = *l->b; *l->b++ = swap; \ + l = b + (rskey(tmp)>>s&m); \ + } while (l != k); \ + *k->b++ = tmp; \ + } else ++k->b; \ + } else ++k; \ + } \ + for (b->b = beg, k = b + 1; k != be; ++k) k->b = (k-1)->e; \ + if (s) { \ + s = s > n_bits? 
s - n_bits : 0; \ + for (k = b; k != be; ++k) \ + if (k->e - k->b > RS_MIN_SIZE) rs_sort_##name(k->b, k->e, n_bits, s); \ + else if (k->e - k->b > 1) rs_insertsort_##name(k->b, k->e); \ + } \ + } \ + void radix_sort_##name(rstype_t *beg, rstype_t *end) \ + { \ + if (end - beg <= RS_MIN_SIZE) rs_insertsort_##name(beg, end); \ + else rs_sort_##name(beg, end, 8, sizeof_key * 8 - 8); \ + } + +#endif diff --git a/kthread.c b/kthread.c new file mode 100644 index 0000000..f991714 --- /dev/null +++ b/kthread.c @@ -0,0 +1,151 @@ +#include +#include +#include + +/************ + * kt_for() * + ************/ + +struct kt_for_t; + +typedef struct { + struct kt_for_t *t; + long i; +} ktf_worker_t; + +typedef struct kt_for_t { + int n_threads; + long n; + ktf_worker_t *w; + void (*func)(void*,long,int); + void *data; +} kt_for_t; + +static inline long steal_work(kt_for_t *t) +{ + int i, min_i = -1; + long k, min = LONG_MAX; + for (i = 0; i < t->n_threads; ++i) + if (min > t->w[i].i) min = t->w[i].i, min_i = i; + k = __sync_fetch_and_add(&t->w[min_i].i, t->n_threads); + return k >= t->n? 
-1 : k; +} + +static void *ktf_worker(void *data) +{ + ktf_worker_t *w = (ktf_worker_t*)data; + long i; + for (;;) { + i = __sync_fetch_and_add(&w->i, w->t->n_threads); + if (i >= w->t->n) break; + w->t->func(w->t->data, i, w - w->t->w); + } + while ((i = steal_work(w->t)) >= 0) + w->t->func(w->t->data, i, w - w->t->w); + pthread_exit(0); +} + +void kt_for(int n_threads, void (*func)(void*,long,int), void *data, long n) +{ + if (n_threads > 1) { + int i; + kt_for_t t; + pthread_t *tid; + t.func = func, t.data = data, t.n_threads = n_threads, t.n = n; + t.w = (ktf_worker_t*)alloca(n_threads * sizeof(ktf_worker_t)); + tid = (pthread_t*)alloca(n_threads * sizeof(pthread_t)); + for (i = 0; i < n_threads; ++i) + t.w[i].t = &t, t.w[i].i = i; + for (i = 0; i < n_threads; ++i) pthread_create(&tid[i], 0, ktf_worker, &t.w[i]); + for (i = 0; i < n_threads; ++i) pthread_join(tid[i], 0); + } else { + long j; + for (j = 0; j < n; ++j) func(data, j, 0); + } +} + +/***************** + * kt_pipeline() * + *****************/ + +struct ktp_t; + +typedef struct { + struct ktp_t *pl; + int64_t index; + int step; + void *data; +} ktp_worker_t; + +typedef struct ktp_t { + void *shared; + void *(*func)(void*, int, void*); + int64_t index; + int n_workers, n_steps; + ktp_worker_t *workers; + pthread_mutex_t mutex; + pthread_cond_t cv; +} ktp_t; + +static void *ktp_worker(void *data) +{ + ktp_worker_t *w = (ktp_worker_t*)data; + ktp_t *p = w->pl; + while (w->step < p->n_steps) { + // test whether we can kick off the job with this worker + pthread_mutex_lock(&p->mutex); + for (;;) { + int i; + // test whether another worker is doing the same step + for (i = 0; i < p->n_workers; ++i) { + if (w == &p->workers[i]) continue; // ignore itself + if (p->workers[i].step <= w->step && p->workers[i].index < w->index) + break; + } + if (i == p->n_workers) break; // no workers with smaller indices are doing w->step or the previous steps + pthread_cond_wait(&p->cv, &p->mutex); + } + 
pthread_mutex_unlock(&p->mutex); + + // working on w->step + w->data = p->func(p->shared, w->step, w->step? w->data : 0); // for the first step, input is NULL + + // update step and let other workers know + pthread_mutex_lock(&p->mutex); + w->step = w->step == p->n_steps - 1 || w->data? (w->step + 1) % p->n_steps : p->n_steps; + if (w->step == 0) w->index = p->index++; + pthread_cond_broadcast(&p->cv); + pthread_mutex_unlock(&p->mutex); + } + pthread_exit(0); +} + +void kt_pipeline(int n_threads, void *(*func)(void*, int, void*), void *shared_data, int n_steps) +{ + ktp_t aux; + pthread_t *tid; + int i; + + if (n_threads < 1) n_threads = 1; + aux.n_workers = n_threads; + aux.n_steps = n_steps; + aux.func = func; + aux.shared = shared_data; + aux.index = 0; + pthread_mutex_init(&aux.mutex, 0); + pthread_cond_init(&aux.cv, 0); + + aux.workers = (ktp_worker_t*)alloca(n_threads * sizeof(ktp_worker_t)); + for (i = 0; i < n_threads; ++i) { + ktp_worker_t *w = &aux.workers[i]; + w->step = 0; w->pl = &aux; w->data = 0; + w->index = aux.index++; + } + + tid = (pthread_t*)alloca(n_threads * sizeof(pthread_t)); + for (i = 0; i < n_threads; ++i) pthread_create(&tid[i], 0, ktp_worker, &aux.workers[i]); + for (i = 0; i < n_threads; ++i) pthread_join(tid[i], 0); + + pthread_mutex_destroy(&aux.mutex); + pthread_cond_destroy(&aux.cv); +} diff --git a/kthread.h b/kthread.h new file mode 100644 index 0000000..c3cd165 --- /dev/null +++ b/kthread.h @@ -0,0 +1,15 @@ +#ifndef KTHREAD_H +#define KTHREAD_H + +#ifdef __cplusplus +extern "C" { +#endif + +void kt_for(int n_threads, void (*func)(void*,long,int), void *data, long n); +void kt_pipeline(int n_threads, void *(*func)(void*, int, void*), void *shared_data, int n_steps); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/minimap.h b/minimap.h index 1376968..5728cf2 100644 --- a/minimap.h +++ b/minimap.h @@ -4,7 +4,6 @@ #include #include #include -#include "bseq.h" #define MM_IDX_DEF_B 14 #define MM_DEREP_Q50 5.0 @@ -30,13 +29,18 
@@ typedef struct { } mm_idx_bucket_t; typedef struct { - int b, w, k; - uint32_t n; // number of reference sequences - mm_idx_bucket_t *B; - uint32_t max_occ; - float freq_thres; - int32_t *len; // length of each reference sequence - char **name; // TODO: if this uses too much RAM, switch to one concatenated string + char *name; // name of the db sequence + uint64_t offset; // offset in mm_idx_t::seq16 + uint32_t len; // length +} mm_idx_seq_t; + +typedef struct { + int32_t b, w, k; + uint64_t sum_len; // sum of lengths + uint32_t n_seq; // number of reference sequences + mm_idx_seq_t *seq; // sequence name, length and offset + uint32_t *S; // 4-bit packed sequence + mm_idx_bucket_t *B; // index } mm_idx_t; typedef struct { @@ -62,6 +66,11 @@ extern double mm_realtime0; struct mm_tbuf_s; typedef struct mm_tbuf_s mm_tbuf_t; +struct bseq_file_s; + +#define mm_seq4_set(s, i, c) ((s)[(i)>>8] |= (uint32_t)(c) << (((i)&7)<<2)) +#define mm_seq4_get(s, i) ((s)[(i)>>8] >> (((i)&7)<<2) & 0xf) + #ifdef __cplusplus extern "C" { #endif @@ -72,11 +81,11 @@ void mm_sketch(void *km, const char *str, int len, int w, int k, uint32_t rid, i // minimizer indexing mm_idx_t *mm_idx_init(int w, int k, int b); void mm_idx_destroy(mm_idx_t *mi); -mm_idx_t *mm_idx_gen(bseq_file_t *fp, int w, int k, int b, int tbatch_size, int n_threads, uint64_t ibatch_size, int keep_name); +mm_idx_t *mm_idx_gen(struct bseq_file_s *fp, int w, int k, int b, int is_hpc, int mini_batch_size, int n_threads, uint64_t batch_size, int keep_name); void mm_idx_set_max_occ(mm_idx_t *mi, float f); const uint64_t *mm_idx_get(const mm_idx_t *mi, uint64_t minier, int *n); -mm_idx_t *mm_idx_build(const char *fn, int w, int k, int n_threads); +mm_idx_t *mm_idx_build(const char *fn, int w, int k, int is_hpc, int n_threads); // minimizer index I/O void mm_idx_dump(FILE *fp, const mm_idx_t *mi); diff --git a/misc.c b/misc.c new file mode 100644 index 0000000..3f8bf52 --- /dev/null +++ b/misc.c @@ -0,0 +1,26 @@ +#include 
+#include +#include "minimap.h" + +int mm_verbose = 3; +double mm_realtime0; + +double cputime() +{ + struct rusage r; + getrusage(RUSAGE_SELF, &r); + return r.ru_utime.tv_sec + r.ru_stime.tv_sec + 1e-6 * (r.ru_utime.tv_usec + r.ru_stime.tv_usec); +} + +double realtime() +{ + struct timeval tp; + struct timezone tzp; + gettimeofday(&tp, &tzp); + return tp.tv_sec + tp.tv_usec * 1e-6; +} + +#include "ksort.h" +#define sort_key_128x(a) ((a).x) +KRADIX_SORT_INIT(128x, mm128_t, sort_key_128x, 8) +KSORT_INIT_GENERIC(uint32_t)