diff --git a/Makefile b/Makefile index c97fbcf..6a3fc1e 100644 --- a/Makefile +++ b/Makefile @@ -1,10 +1,9 @@ -CC= gcc -CXX= g++ +CC= clang CFLAGS= -g -Wall -O2 CXXFLAGS= $(CFLAGS) AR= ar DFLAGS= -DHAVE_PTHREAD #-D_NO_SSE2 #-D_FILE_OFFSET_BITS=64 -LOBJS= bwa.o bamlite.o utils.o bwt.o bwtio.o bwtaln.o bwtgap.o bntseq.o bwamem.o bwamem_pair.o stdaln.o \ +LOBJS= bamlite.o utils.o bwt.o bwtio.o bwtaln.o bwtgap.o bntseq.o bwamem.o bwamem_pair.o stdaln.o \ bseq.o bwaseqio.o bwase.o kstring.o AOBJS= QSufSort.o bwt_gen.o \ is.o bwtmisc.o bwtindex.o ksw.o simple_dp.o \ diff --git a/bwa.c b/bwa.c deleted file mode 100644 index 8e99f18..0000000 --- a/bwa.c +++ /dev/null @@ -1,272 +0,0 @@ -#include -#include -#include -#include -#include "bwa.h" -#include "bwt.h" -#include "bwtgap.h" -#include "bntseq.h" - -#ifndef kroundup32 -#define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x)) -#endif - -extern unsigned char nst_nt4_table[256]; -extern void seq_reverse(int len, uint8_t *seq, int is_comp); - -bwa_opt_t bwa_def_opt = { 11, 4, -1, 1, 6, 32, 2, 0.04 }; - -struct bwa_idx_t { - bwt_t *bwt; - bntseq_t *bns; - uint8_t *pac; -}; - -struct bwa_buf_t { - int max_buf; - bwa_pestat_t pes; - gap_stack_t *stack; - gap_opt_t *opt; - int *diff_tab; - uint8_t *buf; - int *logn; -}; - -bwa_idx_t *bwa_idx_load(const char *prefix) -{ - bwa_idx_t *p; - int l; - char *str; - l = strlen(prefix); - p = calloc(1, sizeof(bwa_idx_t)); - str = malloc(l + 10); - strcpy(str, prefix); - p->bns = bns_restore(str); - strcpy(str + l, ".bwt"); - p->bwt = bwt_restore_bwt(str); - str[l] = 0; - strcpy(str + l, ".sa"); - bwt_restore_sa(str, p->bwt); - free(str); - p->pac = calloc(p->bns->l_pac/4+1, 1); - fread(p->pac, 1, p->bns->l_pac/4+1, p->bns->fp_pac); - fclose(p->bns->fp_pac); - p->bns->fp_pac = 0; - return p; -} - -void bwa_idx_destroy(bwa_idx_t *p) -{ - bns_destroy(p->bns); - bwt_destroy(p->bwt); - free(p->pac); - free(p); -} - -bwa_buf_t *bwa_buf_init(const bwa_opt_t *opt, int max_score) -{ - extern gap_opt_t *gap_init_opt(void); - extern int bwa_cal_maxdiff(int l, double err, double thres); - int i; - bwa_buf_t *p; - p = malloc(sizeof(bwa_buf_t)); - p->stack = gap_init_stack2(max_score); - p->opt = gap_init_opt(); - p->opt->s_gapo = opt->s_gapo; - p->opt->s_gape = opt->s_gape; - p->opt->max_diff = opt->max_diff; - p->opt->max_gapo = opt->max_gapo; - p->opt->max_gape = opt->max_gape; - p->opt->seed_len = opt->seed_len; - p->opt->max_seed_diff = opt->max_seed_diff; - p->opt->fnr = opt->fnr; - p->diff_tab = calloc(BWA_MAX_QUERY_LEN, sizeof(int)); - for (i = 1; i < BWA_MAX_QUERY_LEN; ++i) - p->diff_tab[i] = bwa_cal_maxdiff(i, BWA_AVG_ERR, opt->fnr); - p->logn = calloc(256, sizeof(int)); - for (i = 1; i != 256; ++i) - p->logn[i] = (int)(4.343 * log(i) + 0.499); - return p; -} - -void bwa_buf_destroy(bwa_buf_t *p) -{ - gap_destroy_stack(p->stack); - free(p->diff_tab); free(p->logn); free(p->opt); - free(p); -} - -bwa_sai_t bwa_sai(const bwa_idx_t *idx, bwa_buf_t *buf, const char *seq) -{ - extern int bwt_cal_width(const bwt_t *bwt, int len, const ubyte_t *str, bwt_width_t *width); - int i, seq_len, buf_len; - bwt_width_t *w, *seed_w; - uint8_t *s; - gap_opt_t opt2 = *buf->opt; - bwa_sai_t sai; - - seq_len = strlen(seq); - // estimate the buffer length - buf_len = (buf->opt->seed_len + seq_len + 1) * sizeof(bwt_width_t) + seq_len; - if (buf_len > buf->max_buf) { - buf->max_buf = buf_len; - kroundup32(buf->max_buf); - buf->buf = realloc(buf->buf, buf->max_buf); - } - memset(buf->buf, 0, buf_len); - seed_w = (bwt_width_t*)buf->buf; - w = seed_w + buf->opt->seed_len; - s = (uint8_t*)(w + seq_len + 1); - if (opt2.fnr > 0.) opt2.max_diff = buf->diff_tab[seq_len]; - // copy the sequence - for (i = 0; i < seq_len; ++i) - s[i] = nst_nt4_table[(int)seq[i]]; - seq_reverse(seq_len, s, 0); - // mapping - bwt_cal_width(idx->bwt, seq_len, s, w); - if (opt2.seed_len >= seq_len) opt2.seed_len = 0x7fffffff; - if (seq_len > buf->opt->seed_len) - bwt_cal_width(idx->bwt, buf->opt->seed_len, s + (seq_len - buf->opt->seed_len), seed_w); - for (i = 0; i < seq_len; ++i) // complement; I forgot why... - s[i] = s[i] > 3? 4 : 3 - s[i]; - sai.sai = (bwa_sai1_t*)bwt_match_gap(idx->bwt, seq_len, s, w, seq_len <= buf->opt->seed_len? 0 : seed_w, &opt2, &sai.n, buf->stack); - return sai; -} - -static void compute_NM(const uint8_t *pac, uint64_t l_pac, uint8_t *seq, int64_t pos, int n_cigar, uint32_t *cigar, int *n_mm, int *n_gaps) -{ - uint64_t x = pos, z; - int k, y = 0; - *n_mm = *n_gaps = 0; - for (k = 0; k < n_cigar; ++k) { - int l = cigar[k]>>4; - int op = cigar[k]&0xf; - if (op == 0) { // match/mismatch - for (z = 0; z < l && x + z < l_pac; ++z) { - int c = pac[(x+z)>>2] >> ((~(x+z)&3)<<1) & 3; - if (c > 3 || seq[y+z] > 3 || c != seq[y+z]) ++(*n_mm); - } - } - if (op == 1 || op == 2) (*n_gaps) += l; - if (op == 0 || op == 2) x += l; - if (op == 0 || op == 1 || op == 4) y += l; - } -} - -void bwa_sa2aln(const bwa_idx_t *idx, bwa_buf_t *buf, const char *seq, uint64_t sa, int n_gaps, bwa_aln_t *aln) -{ - extern bwtint_t bwa_sa2pos(const bntseq_t *bns, const bwt_t *bwt, bwtint_t sapos, int len, int *strand); - extern bwa_cigar_t *bwa_refine_gapped_core(bwtint_t l_pac, const ubyte_t *pacseq, int len, const uint8_t *seq, bwtint_t *_pos, int ext, int *n_cigar, int is_end_correct); - int strand, seq_len, i, n_gap, n_mm; - uint64_t pos3, pac_pos; - uint8_t *s[2]; - - memset(aln, 0, sizeof(bwa_aln_t)); - seq_len = strlen(seq); - if (seq_len<<1 > buf->max_buf) { - buf->max_buf = seq_len<<1; - kroundup32(buf->max_buf); - buf->buf = realloc(buf->buf, buf->max_buf); - } - s[0] = buf->buf; - s[1] = s[0] + seq_len; - for (i = 0; i < seq_len; ++i) - s[0][i] = s[1][i] = nst_nt4_table[(int)seq[i]]; - seq_reverse(seq_len, s[1], 1); - pac_pos = bwa_sa2pos(idx->bns, idx->bwt, sa, seq_len, &strand); - if (strand) aln->flag |= 16; - if (n_gaps) { // only for gapped alignment - int n_cigar; - bwa_cigar_t *cigar16; - cigar16 = bwa_refine_gapped_core(idx->bns->l_pac, idx->pac, seq_len, s[strand], &pac_pos, strand? n_gaps : -n_gaps, &n_cigar, 1); - aln->n_cigar = n_cigar; - aln->cigar = malloc(n_cigar * 4); - for (i = 0, pos3 = pac_pos; i < n_cigar; ++i) { - int op = cigar16[i]>>14; - int len = cigar16[i]&0x3fff; - if (op == 3) op = 4; // the 16-bit CIGAR is different from the 32-bit CIGAR - aln->cigar[i] = len<<4 | op; - if (op == 0 || op == 2) pos3 += len; - } - free(cigar16); - } else { // ungapped - aln->n_cigar = 1; - aln->cigar = malloc(4); - aln->cigar[0] = seq_len<<4 | 0; - pos3 = pac_pos + seq_len; - } - aln->n_n = bns_cnt_ambi(idx->bns, pac_pos, pos3 - pac_pos, &aln->ref_id); - aln->offset = pac_pos - idx->bns->anns[aln->ref_id].offset; - if (pos3 - idx->bns->anns[aln->ref_id].offset > idx->bns->anns[aln->ref_id].len) // read mapped beyond the end of a sequence - aln->flag |= 4; // read unmapped - compute_NM(idx->pac, idx->bns->l_pac, s[strand], pac_pos, aln->n_cigar, aln->cigar, &n_mm, &n_gap); - aln->n_mm = n_mm; - aln->n_gap = n_gap; -} - -/************************ - * Single-end alignment * - ************************/ - -bwa_one_t *bwa_se(const bwa_idx_t *idx, bwa_buf_t *buf, const char *seq, int gen_cigar) -{ - bwa_one_t *one; - int best, cnt, i, seq_len; - - seq_len = strlen(seq); - one = calloc(1, sizeof(bwa_one_t)); - one->sai = bwa_sai(idx, buf, seq); - if (one->sai.n == 0) return one; - // count number of hits; randomly select one alignment - best = one->sai.sai[0].score; - for (i = cnt = 0; i < one->sai.n; ++i) { - bwa_sai1_t *p = &one->sai.sai[i]; - if (p->score > best) break; - if (drand48() * (p->l - p->k + 1 + cnt) > (double)cnt) { - one->which = p; - one->sa = p->k + (bwtint_t)((p->l - p->k + 1) * drand48()); - } - cnt += p->l - p->k + 1; - } - one->c1 = cnt; - for (; i < one->sai.n; ++i) - cnt += one->sai.sai[i].l - one->sai.sai[i].k + 1; - one->c2 = cnt - one->c1; - // estimate single-end mapping quality - one->mapQs = -1; - if (one->c1 == 0) one->mapQs = 23; // FIXME: is it possible? - else if (one->c1 > 1) one->mapQs = 0; - else { - int diff = one->which->n_mm + one->which->n_gapo + one->which->n_gape; - if (diff >= buf->diff_tab[seq_len]) one->mapQs = 25; - else if (one->c2 == 0) one->mapQs = 37; - } - if (one->mapQs < 0) { - cnt = (one->c2 >= 255)? 255 : one->c2; - one->mapQs = 23 < buf->logn[cnt]? 0 : 23 - buf->logn[cnt]; - } - one->mapQ = one->mapQs; - // compute CIGAR on request - one->one.ref_id = -1; - if (gen_cigar) bwa_sa2aln(idx, buf, seq, one->sa, one->which->n_gapo + one->which->n_gape, &one->one); - return one; -} - -void bwa_one_destroy(bwa_one_t *one) -{ - free(one->sai.sai); - free(one->one.cigar); - free(one); -} - -/************************ - * Paired-end alignment * - ************************/ - -void bwa_pestat(bwa_buf_t *buf, int n, bwa_one_t **o[2]) -{ -} - -void bwa_pe(const bwa_idx_t *idx, bwa_buf_t *buf, const char *seq[2], bwa_one_t *o[2]) -{ -} diff --git a/bwa.h b/bwa.h deleted file mode 100644 index e8172da..0000000 --- a/bwa.h +++ /dev/null @@ -1,107 +0,0 @@ -#ifndef BWA_H_ -#define BWA_H_ - -#include - -#define BWA_DEF_MAX_SCORE 2048 -#define BWA_MAX_QUERY_LEN 1024 - -// BWA index -struct bwa_idx_t; -typedef struct bwa_idx_t bwa_idx_t; - -// Buffer for BWA alignment -struct bwa_buf_t; -typedef struct bwa_buf_t bwa_buf_t; - -// BWA alignment options -typedef struct { - int s_gapo, s_gape; // gap open and extension penalties; the mismatch penalty is fixed at 3 - int max_diff, max_gapo, max_gape; // max differences (-1 to use fnr for length-adjusted max diff), gap opens and gap extensions - int seed_len, max_seed_diff; // seed length and max differences allowed in the seed - float fnr; // parameter for automatic length-adjusted max differences -} bwa_opt_t; - -// default BWA alignment options -extern bwa_opt_t bwa_def_opt; // = { 11, 4, -1, 1, 6, 32, 2, 0.04 } - -// an interval hit in the SA coordinate; basic unit in .sai files -typedef struct { - uint32_t n_mm:16, n_gapo:8, n_gape:8; - int score; - uint64_t k, l; // [k,l] is the SA interval; each interval has l-k+1 hits -} bwa_sai1_t; - -// all interval hits in the SA coordinate -typedef struct { - int n; // number of interval hits - bwa_sai1_t *sai; -} bwa_sai_t; - -// an alignment -typedef struct { - uint32_t n_n:8, n_gap:12, n_mm:12; // number of ambiguous bases, gaps and mismatches in the alignment - int32_t ref_id; // referece sequence index (the first seq is indexed by 0) - uint32_t offset; // coordinate on the reference; zero-based - uint32_t n_cigar:16, flag:16; // number of CIGAR operations; SAM flag - uint32_t *cigar; // CIGAR in the BAM 28+4 encoding; having n_cigar operations -} bwa_aln_t; - -typedef struct { - int mapQs, mapQ, c1, c2; - uint64_t sa; - bwa_sai1_t *which; - bwa_sai_t sai; - bwa_aln_t one; -} bwa_one_t; - -typedef struct { - double avg, std, ap_prior; - uint64_t low, high, high_bayesian; -} bwa_pestat_t; - -#ifdef __cplusplus -extern "C" { -#endif - - // load a BWA index - bwa_idx_t *bwa_idx_load(const char *prefix); - void bwa_idx_destroy(bwa_idx_t *p); - - // allocate a BWA alignment buffer; if unsure, set opt to &bwa_def_opt and max_score to BWA_DEF_MAX_SCORE - bwa_buf_t *bwa_buf_init(const bwa_opt_t *opt, int max_score); - void bwa_buf_destroy(bwa_buf_t *p); - - /** - * Find all the SA intervals - * - * @param idx BWA index; multiple threads can share the same index - * @param buf BWA alignment buffer; each thread should have its own buffer - * @param seq NULL terminated C string, consisting of A/C/G/T/N only - * - * @return SA intervals seq is matched to - */ - bwa_sai_t bwa_sai(const bwa_idx_t *idx, bwa_buf_t *buf, const char *seq); - - /** - * Construct an alignment in the base-pair coordinate - * - * @param idx BWA index - * @param buf BWA alignment buffer - * @param seq NULL terinated C string - * @param sa Suffix array value - * @param n_gaps Number of gaps (typically equal to bwa_sai1_t::n_gapo + bwa_sai1_t::n_gape - * - * @return An alignment - */ - void bwa_sa2aln(const bwa_idx_t *idx, bwa_buf_t *buf, const char *seq, uint64_t sa, int n_gaps, bwa_aln_t *aln); - - bwa_one_t *bwa_se(const bwa_idx_t *idx, bwa_buf_t *buf, const char *seq, int gen_cigar); - - void bwa_one_destroy(bwa_one_t *one); - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/bwamem_pair.c b/bwamem_pair.c index 019f570..845051c 100644 --- a/bwamem_pair.c +++ b/bwamem_pair.c @@ -4,6 +4,7 @@ #include "kstring.h" #include "bwamem.h" #include "kvec.h" +#include "utils.h" #define MIN_RATIO 0.8 #define MIN_DIR_CNT 10 @@ -13,9 +14,6 @@ #define MAX_STDDEV 4.0 #define EXT_STDDEV 4.0 -typedef kvec_t(uint64_t) vec64_t; - -extern void ks_introsort_uint64_t(size_t n, uint64_t *a); void bwa_hit2sam(kstring_t *str, const int8_t mat[25], int q, int r, int w, const bntseq_t *bns, const uint8_t *pac, bseq1_t *s, bwahit_t *p, int is_hard); static int cal_sub(const mem_opt_t *opt, mem_alnreg_v *r) @@ -34,9 +32,8 @@ static int cal_sub(const mem_opt_t *opt, mem_alnreg_v *r) void mem_pestat(const mem_opt_t *opt, int64_t l_pac, int n, const mem_alnreg_v *regs, mem_pestat_t pes[4]) { - extern void ks_introsort_uint64_t(size_t n, uint64_t *a); int i, d, max; - vec64_t isize[4]; + uint64_v isize[4]; memset(pes, 0, 4 * sizeof(mem_pestat_t)); memset(isize, 0, sizeof(kvec_t(int)) * 4); for (i = 0; i < n>>1; ++i) { @@ -58,14 +55,14 @@ void mem_pestat(const mem_opt_t *opt, int64_t l_pac, int n, const mem_alnreg_v * if (mem_verbose >= 3) fprintf(stderr, "[M::%s] # candidate unique pairs for (FF, FR, RF, RR): (%ld, %ld, %ld, %ld)\n", __func__, isize[0].n, isize[1].n, isize[2].n, isize[3].n); for (d = 0; d < 4; ++d) { // TODO: this block is nearly identical to the one in bwtsw2_pair.c. It would be better to merge these two. mem_pestat_t *r = &pes[d]; - vec64_t *q = &isize[d]; + uint64_v *q = &isize[d]; int p25, p50, p75, x; if (q->n < MIN_DIR_CNT) { fprintf(stderr, "[M::%s] skip orientation %c%c as there are not enough pairs\n", __func__, "FR"[d>>1&1], "FR"[d&1]); r->failed = 1; continue; } else fprintf(stderr, "[M::%s] analyzing insert size distribution for orientation %c%c...\n", __func__, "FR"[d>>1&1], "FR"[d&1]); - ks_introsort_uint64_t(q->n, q->a); + ks_introsort_64(q->n, q->a); p25 = q->a[(int)(.25 * q->n + .499)]; p50 = q->a[(int)(.50 * q->n + .499)]; p75 = q->a[(int)(.75 * q->n + .499)]; @@ -102,7 +99,7 @@ void mem_pestat(const mem_opt_t *opt, int64_t l_pac, int n, const mem_alnreg_v * void mem_pair(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, const mem_pestat_t pes[4], bseq1_t s[2], mem_alnreg_v a[2], bwahit_t h[2]) { - vec64_t v; + uint64_v v; int r, i, y[4]; // y[] keeps the last hit kv_init(v); for (r = 0; r < 2; ++r) { @@ -113,7 +110,7 @@ void mem_pair(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, con kv_push(uint64_t, v, key); } } - ks_introsort_uint64_t(v.n, v.a); + ks_introsort_64(v.n, v.a); y[0] = y[1] = y[2] = y[3] = -1; printf("**** %ld\n", v.n); for (i = 0; i < v.n; ++i) { diff --git a/bwape.c b/bwape.c index 779670f..644b5bd 100644 --- a/bwape.c +++ b/bwape.c @@ -21,24 +21,15 @@ typedef struct { bwtint_t low, high, high_bayesian; } isize_info_t; -typedef struct { - uint64_t x, y; -} b128_t; - -#define b128_lt(a, b) ((a).x < (b).x) #define b128_eq(a, b) ((a).x == (b).x && (a).y == (b).y) #define b128_hash(a) ((uint32_t)(a).x) #include "khash.h" -KHASH_INIT(b128, b128_t, poslist_t, 1, b128_hash, b128_eq) - -#include "ksort.h" -KSORT_INIT(b128, b128_t, b128_lt) -KSORT_INIT_GENERIC(uint64_t) +KHASH_INIT(b128, pair64_t, poslist_t, 1, b128_hash, b128_eq) typedef struct { - kvec_t(b128_t) arr; - kvec_t(b128_t) pos[2]; + pair64_v arr; + pair64_v pos[2]; kvec_t(bwt_aln1_t) aln[2]; } pe_data_t; @@ -120,7 +111,7 @@ static int infer_isize(int n_seqs, bwa_seq_t *seqs[2], isize_info_t *ii, double free(isizes); return -1; } - ks_introsort(uint64_t, tot, isizes); + ks_introsort_64(tot, isizes); p25 = isizes[(int)(tot*0.25 + 0.5)]; p50 = isizes[(int)(tot*0.50 + 0.5)]; p75 = isizes[(int)(tot*0.75 + 0.5)]; @@ -170,7 +161,7 @@ static int pairing(bwa_seq_t *p[2], pe_data_t *d, const pe_opt_t *opt, int s_mm, { int i, j, o_n, subo_n, cnt_chg = 0, low_bound = ii->low, max_len; uint64_t o_score, subo_score; - b128_t last_pos[2][2], o_pos[2]; + pair64_t last_pos[2][2], o_pos[2]; max_len = p[0]->full_len; if (max_len < p[1]->full_len) max_len = p[1]->full_len; if (low_bound < max_len) low_bound = max_len; @@ -206,11 +197,11 @@ static int pairing(bwa_seq_t *p[2], pe_data_t *d, const pe_opt_t *opt, int s_mm, o_score = subo_score = (uint64_t)-1; o_n = subo_n = 0; - ks_introsort(b128, d->arr.n, d->arr.a); + ks_introsort_128(d->arr.n, d->arr.a); for (j = 0; j < 2; ++j) last_pos[j][0].x = last_pos[j][0].y = last_pos[j][1].x = last_pos[j][1].y = (uint64_t)-1; if (opt->type == BWA_PET_STD) { for (i = 0; i < d->arr.n; ++i) { - b128_t x = d->arr.a[i]; + pair64_t x = d->arr.a[i]; int strand = x.y>>1&1; if (strand == 1) { // reverse strand, then check int y = 1 - (x.y&1); @@ -223,7 +214,7 @@ static int pairing(bwa_seq_t *p[2], pe_data_t *d, const pe_opt_t *opt, int s_mm, } } else if (opt->type == BWA_PET_SOLID) { for (i = 0; i < d->arr.n; ++i) { - b128_t x = d->arr.a[i]; + pair64_t x = d->arr.a[i]; int strand = x.y>>1&1; if ((strand^x.y)&1) { // push int y = 1 - (x.y&1); @@ -345,7 +336,7 @@ int bwa_cal_pac_pos_pe(const bntseq_t *bns, const char *prefix, bwt_t *const _bw if ((p[0]->type == BWA_TYPE_UNIQUE || p[0]->type == BWA_TYPE_REPEAT) && (p[1]->type == BWA_TYPE_UNIQUE || p[1]->type == BWA_TYPE_REPEAT)) { // only when both ends mapped - b128_t x; + pair64_t x; int j, k; long long n_occ[2]; for (j = 0; j < 2; ++j) { @@ -360,7 +351,7 @@ int bwa_cal_pac_pos_pe(const bntseq_t *bns, const char *prefix, bwt_t *const _bw bwt_aln1_t *r = d->aln[j].a + k; bwtint_t l; if (0 && r->l - r->k + 1 >= MIN_HASH_WIDTH) { // then check hash table - b128_t key; + pair64_t key; int ret; key.x = r->k; key.y = r->l; khint_t iter = kh_put(b128, g_hash, key, &ret); @@ -377,14 +368,14 @@ int bwa_cal_pac_pos_pe(const bntseq_t *bns, const char *prefix, bwt_t *const _bw for (l = 0; l < kh_val(g_hash, iter).n; ++l) { x.x = kh_val(g_hash, iter).a[l]>>1; x.y = k<<2 | (kh_val(g_hash, iter).a[l]&1)<<1 | j; - kv_push(b128_t, d->arr, x); + kv_push(pair64_t, d->arr, x); } } else { // then calculate on the fly for (l = r->k; l <= r->l; ++l) { int strand; x.x = bwa_sa2pos(bns, bwt, l, p[j]->len, &strand); x.y = k<<2 | strand<<1 | j; - kv_push(b128_t, d->arr, x); + kv_push(pair64_t, d->arr, x); } } } diff --git a/bwtsw2_pair.c b/bwtsw2_pair.c index 633641e..8a8287b 100644 --- a/bwtsw2_pair.c +++ b/bwtsw2_pair.c @@ -6,6 +6,7 @@ #include "bntseq.h" #include "bwtsw2.h" #include "kstring.h" +#include "utils.h" #ifndef _NO_SSE2 #include "ksw.h" #else @@ -24,7 +25,6 @@ typedef struct { bsw2pestat_t bsw2_stat(int n, bwtsw2_t **buf, kstring_t *msg, int max_ins) { - extern void ks_introsort_uint64_t(size_t n, uint64_t *a); int i, k, x, p25, p50, p75, tmp, max_len = 0; uint64_t *isize; bsw2pestat_t r; @@ -44,7 +44,7 @@ bsw2pestat_t bsw2_stat(int n, bwtsw2_t **buf, kstring_t *msg, int max_ins) max_len = max_len > t[1]->end - t[1]->beg? max_len : t[1]->end - t[1]->beg; isize[k++] = l; } - ks_introsort_uint64_t(k, isize); + ks_introsort_64(k, isize); p25 = isize[(int)(.25 * k + .499)]; p50 = isize[(int)(.50 * k + .499)]; p75 = isize[(int)(.75 * k + .499)]; diff --git a/khash.h b/khash.h index de6be6d..2422044 100644 --- a/khash.h +++ b/khash.h @@ -1,6 +1,6 @@ /* The MIT License - Copyright (c) 2008, 2009 by attractor + Copyright (c) 2008, 2009, 2011 by Attractive Chaos Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the @@ -33,7 +33,6 @@ int main() { khiter_t k; khash_t(32) *h = kh_init(32); k = kh_put(32, h, 5, &ret); - if (!ret) kh_del(32, h, k); kh_value(h, k) = 10; k = kh_get(32, h, 10); is_missing = (k == kh_end(h)); @@ -47,6 +46,29 @@ int main() { */ /* + 2011-12-29 (0.2.7): + + * Minor code clean up; no actual effect. + + 2011-09-16 (0.2.6): + + * The capacity is a power of 2. This seems to dramatically improve the + speed for simple keys. Thank Zilong Tan for the suggestion. Reference: + + - http://code.google.com/p/ulib/ + - http://nothings.org/computer/judy/ + + * Allow to optionally use linear probing which usually has better + performance for random input. Double hashing is still the default as it + is more robust to certain non-random input. + + * Added Wang's integer hash function (not used by default). This hash + function is more robust to certain non-random input. + + 2011-02-14 (0.2.5): + + * Allow to declare global functions. + 2009-09-26 (0.2.4): * Improve portability @@ -86,11 +108,9 @@ int main() { @header Generic hash table library. - - @copyright Heng Li */ -#define AC_VERSION_KHASH_H "0.2.4" +#define AC_VERSION_KHASH_H "0.2.6" #include #include @@ -111,24 +131,14 @@ typedef unsigned long long khint64_t; #endif #ifdef _MSC_VER -#define inline __inline +#define kh_inline __inline +#else +#define kh_inline inline #endif typedef khint32_t khint_t; typedef khint_t khiter_t; -#define __ac_HASH_PRIME_SIZE 32 -static const khint32_t __ac_prime_list[__ac_HASH_PRIME_SIZE] = -{ - 0ul, 3ul, 11ul, 23ul, 53ul, - 97ul, 193ul, 389ul, 769ul, 1543ul, - 3079ul, 6151ul, 12289ul, 24593ul, 49157ul, - 98317ul, 196613ul, 393241ul, 786433ul, 1572869ul, - 3145739ul, 6291469ul, 12582917ul, 25165843ul, 50331653ul, - 100663319ul, 201326611ul, 402653189ul, 805306457ul, 1610612741ul, - 3221225473ul, 4294967291ul -}; - #define __ac_isempty(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&2) #define __ac_isdel(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&1) #define __ac_iseither(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&3) @@ -137,88 +147,128 @@ static const khint32_t __ac_prime_list[__ac_HASH_PRIME_SIZE] = #define __ac_set_isboth_false(flag, i) (flag[i>>4]&=~(3ul<<((i&0xfU)<<1))) #define __ac_set_isdel_true(flag, i) (flag[i>>4]|=1ul<<((i&0xfU)<<1)) +#ifdef KHASH_LINEAR +#define __ac_inc(k, m) 1 +#else +#define __ac_inc(k, m) (((k)>>3 ^ (k)<<3) | 1) & (m) +#endif + +#define __ac_fsize(m) ((m) < 16? 1 : (m)>>4) + +#ifndef kroundup32 +#define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x)) +#endif + +#ifndef kcalloc +#define kcalloc(N,Z) calloc(N,Z) +#endif +#ifndef kmalloc +#define kmalloc(Z) malloc(Z) +#endif +#ifndef krealloc +#define krealloc(P,Z) realloc(P,Z) +#endif +#ifndef kfree +#define kfree(P) free(P) +#endif + static const double __ac_HASH_UPPER = 0.77; -#define KHASH_INIT(name, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \ - typedef struct { \ - khint_t n_buckets, size, n_occupied, upper_bound; \ - khint32_t *flags; \ - khkey_t *keys; \ - khval_t *vals; \ - } kh_##name##_t; \ - static inline kh_##name##_t *kh_init_##name() { \ - return (kh_##name##_t*)calloc(1, sizeof(kh_##name##_t)); \ +#define __KHASH_TYPE(name, khkey_t, khval_t) \ + typedef struct { \ + khint_t n_buckets, size, n_occupied, upper_bound; \ + khint32_t *flags; \ + khkey_t *keys; \ + khval_t *vals; \ + } kh_##name##_t; + +#define __KHASH_PROTOTYPES(name, khkey_t, khval_t) \ + extern kh_##name##_t *kh_init_##name(void); \ + extern void kh_destroy_##name(kh_##name##_t *h); \ + extern void kh_clear_##name(kh_##name##_t *h); \ + extern khint_t kh_get_##name(const kh_##name##_t *h, khkey_t key); \ + extern int kh_resize_##name(kh_##name##_t *h, khint_t new_n_buckets); \ + extern khint_t kh_put_##name(kh_##name##_t *h, khkey_t key, int *ret); \ + extern void kh_del_##name(kh_##name##_t *h, khint_t x); + +#define __KHASH_IMPL(name, SCOPE, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \ + SCOPE kh_##name##_t *kh_init_##name(void) { \ + return (kh_##name##_t*)kcalloc(1, sizeof(kh_##name##_t)); \ } \ - static inline void kh_destroy_##name(kh_##name##_t *h) \ + SCOPE void kh_destroy_##name(kh_##name##_t *h) \ { \ if (h) { \ - free(h->keys); free(h->flags); \ - free(h->vals); \ - free(h); \ + kfree((void *)h->keys); kfree(h->flags); \ + kfree((void *)h->vals); \ + kfree(h); \ } \ } \ - static inline void kh_clear_##name(kh_##name##_t *h) \ + SCOPE void kh_clear_##name(kh_##name##_t *h) \ { \ if (h && h->flags) { \ - memset(h->flags, 0xaa, ((h->n_buckets>>4) + 1) * sizeof(khint32_t)); \ + memset(h->flags, 0xaa, __ac_fsize(h->n_buckets) * sizeof(khint32_t)); \ h->size = h->n_occupied = 0; \ } \ } \ - static inline khint_t kh_get_##name(const kh_##name##_t *h, khkey_t key) \ + SCOPE khint_t kh_get_##name(const kh_##name##_t *h, khkey_t key) \ { \ if (h->n_buckets) { \ - khint_t inc, k, i, last; \ - k = __hash_func(key); i = k % h->n_buckets; \ - inc = 1 + k % (h->n_buckets - 1); last = i; \ + khint_t inc, k, i, last, mask; \ + mask = h->n_buckets - 1; \ + k = __hash_func(key); i = k & mask; \ + inc = __ac_inc(k, mask); last = i; /* inc==1 for linear probing */ \ while (!__ac_isempty(h->flags, i) && (__ac_isdel(h->flags, i) || !__hash_equal(h->keys[i], key))) { \ - if (i + inc >= h->n_buckets) i = i + inc - h->n_buckets; \ - else i += inc; \ + i = (i + inc) & mask; \ if (i == last) return h->n_buckets; \ } \ return __ac_iseither(h->flags, i)? h->n_buckets : i; \ } else return 0; \ } \ - static inline void kh_resize_##name(kh_##name##_t *h, khint_t new_n_buckets) \ - { \ + SCOPE int kh_resize_##name(kh_##name##_t *h, khint_t new_n_buckets) \ + { /* This function uses 0.25*n_bucktes bytes of working space instead of [sizeof(key_t+val_t)+.25]*n_buckets. */ \ khint32_t *new_flags = 0; \ khint_t j = 1; \ { \ - khint_t t = __ac_HASH_PRIME_SIZE - 1; \ - while (__ac_prime_list[t] > new_n_buckets) --t; \ - new_n_buckets = __ac_prime_list[t+1]; \ - if (h->size >= (khint_t)(new_n_buckets * __ac_HASH_UPPER + 0.5)) j = 0; \ - else { \ - new_flags = (khint32_t*)malloc(((new_n_buckets>>4) + 1) * sizeof(khint32_t)); \ - memset(new_flags, 0xaa, ((new_n_buckets>>4) + 1) * sizeof(khint32_t)); \ - if (h->n_buckets < new_n_buckets) { \ - h->keys = (khkey_t*)realloc(h->keys, new_n_buckets * sizeof(khkey_t)); \ - if (kh_is_map) \ - h->vals = (khval_t*)realloc(h->vals, new_n_buckets * sizeof(khval_t)); \ - } \ + kroundup32(new_n_buckets); \ + if (new_n_buckets < 4) new_n_buckets = 4; \ + if (h->size >= (khint_t)(new_n_buckets * __ac_HASH_UPPER + 0.5)) j = 0; /* requested size is too small */ \ + else { /* hash table size to be changed (shrink or expand); rehash */ \ + new_flags = (khint32_t*)kmalloc(__ac_fsize(new_n_buckets) * sizeof(khint32_t)); \ + if (!new_flags) return -1; \ + memset(new_flags, 0xaa, __ac_fsize(new_n_buckets) * sizeof(khint32_t)); \ + if (h->n_buckets < new_n_buckets) { /* expand */ \ + khkey_t *new_keys = (khkey_t*)krealloc((void *)h->keys, new_n_buckets * sizeof(khkey_t)); \ + if (!new_keys) return -1; \ + h->keys = new_keys; \ + if (kh_is_map) { \ + khval_t *new_vals = (khval_t*)krealloc((void *)h->vals, new_n_buckets * sizeof(khval_t)); \ + if (!new_vals) return -1; \ + h->vals = new_vals; \ + } \ + } /* otherwise shrink */ \ } \ } \ - if (j) { \ + if (j) { /* rehashing is needed */ \ for (j = 0; j != h->n_buckets; ++j) { \ if (__ac_iseither(h->flags, j) == 0) { \ khkey_t key = h->keys[j]; \ khval_t val; \ + khint_t new_mask; \ + new_mask = new_n_buckets - 1; \ if (kh_is_map) val = h->vals[j]; \ __ac_set_isdel_true(h->flags, j); \ - while (1) { \ + while (1) { /* kick-out process; sort of like in Cuckoo hashing */ \ khint_t inc, k, i; \ k = __hash_func(key); \ - i = k % new_n_buckets; \ - inc = 1 + k % (new_n_buckets - 1); \ - while (!__ac_isempty(new_flags, i)) { \ - if (i + inc >= new_n_buckets) i = i + inc - new_n_buckets; \ - else i += inc; \ - } \ + i = k & new_mask; \ + inc = __ac_inc(k, new_mask); \ + while (!__ac_isempty(new_flags, i)) i = (i + inc) & new_mask; \ __ac_set_isempty_false(new_flags, i); \ - if (i < h->n_buckets && __ac_iseither(h->flags, i) == 0) { \ + if (i < h->n_buckets && __ac_iseither(h->flags, i) == 0) { /* kick out the existing element */ \ { khkey_t tmp = h->keys[i]; h->keys[i] = key; key = tmp; } \ if (kh_is_map) { khval_t tmp = h->vals[i]; h->vals[i] = val; val = tmp; } \ - __ac_set_isdel_true(h->flags, i); \ - } else { \ + __ac_set_isdel_true(h->flags, i); /* mark it as deleted in the old hash table */ \ + } else { /* write the element and jump out of the loop */ \ h->keys[i] = key; \ if (kh_is_map) h->vals[i] = val; \ break; \ @@ -226,35 +276,39 @@ static const double __ac_HASH_UPPER = 0.77; } \ } \ } \ - if (h->n_buckets > new_n_buckets) { \ - h->keys = (khkey_t*)realloc(h->keys, new_n_buckets * sizeof(khkey_t)); \ - if (kh_is_map) \ - h->vals = (khval_t*)realloc(h->vals, new_n_buckets * sizeof(khval_t)); \ + if (h->n_buckets > new_n_buckets) { /* shrink the hash table */ \ + h->keys = (khkey_t*)krealloc((void *)h->keys, new_n_buckets * sizeof(khkey_t)); \ + if (kh_is_map) h->vals = (khval_t*)krealloc((void *)h->vals, new_n_buckets * sizeof(khval_t)); \ } \ - free(h->flags); \ + kfree(h->flags); /* free the working space */ \ h->flags = new_flags; \ h->n_buckets = new_n_buckets; \ h->n_occupied = h->size; \ h->upper_bound = (khint_t)(h->n_buckets * __ac_HASH_UPPER + 0.5); \ } \ + return 0; \ } \ - static inline khint_t kh_put_##name(kh_##name##_t *h, khkey_t key, int *ret) \ + SCOPE khint_t kh_put_##name(kh_##name##_t *h, khkey_t key, int *ret) \ { \ khint_t x; \ - if (h->n_occupied >= h->upper_bound) { \ - if (h->n_buckets > (h->size<<1)) kh_resize_##name(h, h->n_buckets - 1); \ - else kh_resize_##name(h, h->n_buckets + 1); \ - } \ + if (h->n_occupied >= h->upper_bound) { /* update the hash table */ \ + if (h->n_buckets > (h->size<<1)) { \ + if (kh_resize_##name(h, h->n_buckets - 1) < 0) { /* clear "deleted" elements */ \ + *ret = -1; return h->n_buckets; \ + } \ + } else if (kh_resize_##name(h, h->n_buckets + 1) < 0) { /* expand the hash table */ \ + *ret = -1; return h->n_buckets; \ + } \ + } /* TODO: to implement automatically shrinking; resize() already support shrinking */ \ { \ - khint_t inc, k, i, site, last; \ - x = site = h->n_buckets; k = __hash_func(key); i = k % h->n_buckets; \ - if (__ac_isempty(h->flags, i)) x = i; \ + khint_t inc, k, i, site, last, mask = h->n_buckets - 1; \ + x = site = h->n_buckets; k = __hash_func(key); i = k & mask; \ + if (__ac_isempty(h->flags, i)) x = i; /* for speed up */ \ else { \ - inc = 1 + k % (h->n_buckets - 1); last = i; \ + inc = __ac_inc(k, mask); last = i; \ while (!__ac_isempty(h->flags, i) && (__ac_isdel(h->flags, i) || !__hash_equal(h->keys[i], key))) { \ if (__ac_isdel(h->flags, i)) site = i; \ - if (i + inc >= h->n_buckets) i = i + inc - h->n_buckets; \ - else i += inc; \ + i = (i + inc) & mask; \ if (i == last) { x = site; break; } \ } \ if (x == h->n_buckets) { \ @@ -263,20 +317,20 @@ static const double __ac_HASH_UPPER = 0.77; } \ } \ } \ - if (__ac_isempty(h->flags, x)) { \ + if (__ac_isempty(h->flags, x)) { /* not present at all */ \ h->keys[x] = key; \ __ac_set_isboth_false(h->flags, x); \ ++h->size; ++h->n_occupied; \ *ret = 1; \ - } else if (__ac_isdel(h->flags, x)) { \ + } else if (__ac_isdel(h->flags, x)) { /* deleted */ \ h->keys[x] = key; \ __ac_set_isboth_false(h->flags, x); \ ++h->size; \ *ret = 2; \ - } else *ret = 0; \ + } else *ret = 0; /* Don't touch h->keys[x] if present and not deleted */ \ return x; \ } \ - static inline void kh_del_##name(kh_##name##_t *h, khint_t x) \ + SCOPE void kh_del_##name(kh_##name##_t *h, khint_t x) \ { \ if (x != h->n_buckets && !__ac_iseither(h->flags, x)) { \ __ac_set_isdel_true(h->flags, x); \ @@ -284,6 +338,17 @@ static const double __ac_HASH_UPPER = 0.77; } \ } +#define KHASH_DECLARE(name, khkey_t, khval_t) \ + __KHASH_TYPE(name, khkey_t, khval_t) \ + __KHASH_PROTOTYPES(name, khkey_t, khval_t) + +#define KHASH_INIT2(name, SCOPE, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \ + __KHASH_TYPE(name, khkey_t, khval_t) \ + __KHASH_IMPL(name, SCOPE, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) + +#define KHASH_INIT(name, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \ + KHASH_INIT2(name, static kh_inline, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) + /* --- BEGIN OF HASH FUNCTIONS --- */ /*! @function @@ -311,10 +376,10 @@ static const double __ac_HASH_UPPER = 0.77; @param s Pointer to a null terminated string @return The hash value */ -static inline khint_t __ac_X31_hash_string(const char *s) +static kh_inline khint_t __ac_X31_hash_string(const char *s) { - khint_t h = *s; - if (h) for (++s ; *s; ++s) h = (h << 5) - h + *s; + khint_t h = (khint_t)*s; + if (h) for (++s ; *s; ++s) h = (h << 5) - h + (khint_t)*s; return h; } /*! @function @@ -328,9 +393,21 @@ static inline khint_t __ac_X31_hash_string(const char *s) */ #define kh_str_hash_equal(a, b) (strcmp(a, b) == 0) +static kh_inline khint_t __ac_Wang_hash(khint_t key) +{ + key += ~(key << 15); + key ^= (key >> 10); + key += (key << 3); + key ^= (key >> 6); + key += ~(key << 11); + key ^= (key >> 16); + return key; +} +#define kh_int_hash_func2(k) __ac_Wang_hash((khint_t)key) + /* --- END OF HASH FUNCTIONS --- */ -/* Other necessary macros... */ +/* Other convenient macros... */ /*! @abstract Type of the hash table. @@ -396,7 +473,6 @@ static inline khint_t __ac_X31_hash_string(const char *s) */ #define kh_del(name, h, k) kh_del_##name(h, k) - /*! @function @abstract Test whether a bucket contains data. @param h Pointer to the hash table [khash_t(name)*] @@ -455,6 +531,34 @@ static inline khint_t __ac_X31_hash_string(const char *s) */ #define kh_n_buckets(h) ((h)->n_buckets) +/*! @function + @abstract Iterate over the entries in the hash table + @param h Pointer to the hash table [khash_t(name)*] + @param kvar Variable to which key will be assigned + @param vvar Variable to which value will be assigned + @param code Block of code to execute + */ +#define kh_foreach(h, kvar, vvar, code) { khint_t __i; \ + for (__i = kh_begin(h); __i != kh_end(h); ++__i) { \ + if (!kh_exist(h,__i)) continue; \ + (kvar) = kh_key(h,__i); \ + (vvar) = kh_val(h,__i); \ + code; \ + } } + +/*! @function + @abstract Iterate over the values in the hash table + @param h Pointer to the hash table [khash_t(name)*] + @param vvar Variable to which value will be assigned + @param code Block of code to execute + */ +#define kh_foreach_value(h, vvar, code) { khint_t __i; \ + for (__i = kh_begin(h); __i != kh_end(h); ++__i) { \ + if (!kh_exist(h,__i)) continue; \ + (vvar) = kh_val(h,__i); \ + code; \ + } } + /* More conenient interfaces */ /*! @function diff --git a/utils.c b/utils.c index 8c1ad7e..41594c3 100644 --- a/utils.c +++ b/utils.c @@ -35,6 +35,12 @@ #include #include "utils.h" +#define pair64_lt(a, b) ((a).x < (b).x || ((a).x == (b).x && (a).y < (b).y)) + +#include "ksort.h" +KSORT_INIT(128, pair64_t, pair64_lt) +KSORT_INIT(64, uint64_t, ks_lt_generic) + FILE *err_xopen_core(const char *func, const char *fn, const char *mode) { FILE *fp = 0; @@ -46,6 +52,7 @@ FILE *err_xopen_core(const char *func, const char *fn, const char *mode) } return fp; } + FILE *err_xreopen_core(const char *func, const char *fn, const char *mode, FILE *fp) { if (freopen(fn, mode, fp) == 0) { @@ -56,6 +63,7 @@ FILE *err_xreopen_core(const char *func, const char *fn, const char *mode, FILE } return fp; } + gzFile err_xzopen_core(const char *func, const char *fn, const char *mode) { gzFile fp; @@ -67,6 +75,7 @@ gzFile err_xzopen_core(const char *func, const char *fn, const char *mode) } return fp; } + void err_fatal(const char *header, const char *fmt, ...) { va_list args; @@ -86,66 +95,48 @@ void err_fatal_simple_core(const char *func, const char *msg) size_t err_fwrite(const void *ptr, size_t size, size_t nmemb, FILE *stream) { - size_t ret = fwrite(ptr, size, nmemb, stream); - if (ret != nmemb) - { - err_fatal_simple_core("fwrite", strerror(errno)); - } - return ret; + size_t ret = fwrite(ptr, size, nmemb, stream); + if (ret != nmemb) + err_fatal_simple_core("fwrite", strerror(errno)); + return ret; } int err_printf(const char *format, ...) { - va_list arg; - int done; - - va_start(arg, format); - done = vfprintf(stdout, format, arg); - int saveErrno = errno; - va_end(arg); - - if (done < 0) - { - err_fatal_simple_core("vfprintf(stdout)", strerror(saveErrno)); - } - return done; + va_list arg; + int done; + va_start(arg, format); + done = vfprintf(stdout, format, arg); + int saveErrno = errno; + va_end(arg); + if (done < 0) err_fatal_simple_core("vfprintf(stdout)", strerror(saveErrno)); + return done; } int err_fprintf(FILE *stream, const char *format, ...) { - va_list arg; - int done; - - va_start(arg, format); - done = vfprintf(stream, format, arg); - int saveErrno = errno; - va_end(arg); - - if (done < 0) - { - err_fatal_simple_core("vfprintf", strerror(saveErrno)); - } - return done; + va_list arg; + int done; + va_start(arg, format); + done = vfprintf(stream, format, arg); + int saveErrno = errno; + va_end(arg); + if (done < 0) err_fatal_simple_core("vfprintf", strerror(saveErrno)); + return done; } int err_fflush(FILE *stream) { - int ret = fflush(stream); - if (ret != 0) - { - err_fatal_simple_core("fflush", strerror(errno)); - } - return ret; + int ret = fflush(stream); + if (ret != 0) err_fatal_simple_core("fflush", strerror(errno)); + return ret; } int err_fclose(FILE *stream) { - int ret = fclose(stream); - if (ret != 0) - { - err_fatal_simple_core("fclose", strerror(errno)); - } - return ret; + int ret = fclose(stream); + if (ret != 0) err_fatal_simple_core("fclose", strerror(errno)); + return ret; } double cputime() diff --git a/utils.h b/utils.h index b6839e9..5abab41 100644 --- a/utils.h +++ b/utils.h @@ -28,6 +28,7 @@ #ifndef LH3_UTILS_H #define LH3_UTILS_H +#include #include #include @@ -38,14 +39,19 @@ #define ATTRIBUTE(list) #endif - - #define err_fatal_simple(msg) err_fatal_simple_core(__func__, msg) #define xopen(fn, mode) err_xopen_core(__func__, fn, mode) #define xreopen(fn, mode, fp) err_xreopen_core(__func__, fn, mode, fp) #define xzopen(fn, mode) err_xzopen_core(__func__, fn, mode) #define xassert(cond, msg) if ((cond) == 0) err_fatal_simple_core(__func__, msg) +typedef struct { + uint64_t x, y; +} pair64_t; + +typedef struct { size_t n, m; uint64_t *a; } uint64_v; +typedef struct { size_t n, m; pair64_t *a; } pair64_v; + #ifdef __cplusplus extern "C" { #endif @@ -66,6 +72,9 @@ extern "C" { double cputime(); double realtime(); + void ks_introsort_64 (size_t n, uint64_t *a); + void ks_introsort_128(size_t n, pair64_t *a); + #ifdef __cplusplus } #endif