Multiple changes:
1. Removed bwa.{h,c}. I am not going to finish them anyway.
2. Updated to the latest khash.h, which should be faster.
3. Define 64-bit vector and 128-bit integer/vector in utils.h.
This commit is contained in:
parent
13288e2dcd
commit
e5ab59db53
5
Makefile
5
Makefile
|
|
@ -1,10 +1,9 @@
|
||||||
CC= gcc
|
CC= clang
|
||||||
CXX= g++
|
|
||||||
CFLAGS= -g -Wall -O2
|
CFLAGS= -g -Wall -O2
|
||||||
CXXFLAGS= $(CFLAGS)
|
CXXFLAGS= $(CFLAGS)
|
||||||
AR= ar
|
AR= ar
|
||||||
DFLAGS= -DHAVE_PTHREAD #-D_NO_SSE2 #-D_FILE_OFFSET_BITS=64
|
DFLAGS= -DHAVE_PTHREAD #-D_NO_SSE2 #-D_FILE_OFFSET_BITS=64
|
||||||
LOBJS= bwa.o bamlite.o utils.o bwt.o bwtio.o bwtaln.o bwtgap.o bntseq.o bwamem.o bwamem_pair.o stdaln.o \
|
LOBJS= bamlite.o utils.o bwt.o bwtio.o bwtaln.o bwtgap.o bntseq.o bwamem.o bwamem_pair.o stdaln.o \
|
||||||
bseq.o bwaseqio.o bwase.o kstring.o
|
bseq.o bwaseqio.o bwase.o kstring.o
|
||||||
AOBJS= QSufSort.o bwt_gen.o \
|
AOBJS= QSufSort.o bwt_gen.o \
|
||||||
is.o bwtmisc.o bwtindex.o ksw.o simple_dp.o \
|
is.o bwtmisc.o bwtindex.o ksw.o simple_dp.o \
|
||||||
|
|
|
||||||
272
bwa.c
272
bwa.c
|
|
@ -1,272 +0,0 @@
|
||||||
#include <stdlib.h>
|
|
||||||
#include <string.h>
|
|
||||||
#include <stdio.h>
|
|
||||||
#include <math.h>
|
|
||||||
#include "bwa.h"
|
|
||||||
#include "bwt.h"
|
|
||||||
#include "bwtgap.h"
|
|
||||||
#include "bntseq.h"
|
|
||||||
|
|
||||||
#ifndef kroundup32
|
|
||||||
#define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x))
|
|
||||||
#endif
|
|
||||||
|
|
||||||
extern unsigned char nst_nt4_table[256];
|
|
||||||
extern void seq_reverse(int len, uint8_t *seq, int is_comp);
|
|
||||||
|
|
||||||
bwa_opt_t bwa_def_opt = { 11, 4, -1, 1, 6, 32, 2, 0.04 };
|
|
||||||
|
|
||||||
struct bwa_idx_t {
|
|
||||||
bwt_t *bwt;
|
|
||||||
bntseq_t *bns;
|
|
||||||
uint8_t *pac;
|
|
||||||
};
|
|
||||||
|
|
||||||
struct bwa_buf_t {
|
|
||||||
int max_buf;
|
|
||||||
bwa_pestat_t pes;
|
|
||||||
gap_stack_t *stack;
|
|
||||||
gap_opt_t *opt;
|
|
||||||
int *diff_tab;
|
|
||||||
uint8_t *buf;
|
|
||||||
int *logn;
|
|
||||||
};
|
|
||||||
|
|
||||||
bwa_idx_t *bwa_idx_load(const char *prefix)
|
|
||||||
{
|
|
||||||
bwa_idx_t *p;
|
|
||||||
int l;
|
|
||||||
char *str;
|
|
||||||
l = strlen(prefix);
|
|
||||||
p = calloc(1, sizeof(bwa_idx_t));
|
|
||||||
str = malloc(l + 10);
|
|
||||||
strcpy(str, prefix);
|
|
||||||
p->bns = bns_restore(str);
|
|
||||||
strcpy(str + l, ".bwt");
|
|
||||||
p->bwt = bwt_restore_bwt(str);
|
|
||||||
str[l] = 0;
|
|
||||||
strcpy(str + l, ".sa");
|
|
||||||
bwt_restore_sa(str, p->bwt);
|
|
||||||
free(str);
|
|
||||||
p->pac = calloc(p->bns->l_pac/4+1, 1);
|
|
||||||
fread(p->pac, 1, p->bns->l_pac/4+1, p->bns->fp_pac);
|
|
||||||
fclose(p->bns->fp_pac);
|
|
||||||
p->bns->fp_pac = 0;
|
|
||||||
return p;
|
|
||||||
}
|
|
||||||
|
|
||||||
void bwa_idx_destroy(bwa_idx_t *p)
|
|
||||||
{
|
|
||||||
bns_destroy(p->bns);
|
|
||||||
bwt_destroy(p->bwt);
|
|
||||||
free(p->pac);
|
|
||||||
free(p);
|
|
||||||
}
|
|
||||||
|
|
||||||
bwa_buf_t *bwa_buf_init(const bwa_opt_t *opt, int max_score)
|
|
||||||
{
|
|
||||||
extern gap_opt_t *gap_init_opt(void);
|
|
||||||
extern int bwa_cal_maxdiff(int l, double err, double thres);
|
|
||||||
int i;
|
|
||||||
bwa_buf_t *p;
|
|
||||||
p = malloc(sizeof(bwa_buf_t));
|
|
||||||
p->stack = gap_init_stack2(max_score);
|
|
||||||
p->opt = gap_init_opt();
|
|
||||||
p->opt->s_gapo = opt->s_gapo;
|
|
||||||
p->opt->s_gape = opt->s_gape;
|
|
||||||
p->opt->max_diff = opt->max_diff;
|
|
||||||
p->opt->max_gapo = opt->max_gapo;
|
|
||||||
p->opt->max_gape = opt->max_gape;
|
|
||||||
p->opt->seed_len = opt->seed_len;
|
|
||||||
p->opt->max_seed_diff = opt->max_seed_diff;
|
|
||||||
p->opt->fnr = opt->fnr;
|
|
||||||
p->diff_tab = calloc(BWA_MAX_QUERY_LEN, sizeof(int));
|
|
||||||
for (i = 1; i < BWA_MAX_QUERY_LEN; ++i)
|
|
||||||
p->diff_tab[i] = bwa_cal_maxdiff(i, BWA_AVG_ERR, opt->fnr);
|
|
||||||
p->logn = calloc(256, sizeof(int));
|
|
||||||
for (i = 1; i != 256; ++i)
|
|
||||||
p->logn[i] = (int)(4.343 * log(i) + 0.499);
|
|
||||||
return p;
|
|
||||||
}
|
|
||||||
|
|
||||||
void bwa_buf_destroy(bwa_buf_t *p)
|
|
||||||
{
|
|
||||||
gap_destroy_stack(p->stack);
|
|
||||||
free(p->diff_tab); free(p->logn); free(p->opt);
|
|
||||||
free(p);
|
|
||||||
}
|
|
||||||
|
|
||||||
bwa_sai_t bwa_sai(const bwa_idx_t *idx, bwa_buf_t *buf, const char *seq)
|
|
||||||
{
|
|
||||||
extern int bwt_cal_width(const bwt_t *bwt, int len, const ubyte_t *str, bwt_width_t *width);
|
|
||||||
int i, seq_len, buf_len;
|
|
||||||
bwt_width_t *w, *seed_w;
|
|
||||||
uint8_t *s;
|
|
||||||
gap_opt_t opt2 = *buf->opt;
|
|
||||||
bwa_sai_t sai;
|
|
||||||
|
|
||||||
seq_len = strlen(seq);
|
|
||||||
// estimate the buffer length
|
|
||||||
buf_len = (buf->opt->seed_len + seq_len + 1) * sizeof(bwt_width_t) + seq_len;
|
|
||||||
if (buf_len > buf->max_buf) {
|
|
||||||
buf->max_buf = buf_len;
|
|
||||||
kroundup32(buf->max_buf);
|
|
||||||
buf->buf = realloc(buf->buf, buf->max_buf);
|
|
||||||
}
|
|
||||||
memset(buf->buf, 0, buf_len);
|
|
||||||
seed_w = (bwt_width_t*)buf->buf;
|
|
||||||
w = seed_w + buf->opt->seed_len;
|
|
||||||
s = (uint8_t*)(w + seq_len + 1);
|
|
||||||
if (opt2.fnr > 0.) opt2.max_diff = buf->diff_tab[seq_len];
|
|
||||||
// copy the sequence
|
|
||||||
for (i = 0; i < seq_len; ++i)
|
|
||||||
s[i] = nst_nt4_table[(int)seq[i]];
|
|
||||||
seq_reverse(seq_len, s, 0);
|
|
||||||
// mapping
|
|
||||||
bwt_cal_width(idx->bwt, seq_len, s, w);
|
|
||||||
if (opt2.seed_len >= seq_len) opt2.seed_len = 0x7fffffff;
|
|
||||||
if (seq_len > buf->opt->seed_len)
|
|
||||||
bwt_cal_width(idx->bwt, buf->opt->seed_len, s + (seq_len - buf->opt->seed_len), seed_w);
|
|
||||||
for (i = 0; i < seq_len; ++i) // complement; I forgot why...
|
|
||||||
s[i] = s[i] > 3? 4 : 3 - s[i];
|
|
||||||
sai.sai = (bwa_sai1_t*)bwt_match_gap(idx->bwt, seq_len, s, w, seq_len <= buf->opt->seed_len? 0 : seed_w, &opt2, &sai.n, buf->stack);
|
|
||||||
return sai;
|
|
||||||
}
|
|
||||||
|
|
||||||
static void compute_NM(const uint8_t *pac, uint64_t l_pac, uint8_t *seq, int64_t pos, int n_cigar, uint32_t *cigar, int *n_mm, int *n_gaps)
|
|
||||||
{
|
|
||||||
uint64_t x = pos, z;
|
|
||||||
int k, y = 0;
|
|
||||||
*n_mm = *n_gaps = 0;
|
|
||||||
for (k = 0; k < n_cigar; ++k) {
|
|
||||||
int l = cigar[k]>>4;
|
|
||||||
int op = cigar[k]&0xf;
|
|
||||||
if (op == 0) { // match/mismatch
|
|
||||||
for (z = 0; z < l && x + z < l_pac; ++z) {
|
|
||||||
int c = pac[(x+z)>>2] >> ((~(x+z)&3)<<1) & 3;
|
|
||||||
if (c > 3 || seq[y+z] > 3 || c != seq[y+z]) ++(*n_mm);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (op == 1 || op == 2) (*n_gaps) += l;
|
|
||||||
if (op == 0 || op == 2) x += l;
|
|
||||||
if (op == 0 || op == 1 || op == 4) y += l;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void bwa_sa2aln(const bwa_idx_t *idx, bwa_buf_t *buf, const char *seq, uint64_t sa, int n_gaps, bwa_aln_t *aln)
|
|
||||||
{
|
|
||||||
extern bwtint_t bwa_sa2pos(const bntseq_t *bns, const bwt_t *bwt, bwtint_t sapos, int len, int *strand);
|
|
||||||
extern bwa_cigar_t *bwa_refine_gapped_core(bwtint_t l_pac, const ubyte_t *pacseq, int len, const uint8_t *seq, bwtint_t *_pos, int ext, int *n_cigar, int is_end_correct);
|
|
||||||
int strand, seq_len, i, n_gap, n_mm;
|
|
||||||
uint64_t pos3, pac_pos;
|
|
||||||
uint8_t *s[2];
|
|
||||||
|
|
||||||
memset(aln, 0, sizeof(bwa_aln_t));
|
|
||||||
seq_len = strlen(seq);
|
|
||||||
if (seq_len<<1 > buf->max_buf) {
|
|
||||||
buf->max_buf = seq_len<<1;
|
|
||||||
kroundup32(buf->max_buf);
|
|
||||||
buf->buf = realloc(buf->buf, buf->max_buf);
|
|
||||||
}
|
|
||||||
s[0] = buf->buf;
|
|
||||||
s[1] = s[0] + seq_len;
|
|
||||||
for (i = 0; i < seq_len; ++i)
|
|
||||||
s[0][i] = s[1][i] = nst_nt4_table[(int)seq[i]];
|
|
||||||
seq_reverse(seq_len, s[1], 1);
|
|
||||||
pac_pos = bwa_sa2pos(idx->bns, idx->bwt, sa, seq_len, &strand);
|
|
||||||
if (strand) aln->flag |= 16;
|
|
||||||
if (n_gaps) { // only for gapped alignment
|
|
||||||
int n_cigar;
|
|
||||||
bwa_cigar_t *cigar16;
|
|
||||||
cigar16 = bwa_refine_gapped_core(idx->bns->l_pac, idx->pac, seq_len, s[strand], &pac_pos, strand? n_gaps : -n_gaps, &n_cigar, 1);
|
|
||||||
aln->n_cigar = n_cigar;
|
|
||||||
aln->cigar = malloc(n_cigar * 4);
|
|
||||||
for (i = 0, pos3 = pac_pos; i < n_cigar; ++i) {
|
|
||||||
int op = cigar16[i]>>14;
|
|
||||||
int len = cigar16[i]&0x3fff;
|
|
||||||
if (op == 3) op = 4; // the 16-bit CIGAR is different from the 32-bit CIGAR
|
|
||||||
aln->cigar[i] = len<<4 | op;
|
|
||||||
if (op == 0 || op == 2) pos3 += len;
|
|
||||||
}
|
|
||||||
free(cigar16);
|
|
||||||
} else { // ungapped
|
|
||||||
aln->n_cigar = 1;
|
|
||||||
aln->cigar = malloc(4);
|
|
||||||
aln->cigar[0] = seq_len<<4 | 0;
|
|
||||||
pos3 = pac_pos + seq_len;
|
|
||||||
}
|
|
||||||
aln->n_n = bns_cnt_ambi(idx->bns, pac_pos, pos3 - pac_pos, &aln->ref_id);
|
|
||||||
aln->offset = pac_pos - idx->bns->anns[aln->ref_id].offset;
|
|
||||||
if (pos3 - idx->bns->anns[aln->ref_id].offset > idx->bns->anns[aln->ref_id].len) // read mapped beyond the end of a sequence
|
|
||||||
aln->flag |= 4; // read unmapped
|
|
||||||
compute_NM(idx->pac, idx->bns->l_pac, s[strand], pac_pos, aln->n_cigar, aln->cigar, &n_mm, &n_gap);
|
|
||||||
aln->n_mm = n_mm;
|
|
||||||
aln->n_gap = n_gap;
|
|
||||||
}
|
|
||||||
|
|
||||||
/************************
|
|
||||||
* Single-end alignment *
|
|
||||||
************************/
|
|
||||||
|
|
||||||
bwa_one_t *bwa_se(const bwa_idx_t *idx, bwa_buf_t *buf, const char *seq, int gen_cigar)
|
|
||||||
{
|
|
||||||
bwa_one_t *one;
|
|
||||||
int best, cnt, i, seq_len;
|
|
||||||
|
|
||||||
seq_len = strlen(seq);
|
|
||||||
one = calloc(1, sizeof(bwa_one_t));
|
|
||||||
one->sai = bwa_sai(idx, buf, seq);
|
|
||||||
if (one->sai.n == 0) return one;
|
|
||||||
// count number of hits; randomly select one alignment
|
|
||||||
best = one->sai.sai[0].score;
|
|
||||||
for (i = cnt = 0; i < one->sai.n; ++i) {
|
|
||||||
bwa_sai1_t *p = &one->sai.sai[i];
|
|
||||||
if (p->score > best) break;
|
|
||||||
if (drand48() * (p->l - p->k + 1 + cnt) > (double)cnt) {
|
|
||||||
one->which = p;
|
|
||||||
one->sa = p->k + (bwtint_t)((p->l - p->k + 1) * drand48());
|
|
||||||
}
|
|
||||||
cnt += p->l - p->k + 1;
|
|
||||||
}
|
|
||||||
one->c1 = cnt;
|
|
||||||
for (; i < one->sai.n; ++i)
|
|
||||||
cnt += one->sai.sai[i].l - one->sai.sai[i].k + 1;
|
|
||||||
one->c2 = cnt - one->c1;
|
|
||||||
// estimate single-end mapping quality
|
|
||||||
one->mapQs = -1;
|
|
||||||
if (one->c1 == 0) one->mapQs = 23; // FIXME: is it possible?
|
|
||||||
else if (one->c1 > 1) one->mapQs = 0;
|
|
||||||
else {
|
|
||||||
int diff = one->which->n_mm + one->which->n_gapo + one->which->n_gape;
|
|
||||||
if (diff >= buf->diff_tab[seq_len]) one->mapQs = 25;
|
|
||||||
else if (one->c2 == 0) one->mapQs = 37;
|
|
||||||
}
|
|
||||||
if (one->mapQs < 0) {
|
|
||||||
cnt = (one->c2 >= 255)? 255 : one->c2;
|
|
||||||
one->mapQs = 23 < buf->logn[cnt]? 0 : 23 - buf->logn[cnt];
|
|
||||||
}
|
|
||||||
one->mapQ = one->mapQs;
|
|
||||||
// compute CIGAR on request
|
|
||||||
one->one.ref_id = -1;
|
|
||||||
if (gen_cigar) bwa_sa2aln(idx, buf, seq, one->sa, one->which->n_gapo + one->which->n_gape, &one->one);
|
|
||||||
return one;
|
|
||||||
}
|
|
||||||
|
|
||||||
void bwa_one_destroy(bwa_one_t *one)
|
|
||||||
{
|
|
||||||
free(one->sai.sai);
|
|
||||||
free(one->one.cigar);
|
|
||||||
free(one);
|
|
||||||
}
|
|
||||||
|
|
||||||
/************************
|
|
||||||
* Paired-end alignment *
|
|
||||||
************************/
|
|
||||||
|
|
||||||
void bwa_pestat(bwa_buf_t *buf, int n, bwa_one_t **o[2])
|
|
||||||
{
|
|
||||||
}
|
|
||||||
|
|
||||||
void bwa_pe(const bwa_idx_t *idx, bwa_buf_t *buf, const char *seq[2], bwa_one_t *o[2])
|
|
||||||
{
|
|
||||||
}
|
|
||||||
107
bwa.h
107
bwa.h
|
|
@ -1,107 +0,0 @@
|
||||||
#ifndef BWA_H_
|
|
||||||
#define BWA_H_
|
|
||||||
|
|
||||||
#include <stdint.h>
|
|
||||||
|
|
||||||
#define BWA_DEF_MAX_SCORE 2048
|
|
||||||
#define BWA_MAX_QUERY_LEN 1024
|
|
||||||
|
|
||||||
// BWA index
|
|
||||||
struct bwa_idx_t;
|
|
||||||
typedef struct bwa_idx_t bwa_idx_t;
|
|
||||||
|
|
||||||
// Buffer for BWA alignment
|
|
||||||
struct bwa_buf_t;
|
|
||||||
typedef struct bwa_buf_t bwa_buf_t;
|
|
||||||
|
|
||||||
// BWA alignment options
|
|
||||||
typedef struct {
|
|
||||||
int s_gapo, s_gape; // gap open and extension penalties; the mismatch penalty is fixed at 3
|
|
||||||
int max_diff, max_gapo, max_gape; // max differences (-1 to use fnr for length-adjusted max diff), gap opens and gap extensions
|
|
||||||
int seed_len, max_seed_diff; // seed length and max differences allowed in the seed
|
|
||||||
float fnr; // parameter for automatic length-adjusted max differences
|
|
||||||
} bwa_opt_t;
|
|
||||||
|
|
||||||
// default BWA alignment options
|
|
||||||
extern bwa_opt_t bwa_def_opt; // = { 11, 4, -1, 1, 6, 32, 2, 0.04 }
|
|
||||||
|
|
||||||
// an interval hit in the SA coordinate; basic unit in .sai files
|
|
||||||
typedef struct {
|
|
||||||
uint32_t n_mm:16, n_gapo:8, n_gape:8;
|
|
||||||
int score;
|
|
||||||
uint64_t k, l; // [k,l] is the SA interval; each interval has l-k+1 hits
|
|
||||||
} bwa_sai1_t;
|
|
||||||
|
|
||||||
// all interval hits in the SA coordinate
|
|
||||||
typedef struct {
|
|
||||||
int n; // number of interval hits
|
|
||||||
bwa_sai1_t *sai;
|
|
||||||
} bwa_sai_t;
|
|
||||||
|
|
||||||
// an alignment
|
|
||||||
typedef struct {
|
|
||||||
uint32_t n_n:8, n_gap:12, n_mm:12; // number of ambiguous bases, gaps and mismatches in the alignment
|
|
||||||
int32_t ref_id; // referece sequence index (the first seq is indexed by 0)
|
|
||||||
uint32_t offset; // coordinate on the reference; zero-based
|
|
||||||
uint32_t n_cigar:16, flag:16; // number of CIGAR operations; SAM flag
|
|
||||||
uint32_t *cigar; // CIGAR in the BAM 28+4 encoding; having n_cigar operations
|
|
||||||
} bwa_aln_t;
|
|
||||||
|
|
||||||
typedef struct {
|
|
||||||
int mapQs, mapQ, c1, c2;
|
|
||||||
uint64_t sa;
|
|
||||||
bwa_sai1_t *which;
|
|
||||||
bwa_sai_t sai;
|
|
||||||
bwa_aln_t one;
|
|
||||||
} bwa_one_t;
|
|
||||||
|
|
||||||
typedef struct {
|
|
||||||
double avg, std, ap_prior;
|
|
||||||
uint64_t low, high, high_bayesian;
|
|
||||||
} bwa_pestat_t;
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
extern "C" {
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// load a BWA index
|
|
||||||
bwa_idx_t *bwa_idx_load(const char *prefix);
|
|
||||||
void bwa_idx_destroy(bwa_idx_t *p);
|
|
||||||
|
|
||||||
// allocate a BWA alignment buffer; if unsure, set opt to &bwa_def_opt and max_score to BWA_DEF_MAX_SCORE
|
|
||||||
bwa_buf_t *bwa_buf_init(const bwa_opt_t *opt, int max_score);
|
|
||||||
void bwa_buf_destroy(bwa_buf_t *p);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Find all the SA intervals
|
|
||||||
*
|
|
||||||
* @param idx BWA index; multiple threads can share the same index
|
|
||||||
* @param buf BWA alignment buffer; each thread should have its own buffer
|
|
||||||
* @param seq NULL terminated C string, consisting of A/C/G/T/N only
|
|
||||||
*
|
|
||||||
* @return SA intervals seq is matched to
|
|
||||||
*/
|
|
||||||
bwa_sai_t bwa_sai(const bwa_idx_t *idx, bwa_buf_t *buf, const char *seq);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Construct an alignment in the base-pair coordinate
|
|
||||||
*
|
|
||||||
* @param idx BWA index
|
|
||||||
* @param buf BWA alignment buffer
|
|
||||||
* @param seq NULL terinated C string
|
|
||||||
* @param sa Suffix array value
|
|
||||||
* @param n_gaps Number of gaps (typically equal to bwa_sai1_t::n_gapo + bwa_sai1_t::n_gape
|
|
||||||
*
|
|
||||||
* @return An alignment
|
|
||||||
*/
|
|
||||||
void bwa_sa2aln(const bwa_idx_t *idx, bwa_buf_t *buf, const char *seq, uint64_t sa, int n_gaps, bwa_aln_t *aln);
|
|
||||||
|
|
||||||
bwa_one_t *bwa_se(const bwa_idx_t *idx, bwa_buf_t *buf, const char *seq, int gen_cigar);
|
|
||||||
|
|
||||||
void bwa_one_destroy(bwa_one_t *one);
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
@ -4,6 +4,7 @@
|
||||||
#include "kstring.h"
|
#include "kstring.h"
|
||||||
#include "bwamem.h"
|
#include "bwamem.h"
|
||||||
#include "kvec.h"
|
#include "kvec.h"
|
||||||
|
#include "utils.h"
|
||||||
|
|
||||||
#define MIN_RATIO 0.8
|
#define MIN_RATIO 0.8
|
||||||
#define MIN_DIR_CNT 10
|
#define MIN_DIR_CNT 10
|
||||||
|
|
@ -13,9 +14,6 @@
|
||||||
#define MAX_STDDEV 4.0
|
#define MAX_STDDEV 4.0
|
||||||
#define EXT_STDDEV 4.0
|
#define EXT_STDDEV 4.0
|
||||||
|
|
||||||
typedef kvec_t(uint64_t) vec64_t;
|
|
||||||
|
|
||||||
extern void ks_introsort_uint64_t(size_t n, uint64_t *a);
|
|
||||||
void bwa_hit2sam(kstring_t *str, const int8_t mat[25], int q, int r, int w, const bntseq_t *bns, const uint8_t *pac, bseq1_t *s, bwahit_t *p, int is_hard);
|
void bwa_hit2sam(kstring_t *str, const int8_t mat[25], int q, int r, int w, const bntseq_t *bns, const uint8_t *pac, bseq1_t *s, bwahit_t *p, int is_hard);
|
||||||
|
|
||||||
static int cal_sub(const mem_opt_t *opt, mem_alnreg_v *r)
|
static int cal_sub(const mem_opt_t *opt, mem_alnreg_v *r)
|
||||||
|
|
@ -34,9 +32,8 @@ static int cal_sub(const mem_opt_t *opt, mem_alnreg_v *r)
|
||||||
|
|
||||||
void mem_pestat(const mem_opt_t *opt, int64_t l_pac, int n, const mem_alnreg_v *regs, mem_pestat_t pes[4])
|
void mem_pestat(const mem_opt_t *opt, int64_t l_pac, int n, const mem_alnreg_v *regs, mem_pestat_t pes[4])
|
||||||
{
|
{
|
||||||
extern void ks_introsort_uint64_t(size_t n, uint64_t *a);
|
|
||||||
int i, d, max;
|
int i, d, max;
|
||||||
vec64_t isize[4];
|
uint64_v isize[4];
|
||||||
memset(pes, 0, 4 * sizeof(mem_pestat_t));
|
memset(pes, 0, 4 * sizeof(mem_pestat_t));
|
||||||
memset(isize, 0, sizeof(kvec_t(int)) * 4);
|
memset(isize, 0, sizeof(kvec_t(int)) * 4);
|
||||||
for (i = 0; i < n>>1; ++i) {
|
for (i = 0; i < n>>1; ++i) {
|
||||||
|
|
@ -58,14 +55,14 @@ void mem_pestat(const mem_opt_t *opt, int64_t l_pac, int n, const mem_alnreg_v *
|
||||||
if (mem_verbose >= 3) fprintf(stderr, "[M::%s] # candidate unique pairs for (FF, FR, RF, RR): (%ld, %ld, %ld, %ld)\n", __func__, isize[0].n, isize[1].n, isize[2].n, isize[3].n);
|
if (mem_verbose >= 3) fprintf(stderr, "[M::%s] # candidate unique pairs for (FF, FR, RF, RR): (%ld, %ld, %ld, %ld)\n", __func__, isize[0].n, isize[1].n, isize[2].n, isize[3].n);
|
||||||
for (d = 0; d < 4; ++d) { // TODO: this block is nearly identical to the one in bwtsw2_pair.c. It would be better to merge these two.
|
for (d = 0; d < 4; ++d) { // TODO: this block is nearly identical to the one in bwtsw2_pair.c. It would be better to merge these two.
|
||||||
mem_pestat_t *r = &pes[d];
|
mem_pestat_t *r = &pes[d];
|
||||||
vec64_t *q = &isize[d];
|
uint64_v *q = &isize[d];
|
||||||
int p25, p50, p75, x;
|
int p25, p50, p75, x;
|
||||||
if (q->n < MIN_DIR_CNT) {
|
if (q->n < MIN_DIR_CNT) {
|
||||||
fprintf(stderr, "[M::%s] skip orientation %c%c as there are not enough pairs\n", __func__, "FR"[d>>1&1], "FR"[d&1]);
|
fprintf(stderr, "[M::%s] skip orientation %c%c as there are not enough pairs\n", __func__, "FR"[d>>1&1], "FR"[d&1]);
|
||||||
r->failed = 1;
|
r->failed = 1;
|
||||||
continue;
|
continue;
|
||||||
} else fprintf(stderr, "[M::%s] analyzing insert size distribution for orientation %c%c...\n", __func__, "FR"[d>>1&1], "FR"[d&1]);
|
} else fprintf(stderr, "[M::%s] analyzing insert size distribution for orientation %c%c...\n", __func__, "FR"[d>>1&1], "FR"[d&1]);
|
||||||
ks_introsort_uint64_t(q->n, q->a);
|
ks_introsort_64(q->n, q->a);
|
||||||
p25 = q->a[(int)(.25 * q->n + .499)];
|
p25 = q->a[(int)(.25 * q->n + .499)];
|
||||||
p50 = q->a[(int)(.50 * q->n + .499)];
|
p50 = q->a[(int)(.50 * q->n + .499)];
|
||||||
p75 = q->a[(int)(.75 * q->n + .499)];
|
p75 = q->a[(int)(.75 * q->n + .499)];
|
||||||
|
|
@ -102,7 +99,7 @@ void mem_pestat(const mem_opt_t *opt, int64_t l_pac, int n, const mem_alnreg_v *
|
||||||
|
|
||||||
void mem_pair(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, const mem_pestat_t pes[4], bseq1_t s[2], mem_alnreg_v a[2], bwahit_t h[2])
|
void mem_pair(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, const mem_pestat_t pes[4], bseq1_t s[2], mem_alnreg_v a[2], bwahit_t h[2])
|
||||||
{
|
{
|
||||||
vec64_t v;
|
uint64_v v;
|
||||||
int r, i, y[4]; // y[] keeps the last hit
|
int r, i, y[4]; // y[] keeps the last hit
|
||||||
kv_init(v);
|
kv_init(v);
|
||||||
for (r = 0; r < 2; ++r) {
|
for (r = 0; r < 2; ++r) {
|
||||||
|
|
@ -113,7 +110,7 @@ void mem_pair(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, con
|
||||||
kv_push(uint64_t, v, key);
|
kv_push(uint64_t, v, key);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
ks_introsort_uint64_t(v.n, v.a);
|
ks_introsort_64(v.n, v.a);
|
||||||
y[0] = y[1] = y[2] = y[3] = -1;
|
y[0] = y[1] = y[2] = y[3] = -1;
|
||||||
printf("**** %ld\n", v.n);
|
printf("**** %ld\n", v.n);
|
||||||
for (i = 0; i < v.n; ++i) {
|
for (i = 0; i < v.n; ++i) {
|
||||||
|
|
|
||||||
33
bwape.c
33
bwape.c
|
|
@ -21,24 +21,15 @@ typedef struct {
|
||||||
bwtint_t low, high, high_bayesian;
|
bwtint_t low, high, high_bayesian;
|
||||||
} isize_info_t;
|
} isize_info_t;
|
||||||
|
|
||||||
typedef struct {
|
|
||||||
uint64_t x, y;
|
|
||||||
} b128_t;
|
|
||||||
|
|
||||||
#define b128_lt(a, b) ((a).x < (b).x)
|
|
||||||
#define b128_eq(a, b) ((a).x == (b).x && (a).y == (b).y)
|
#define b128_eq(a, b) ((a).x == (b).x && (a).y == (b).y)
|
||||||
#define b128_hash(a) ((uint32_t)(a).x)
|
#define b128_hash(a) ((uint32_t)(a).x)
|
||||||
|
|
||||||
#include "khash.h"
|
#include "khash.h"
|
||||||
KHASH_INIT(b128, b128_t, poslist_t, 1, b128_hash, b128_eq)
|
KHASH_INIT(b128, pair64_t, poslist_t, 1, b128_hash, b128_eq)
|
||||||
|
|
||||||
#include "ksort.h"
|
|
||||||
KSORT_INIT(b128, b128_t, b128_lt)
|
|
||||||
KSORT_INIT_GENERIC(uint64_t)
|
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
kvec_t(b128_t) arr;
|
pair64_v arr;
|
||||||
kvec_t(b128_t) pos[2];
|
pair64_v pos[2];
|
||||||
kvec_t(bwt_aln1_t) aln[2];
|
kvec_t(bwt_aln1_t) aln[2];
|
||||||
} pe_data_t;
|
} pe_data_t;
|
||||||
|
|
||||||
|
|
@ -120,7 +111,7 @@ static int infer_isize(int n_seqs, bwa_seq_t *seqs[2], isize_info_t *ii, double
|
||||||
free(isizes);
|
free(isizes);
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
ks_introsort(uint64_t, tot, isizes);
|
ks_introsort_64(tot, isizes);
|
||||||
p25 = isizes[(int)(tot*0.25 + 0.5)];
|
p25 = isizes[(int)(tot*0.25 + 0.5)];
|
||||||
p50 = isizes[(int)(tot*0.50 + 0.5)];
|
p50 = isizes[(int)(tot*0.50 + 0.5)];
|
||||||
p75 = isizes[(int)(tot*0.75 + 0.5)];
|
p75 = isizes[(int)(tot*0.75 + 0.5)];
|
||||||
|
|
@ -170,7 +161,7 @@ static int pairing(bwa_seq_t *p[2], pe_data_t *d, const pe_opt_t *opt, int s_mm,
|
||||||
{
|
{
|
||||||
int i, j, o_n, subo_n, cnt_chg = 0, low_bound = ii->low, max_len;
|
int i, j, o_n, subo_n, cnt_chg = 0, low_bound = ii->low, max_len;
|
||||||
uint64_t o_score, subo_score;
|
uint64_t o_score, subo_score;
|
||||||
b128_t last_pos[2][2], o_pos[2];
|
pair64_t last_pos[2][2], o_pos[2];
|
||||||
max_len = p[0]->full_len;
|
max_len = p[0]->full_len;
|
||||||
if (max_len < p[1]->full_len) max_len = p[1]->full_len;
|
if (max_len < p[1]->full_len) max_len = p[1]->full_len;
|
||||||
if (low_bound < max_len) low_bound = max_len;
|
if (low_bound < max_len) low_bound = max_len;
|
||||||
|
|
@ -206,11 +197,11 @@ static int pairing(bwa_seq_t *p[2], pe_data_t *d, const pe_opt_t *opt, int s_mm,
|
||||||
|
|
||||||
o_score = subo_score = (uint64_t)-1;
|
o_score = subo_score = (uint64_t)-1;
|
||||||
o_n = subo_n = 0;
|
o_n = subo_n = 0;
|
||||||
ks_introsort(b128, d->arr.n, d->arr.a);
|
ks_introsort_128(d->arr.n, d->arr.a);
|
||||||
for (j = 0; j < 2; ++j) last_pos[j][0].x = last_pos[j][0].y = last_pos[j][1].x = last_pos[j][1].y = (uint64_t)-1;
|
for (j = 0; j < 2; ++j) last_pos[j][0].x = last_pos[j][0].y = last_pos[j][1].x = last_pos[j][1].y = (uint64_t)-1;
|
||||||
if (opt->type == BWA_PET_STD) {
|
if (opt->type == BWA_PET_STD) {
|
||||||
for (i = 0; i < d->arr.n; ++i) {
|
for (i = 0; i < d->arr.n; ++i) {
|
||||||
b128_t x = d->arr.a[i];
|
pair64_t x = d->arr.a[i];
|
||||||
int strand = x.y>>1&1;
|
int strand = x.y>>1&1;
|
||||||
if (strand == 1) { // reverse strand, then check
|
if (strand == 1) { // reverse strand, then check
|
||||||
int y = 1 - (x.y&1);
|
int y = 1 - (x.y&1);
|
||||||
|
|
@ -223,7 +214,7 @@ static int pairing(bwa_seq_t *p[2], pe_data_t *d, const pe_opt_t *opt, int s_mm,
|
||||||
}
|
}
|
||||||
} else if (opt->type == BWA_PET_SOLID) {
|
} else if (opt->type == BWA_PET_SOLID) {
|
||||||
for (i = 0; i < d->arr.n; ++i) {
|
for (i = 0; i < d->arr.n; ++i) {
|
||||||
b128_t x = d->arr.a[i];
|
pair64_t x = d->arr.a[i];
|
||||||
int strand = x.y>>1&1;
|
int strand = x.y>>1&1;
|
||||||
if ((strand^x.y)&1) { // push
|
if ((strand^x.y)&1) { // push
|
||||||
int y = 1 - (x.y&1);
|
int y = 1 - (x.y&1);
|
||||||
|
|
@ -345,7 +336,7 @@ int bwa_cal_pac_pos_pe(const bntseq_t *bns, const char *prefix, bwt_t *const _bw
|
||||||
if ((p[0]->type == BWA_TYPE_UNIQUE || p[0]->type == BWA_TYPE_REPEAT)
|
if ((p[0]->type == BWA_TYPE_UNIQUE || p[0]->type == BWA_TYPE_REPEAT)
|
||||||
&& (p[1]->type == BWA_TYPE_UNIQUE || p[1]->type == BWA_TYPE_REPEAT))
|
&& (p[1]->type == BWA_TYPE_UNIQUE || p[1]->type == BWA_TYPE_REPEAT))
|
||||||
{ // only when both ends mapped
|
{ // only when both ends mapped
|
||||||
b128_t x;
|
pair64_t x;
|
||||||
int j, k;
|
int j, k;
|
||||||
long long n_occ[2];
|
long long n_occ[2];
|
||||||
for (j = 0; j < 2; ++j) {
|
for (j = 0; j < 2; ++j) {
|
||||||
|
|
@ -360,7 +351,7 @@ int bwa_cal_pac_pos_pe(const bntseq_t *bns, const char *prefix, bwt_t *const _bw
|
||||||
bwt_aln1_t *r = d->aln[j].a + k;
|
bwt_aln1_t *r = d->aln[j].a + k;
|
||||||
bwtint_t l;
|
bwtint_t l;
|
||||||
if (0 && r->l - r->k + 1 >= MIN_HASH_WIDTH) { // then check hash table
|
if (0 && r->l - r->k + 1 >= MIN_HASH_WIDTH) { // then check hash table
|
||||||
b128_t key;
|
pair64_t key;
|
||||||
int ret;
|
int ret;
|
||||||
key.x = r->k; key.y = r->l;
|
key.x = r->k; key.y = r->l;
|
||||||
khint_t iter = kh_put(b128, g_hash, key, &ret);
|
khint_t iter = kh_put(b128, g_hash, key, &ret);
|
||||||
|
|
@ -377,14 +368,14 @@ int bwa_cal_pac_pos_pe(const bntseq_t *bns, const char *prefix, bwt_t *const _bw
|
||||||
for (l = 0; l < kh_val(g_hash, iter).n; ++l) {
|
for (l = 0; l < kh_val(g_hash, iter).n; ++l) {
|
||||||
x.x = kh_val(g_hash, iter).a[l]>>1;
|
x.x = kh_val(g_hash, iter).a[l]>>1;
|
||||||
x.y = k<<2 | (kh_val(g_hash, iter).a[l]&1)<<1 | j;
|
x.y = k<<2 | (kh_val(g_hash, iter).a[l]&1)<<1 | j;
|
||||||
kv_push(b128_t, d->arr, x);
|
kv_push(pair64_t, d->arr, x);
|
||||||
}
|
}
|
||||||
} else { // then calculate on the fly
|
} else { // then calculate on the fly
|
||||||
for (l = r->k; l <= r->l; ++l) {
|
for (l = r->k; l <= r->l; ++l) {
|
||||||
int strand;
|
int strand;
|
||||||
x.x = bwa_sa2pos(bns, bwt, l, p[j]->len, &strand);
|
x.x = bwa_sa2pos(bns, bwt, l, p[j]->len, &strand);
|
||||||
x.y = k<<2 | strand<<1 | j;
|
x.y = k<<2 | strand<<1 | j;
|
||||||
kv_push(b128_t, d->arr, x);
|
kv_push(pair64_t, d->arr, x);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -6,6 +6,7 @@
|
||||||
#include "bntseq.h"
|
#include "bntseq.h"
|
||||||
#include "bwtsw2.h"
|
#include "bwtsw2.h"
|
||||||
#include "kstring.h"
|
#include "kstring.h"
|
||||||
|
#include "utils.h"
|
||||||
#ifndef _NO_SSE2
|
#ifndef _NO_SSE2
|
||||||
#include "ksw.h"
|
#include "ksw.h"
|
||||||
#else
|
#else
|
||||||
|
|
@ -24,7 +25,6 @@ typedef struct {
|
||||||
|
|
||||||
bsw2pestat_t bsw2_stat(int n, bwtsw2_t **buf, kstring_t *msg, int max_ins)
|
bsw2pestat_t bsw2_stat(int n, bwtsw2_t **buf, kstring_t *msg, int max_ins)
|
||||||
{
|
{
|
||||||
extern void ks_introsort_uint64_t(size_t n, uint64_t *a);
|
|
||||||
int i, k, x, p25, p50, p75, tmp, max_len = 0;
|
int i, k, x, p25, p50, p75, tmp, max_len = 0;
|
||||||
uint64_t *isize;
|
uint64_t *isize;
|
||||||
bsw2pestat_t r;
|
bsw2pestat_t r;
|
||||||
|
|
@ -44,7 +44,7 @@ bsw2pestat_t bsw2_stat(int n, bwtsw2_t **buf, kstring_t *msg, int max_ins)
|
||||||
max_len = max_len > t[1]->end - t[1]->beg? max_len : t[1]->end - t[1]->beg;
|
max_len = max_len > t[1]->end - t[1]->beg? max_len : t[1]->end - t[1]->beg;
|
||||||
isize[k++] = l;
|
isize[k++] = l;
|
||||||
}
|
}
|
||||||
ks_introsort_uint64_t(k, isize);
|
ks_introsort_64(k, isize);
|
||||||
p25 = isize[(int)(.25 * k + .499)];
|
p25 = isize[(int)(.25 * k + .499)];
|
||||||
p50 = isize[(int)(.50 * k + .499)];
|
p50 = isize[(int)(.50 * k + .499)];
|
||||||
p75 = isize[(int)(.75 * k + .499)];
|
p75 = isize[(int)(.75 * k + .499)];
|
||||||
|
|
|
||||||
282
khash.h
282
khash.h
|
|
@ -1,6 +1,6 @@
|
||||||
/* The MIT License
|
/* The MIT License
|
||||||
|
|
||||||
Copyright (c) 2008, 2009 by attractor <attractor@live.co.uk>
|
Copyright (c) 2008, 2009, 2011 by Attractive Chaos <attractor@live.co.uk>
|
||||||
|
|
||||||
Permission is hereby granted, free of charge, to any person obtaining
|
Permission is hereby granted, free of charge, to any person obtaining
|
||||||
a copy of this software and associated documentation files (the
|
a copy of this software and associated documentation files (the
|
||||||
|
|
@ -33,7 +33,6 @@ int main() {
|
||||||
khiter_t k;
|
khiter_t k;
|
||||||
khash_t(32) *h = kh_init(32);
|
khash_t(32) *h = kh_init(32);
|
||||||
k = kh_put(32, h, 5, &ret);
|
k = kh_put(32, h, 5, &ret);
|
||||||
if (!ret) kh_del(32, h, k);
|
|
||||||
kh_value(h, k) = 10;
|
kh_value(h, k) = 10;
|
||||||
k = kh_get(32, h, 10);
|
k = kh_get(32, h, 10);
|
||||||
is_missing = (k == kh_end(h));
|
is_missing = (k == kh_end(h));
|
||||||
|
|
@ -47,6 +46,29 @@ int main() {
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
2011-12-29 (0.2.7):
|
||||||
|
|
||||||
|
* Minor code clean up; no actual effect.
|
||||||
|
|
||||||
|
2011-09-16 (0.2.6):
|
||||||
|
|
||||||
|
* The capacity is a power of 2. This seems to dramatically improve the
|
||||||
|
speed for simple keys. Thank Zilong Tan for the suggestion. Reference:
|
||||||
|
|
||||||
|
- http://code.google.com/p/ulib/
|
||||||
|
- http://nothings.org/computer/judy/
|
||||||
|
|
||||||
|
* Allow to optionally use linear probing which usually has better
|
||||||
|
performance for random input. Double hashing is still the default as it
|
||||||
|
is more robust to certain non-random input.
|
||||||
|
|
||||||
|
* Added Wang's integer hash function (not used by default). This hash
|
||||||
|
function is more robust to certain non-random input.
|
||||||
|
|
||||||
|
2011-02-14 (0.2.5):
|
||||||
|
|
||||||
|
* Allow to declare global functions.
|
||||||
|
|
||||||
2009-09-26 (0.2.4):
|
2009-09-26 (0.2.4):
|
||||||
|
|
||||||
* Improve portability
|
* Improve portability
|
||||||
|
|
@ -86,11 +108,9 @@ int main() {
|
||||||
@header
|
@header
|
||||||
|
|
||||||
Generic hash table library.
|
Generic hash table library.
|
||||||
|
|
||||||
@copyright Heng Li
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#define AC_VERSION_KHASH_H "0.2.4"
|
#define AC_VERSION_KHASH_H "0.2.6"
|
||||||
|
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
|
@ -111,24 +131,14 @@ typedef unsigned long long khint64_t;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef _MSC_VER
|
#ifdef _MSC_VER
|
||||||
#define inline __inline
|
#define kh_inline __inline
|
||||||
|
#else
|
||||||
|
#define kh_inline inline
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
typedef khint32_t khint_t;
|
typedef khint32_t khint_t;
|
||||||
typedef khint_t khiter_t;
|
typedef khint_t khiter_t;
|
||||||
|
|
||||||
#define __ac_HASH_PRIME_SIZE 32
|
|
||||||
static const khint32_t __ac_prime_list[__ac_HASH_PRIME_SIZE] =
|
|
||||||
{
|
|
||||||
0ul, 3ul, 11ul, 23ul, 53ul,
|
|
||||||
97ul, 193ul, 389ul, 769ul, 1543ul,
|
|
||||||
3079ul, 6151ul, 12289ul, 24593ul, 49157ul,
|
|
||||||
98317ul, 196613ul, 393241ul, 786433ul, 1572869ul,
|
|
||||||
3145739ul, 6291469ul, 12582917ul, 25165843ul, 50331653ul,
|
|
||||||
100663319ul, 201326611ul, 402653189ul, 805306457ul, 1610612741ul,
|
|
||||||
3221225473ul, 4294967291ul
|
|
||||||
};
|
|
||||||
|
|
||||||
#define __ac_isempty(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&2)
|
#define __ac_isempty(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&2)
|
||||||
#define __ac_isdel(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&1)
|
#define __ac_isdel(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&1)
|
||||||
#define __ac_iseither(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&3)
|
#define __ac_iseither(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&3)
|
||||||
|
|
@ -137,88 +147,128 @@ static const khint32_t __ac_prime_list[__ac_HASH_PRIME_SIZE] =
|
||||||
#define __ac_set_isboth_false(flag, i) (flag[i>>4]&=~(3ul<<((i&0xfU)<<1)))
|
#define __ac_set_isboth_false(flag, i) (flag[i>>4]&=~(3ul<<((i&0xfU)<<1)))
|
||||||
#define __ac_set_isdel_true(flag, i) (flag[i>>4]|=1ul<<((i&0xfU)<<1))
|
#define __ac_set_isdel_true(flag, i) (flag[i>>4]|=1ul<<((i&0xfU)<<1))
|
||||||
|
|
||||||
|
#ifdef KHASH_LINEAR
|
||||||
|
#define __ac_inc(k, m) 1
|
||||||
|
#else
|
||||||
|
#define __ac_inc(k, m) (((k)>>3 ^ (k)<<3) | 1) & (m)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#define __ac_fsize(m) ((m) < 16? 1 : (m)>>4)
|
||||||
|
|
||||||
|
#ifndef kroundup32
|
||||||
|
#define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x))
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef kcalloc
|
||||||
|
#define kcalloc(N,Z) calloc(N,Z)
|
||||||
|
#endif
|
||||||
|
#ifndef kmalloc
|
||||||
|
#define kmalloc(Z) malloc(Z)
|
||||||
|
#endif
|
||||||
|
#ifndef krealloc
|
||||||
|
#define krealloc(P,Z) realloc(P,Z)
|
||||||
|
#endif
|
||||||
|
#ifndef kfree
|
||||||
|
#define kfree(P) free(P)
|
||||||
|
#endif
|
||||||
|
|
||||||
static const double __ac_HASH_UPPER = 0.77;
|
static const double __ac_HASH_UPPER = 0.77;
|
||||||
|
|
||||||
#define KHASH_INIT(name, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \
|
#define __KHASH_TYPE(name, khkey_t, khval_t) \
|
||||||
typedef struct { \
|
typedef struct { \
|
||||||
khint_t n_buckets, size, n_occupied, upper_bound; \
|
khint_t n_buckets, size, n_occupied, upper_bound; \
|
||||||
khint32_t *flags; \
|
khint32_t *flags; \
|
||||||
khkey_t *keys; \
|
khkey_t *keys; \
|
||||||
khval_t *vals; \
|
khval_t *vals; \
|
||||||
} kh_##name##_t; \
|
} kh_##name##_t;
|
||||||
static inline kh_##name##_t *kh_init_##name() { \
|
|
||||||
return (kh_##name##_t*)calloc(1, sizeof(kh_##name##_t)); \
|
#define __KHASH_PROTOTYPES(name, khkey_t, khval_t) \
|
||||||
|
extern kh_##name##_t *kh_init_##name(void); \
|
||||||
|
extern void kh_destroy_##name(kh_##name##_t *h); \
|
||||||
|
extern void kh_clear_##name(kh_##name##_t *h); \
|
||||||
|
extern khint_t kh_get_##name(const kh_##name##_t *h, khkey_t key); \
|
||||||
|
extern int kh_resize_##name(kh_##name##_t *h, khint_t new_n_buckets); \
|
||||||
|
extern khint_t kh_put_##name(kh_##name##_t *h, khkey_t key, int *ret); \
|
||||||
|
extern void kh_del_##name(kh_##name##_t *h, khint_t x);
|
||||||
|
|
||||||
|
#define __KHASH_IMPL(name, SCOPE, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \
|
||||||
|
SCOPE kh_##name##_t *kh_init_##name(void) { \
|
||||||
|
return (kh_##name##_t*)kcalloc(1, sizeof(kh_##name##_t)); \
|
||||||
} \
|
} \
|
||||||
static inline void kh_destroy_##name(kh_##name##_t *h) \
|
SCOPE void kh_destroy_##name(kh_##name##_t *h) \
|
||||||
{ \
|
{ \
|
||||||
if (h) { \
|
if (h) { \
|
||||||
free(h->keys); free(h->flags); \
|
kfree((void *)h->keys); kfree(h->flags); \
|
||||||
free(h->vals); \
|
kfree((void *)h->vals); \
|
||||||
free(h); \
|
kfree(h); \
|
||||||
} \
|
} \
|
||||||
} \
|
} \
|
||||||
static inline void kh_clear_##name(kh_##name##_t *h) \
|
SCOPE void kh_clear_##name(kh_##name##_t *h) \
|
||||||
{ \
|
{ \
|
||||||
if (h && h->flags) { \
|
if (h && h->flags) { \
|
||||||
memset(h->flags, 0xaa, ((h->n_buckets>>4) + 1) * sizeof(khint32_t)); \
|
memset(h->flags, 0xaa, __ac_fsize(h->n_buckets) * sizeof(khint32_t)); \
|
||||||
h->size = h->n_occupied = 0; \
|
h->size = h->n_occupied = 0; \
|
||||||
} \
|
} \
|
||||||
} \
|
} \
|
||||||
static inline khint_t kh_get_##name(const kh_##name##_t *h, khkey_t key) \
|
SCOPE khint_t kh_get_##name(const kh_##name##_t *h, khkey_t key) \
|
||||||
{ \
|
{ \
|
||||||
if (h->n_buckets) { \
|
if (h->n_buckets) { \
|
||||||
khint_t inc, k, i, last; \
|
khint_t inc, k, i, last, mask; \
|
||||||
k = __hash_func(key); i = k % h->n_buckets; \
|
mask = h->n_buckets - 1; \
|
||||||
inc = 1 + k % (h->n_buckets - 1); last = i; \
|
k = __hash_func(key); i = k & mask; \
|
||||||
|
inc = __ac_inc(k, mask); last = i; /* inc==1 for linear probing */ \
|
||||||
while (!__ac_isempty(h->flags, i) && (__ac_isdel(h->flags, i) || !__hash_equal(h->keys[i], key))) { \
|
while (!__ac_isempty(h->flags, i) && (__ac_isdel(h->flags, i) || !__hash_equal(h->keys[i], key))) { \
|
||||||
if (i + inc >= h->n_buckets) i = i + inc - h->n_buckets; \
|
i = (i + inc) & mask; \
|
||||||
else i += inc; \
|
|
||||||
if (i == last) return h->n_buckets; \
|
if (i == last) return h->n_buckets; \
|
||||||
} \
|
} \
|
||||||
return __ac_iseither(h->flags, i)? h->n_buckets : i; \
|
return __ac_iseither(h->flags, i)? h->n_buckets : i; \
|
||||||
} else return 0; \
|
} else return 0; \
|
||||||
} \
|
} \
|
||||||
static inline void kh_resize_##name(kh_##name##_t *h, khint_t new_n_buckets) \
|
SCOPE int kh_resize_##name(kh_##name##_t *h, khint_t new_n_buckets) \
|
||||||
{ \
|
{ /* This function uses 0.25*n_bucktes bytes of working space instead of [sizeof(key_t+val_t)+.25]*n_buckets. */ \
|
||||||
khint32_t *new_flags = 0; \
|
khint32_t *new_flags = 0; \
|
||||||
khint_t j = 1; \
|
khint_t j = 1; \
|
||||||
{ \
|
{ \
|
||||||
khint_t t = __ac_HASH_PRIME_SIZE - 1; \
|
kroundup32(new_n_buckets); \
|
||||||
while (__ac_prime_list[t] > new_n_buckets) --t; \
|
if (new_n_buckets < 4) new_n_buckets = 4; \
|
||||||
new_n_buckets = __ac_prime_list[t+1]; \
|
if (h->size >= (khint_t)(new_n_buckets * __ac_HASH_UPPER + 0.5)) j = 0; /* requested size is too small */ \
|
||||||
if (h->size >= (khint_t)(new_n_buckets * __ac_HASH_UPPER + 0.5)) j = 0; \
|
else { /* hash table size to be changed (shrink or expand); rehash */ \
|
||||||
else { \
|
new_flags = (khint32_t*)kmalloc(__ac_fsize(new_n_buckets) * sizeof(khint32_t)); \
|
||||||
new_flags = (khint32_t*)malloc(((new_n_buckets>>4) + 1) * sizeof(khint32_t)); \
|
if (!new_flags) return -1; \
|
||||||
memset(new_flags, 0xaa, ((new_n_buckets>>4) + 1) * sizeof(khint32_t)); \
|
memset(new_flags, 0xaa, __ac_fsize(new_n_buckets) * sizeof(khint32_t)); \
|
||||||
if (h->n_buckets < new_n_buckets) { \
|
if (h->n_buckets < new_n_buckets) { /* expand */ \
|
||||||
h->keys = (khkey_t*)realloc(h->keys, new_n_buckets * sizeof(khkey_t)); \
|
khkey_t *new_keys = (khkey_t*)krealloc((void *)h->keys, new_n_buckets * sizeof(khkey_t)); \
|
||||||
if (kh_is_map) \
|
if (!new_keys) return -1; \
|
||||||
h->vals = (khval_t*)realloc(h->vals, new_n_buckets * sizeof(khval_t)); \
|
h->keys = new_keys; \
|
||||||
} \
|
if (kh_is_map) { \
|
||||||
|
khval_t *new_vals = (khval_t*)krealloc((void *)h->vals, new_n_buckets * sizeof(khval_t)); \
|
||||||
|
if (!new_vals) return -1; \
|
||||||
|
h->vals = new_vals; \
|
||||||
|
} \
|
||||||
|
} /* otherwise shrink */ \
|
||||||
} \
|
} \
|
||||||
} \
|
} \
|
||||||
if (j) { \
|
if (j) { /* rehashing is needed */ \
|
||||||
for (j = 0; j != h->n_buckets; ++j) { \
|
for (j = 0; j != h->n_buckets; ++j) { \
|
||||||
if (__ac_iseither(h->flags, j) == 0) { \
|
if (__ac_iseither(h->flags, j) == 0) { \
|
||||||
khkey_t key = h->keys[j]; \
|
khkey_t key = h->keys[j]; \
|
||||||
khval_t val; \
|
khval_t val; \
|
||||||
|
khint_t new_mask; \
|
||||||
|
new_mask = new_n_buckets - 1; \
|
||||||
if (kh_is_map) val = h->vals[j]; \
|
if (kh_is_map) val = h->vals[j]; \
|
||||||
__ac_set_isdel_true(h->flags, j); \
|
__ac_set_isdel_true(h->flags, j); \
|
||||||
while (1) { \
|
while (1) { /* kick-out process; sort of like in Cuckoo hashing */ \
|
||||||
khint_t inc, k, i; \
|
khint_t inc, k, i; \
|
||||||
k = __hash_func(key); \
|
k = __hash_func(key); \
|
||||||
i = k % new_n_buckets; \
|
i = k & new_mask; \
|
||||||
inc = 1 + k % (new_n_buckets - 1); \
|
inc = __ac_inc(k, new_mask); \
|
||||||
while (!__ac_isempty(new_flags, i)) { \
|
while (!__ac_isempty(new_flags, i)) i = (i + inc) & new_mask; \
|
||||||
if (i + inc >= new_n_buckets) i = i + inc - new_n_buckets; \
|
|
||||||
else i += inc; \
|
|
||||||
} \
|
|
||||||
__ac_set_isempty_false(new_flags, i); \
|
__ac_set_isempty_false(new_flags, i); \
|
||||||
if (i < h->n_buckets && __ac_iseither(h->flags, i) == 0) { \
|
if (i < h->n_buckets && __ac_iseither(h->flags, i) == 0) { /* kick out the existing element */ \
|
||||||
{ khkey_t tmp = h->keys[i]; h->keys[i] = key; key = tmp; } \
|
{ khkey_t tmp = h->keys[i]; h->keys[i] = key; key = tmp; } \
|
||||||
if (kh_is_map) { khval_t tmp = h->vals[i]; h->vals[i] = val; val = tmp; } \
|
if (kh_is_map) { khval_t tmp = h->vals[i]; h->vals[i] = val; val = tmp; } \
|
||||||
__ac_set_isdel_true(h->flags, i); \
|
__ac_set_isdel_true(h->flags, i); /* mark it as deleted in the old hash table */ \
|
||||||
} else { \
|
} else { /* write the element and jump out of the loop */ \
|
||||||
h->keys[i] = key; \
|
h->keys[i] = key; \
|
||||||
if (kh_is_map) h->vals[i] = val; \
|
if (kh_is_map) h->vals[i] = val; \
|
||||||
break; \
|
break; \
|
||||||
|
|
@ -226,35 +276,39 @@ static const double __ac_HASH_UPPER = 0.77;
|
||||||
} \
|
} \
|
||||||
} \
|
} \
|
||||||
} \
|
} \
|
||||||
if (h->n_buckets > new_n_buckets) { \
|
if (h->n_buckets > new_n_buckets) { /* shrink the hash table */ \
|
||||||
h->keys = (khkey_t*)realloc(h->keys, new_n_buckets * sizeof(khkey_t)); \
|
h->keys = (khkey_t*)krealloc((void *)h->keys, new_n_buckets * sizeof(khkey_t)); \
|
||||||
if (kh_is_map) \
|
if (kh_is_map) h->vals = (khval_t*)krealloc((void *)h->vals, new_n_buckets * sizeof(khval_t)); \
|
||||||
h->vals = (khval_t*)realloc(h->vals, new_n_buckets * sizeof(khval_t)); \
|
|
||||||
} \
|
} \
|
||||||
free(h->flags); \
|
kfree(h->flags); /* free the working space */ \
|
||||||
h->flags = new_flags; \
|
h->flags = new_flags; \
|
||||||
h->n_buckets = new_n_buckets; \
|
h->n_buckets = new_n_buckets; \
|
||||||
h->n_occupied = h->size; \
|
h->n_occupied = h->size; \
|
||||||
h->upper_bound = (khint_t)(h->n_buckets * __ac_HASH_UPPER + 0.5); \
|
h->upper_bound = (khint_t)(h->n_buckets * __ac_HASH_UPPER + 0.5); \
|
||||||
} \
|
} \
|
||||||
|
return 0; \
|
||||||
} \
|
} \
|
||||||
static inline khint_t kh_put_##name(kh_##name##_t *h, khkey_t key, int *ret) \
|
SCOPE khint_t kh_put_##name(kh_##name##_t *h, khkey_t key, int *ret) \
|
||||||
{ \
|
{ \
|
||||||
khint_t x; \
|
khint_t x; \
|
||||||
if (h->n_occupied >= h->upper_bound) { \
|
if (h->n_occupied >= h->upper_bound) { /* update the hash table */ \
|
||||||
if (h->n_buckets > (h->size<<1)) kh_resize_##name(h, h->n_buckets - 1); \
|
if (h->n_buckets > (h->size<<1)) { \
|
||||||
else kh_resize_##name(h, h->n_buckets + 1); \
|
if (kh_resize_##name(h, h->n_buckets - 1) < 0) { /* clear "deleted" elements */ \
|
||||||
} \
|
*ret = -1; return h->n_buckets; \
|
||||||
|
} \
|
||||||
|
} else if (kh_resize_##name(h, h->n_buckets + 1) < 0) { /* expand the hash table */ \
|
||||||
|
*ret = -1; return h->n_buckets; \
|
||||||
|
} \
|
||||||
|
} /* TODO: to implement automatically shrinking; resize() already support shrinking */ \
|
||||||
{ \
|
{ \
|
||||||
khint_t inc, k, i, site, last; \
|
khint_t inc, k, i, site, last, mask = h->n_buckets - 1; \
|
||||||
x = site = h->n_buckets; k = __hash_func(key); i = k % h->n_buckets; \
|
x = site = h->n_buckets; k = __hash_func(key); i = k & mask; \
|
||||||
if (__ac_isempty(h->flags, i)) x = i; \
|
if (__ac_isempty(h->flags, i)) x = i; /* for speed up */ \
|
||||||
else { \
|
else { \
|
||||||
inc = 1 + k % (h->n_buckets - 1); last = i; \
|
inc = __ac_inc(k, mask); last = i; \
|
||||||
while (!__ac_isempty(h->flags, i) && (__ac_isdel(h->flags, i) || !__hash_equal(h->keys[i], key))) { \
|
while (!__ac_isempty(h->flags, i) && (__ac_isdel(h->flags, i) || !__hash_equal(h->keys[i], key))) { \
|
||||||
if (__ac_isdel(h->flags, i)) site = i; \
|
if (__ac_isdel(h->flags, i)) site = i; \
|
||||||
if (i + inc >= h->n_buckets) i = i + inc - h->n_buckets; \
|
i = (i + inc) & mask; \
|
||||||
else i += inc; \
|
|
||||||
if (i == last) { x = site; break; } \
|
if (i == last) { x = site; break; } \
|
||||||
} \
|
} \
|
||||||
if (x == h->n_buckets) { \
|
if (x == h->n_buckets) { \
|
||||||
|
|
@ -263,20 +317,20 @@ static const double __ac_HASH_UPPER = 0.77;
|
||||||
} \
|
} \
|
||||||
} \
|
} \
|
||||||
} \
|
} \
|
||||||
if (__ac_isempty(h->flags, x)) { \
|
if (__ac_isempty(h->flags, x)) { /* not present at all */ \
|
||||||
h->keys[x] = key; \
|
h->keys[x] = key; \
|
||||||
__ac_set_isboth_false(h->flags, x); \
|
__ac_set_isboth_false(h->flags, x); \
|
||||||
++h->size; ++h->n_occupied; \
|
++h->size; ++h->n_occupied; \
|
||||||
*ret = 1; \
|
*ret = 1; \
|
||||||
} else if (__ac_isdel(h->flags, x)) { \
|
} else if (__ac_isdel(h->flags, x)) { /* deleted */ \
|
||||||
h->keys[x] = key; \
|
h->keys[x] = key; \
|
||||||
__ac_set_isboth_false(h->flags, x); \
|
__ac_set_isboth_false(h->flags, x); \
|
||||||
++h->size; \
|
++h->size; \
|
||||||
*ret = 2; \
|
*ret = 2; \
|
||||||
} else *ret = 0; \
|
} else *ret = 0; /* Don't touch h->keys[x] if present and not deleted */ \
|
||||||
return x; \
|
return x; \
|
||||||
} \
|
} \
|
||||||
static inline void kh_del_##name(kh_##name##_t *h, khint_t x) \
|
SCOPE void kh_del_##name(kh_##name##_t *h, khint_t x) \
|
||||||
{ \
|
{ \
|
||||||
if (x != h->n_buckets && !__ac_iseither(h->flags, x)) { \
|
if (x != h->n_buckets && !__ac_iseither(h->flags, x)) { \
|
||||||
__ac_set_isdel_true(h->flags, x); \
|
__ac_set_isdel_true(h->flags, x); \
|
||||||
|
|
@ -284,6 +338,17 @@ static const double __ac_HASH_UPPER = 0.77;
|
||||||
} \
|
} \
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#define KHASH_DECLARE(name, khkey_t, khval_t) \
|
||||||
|
__KHASH_TYPE(name, khkey_t, khval_t) \
|
||||||
|
__KHASH_PROTOTYPES(name, khkey_t, khval_t)
|
||||||
|
|
||||||
|
#define KHASH_INIT2(name, SCOPE, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \
|
||||||
|
__KHASH_TYPE(name, khkey_t, khval_t) \
|
||||||
|
__KHASH_IMPL(name, SCOPE, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal)
|
||||||
|
|
||||||
|
#define KHASH_INIT(name, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \
|
||||||
|
KHASH_INIT2(name, static kh_inline, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal)
|
||||||
|
|
||||||
/* --- BEGIN OF HASH FUNCTIONS --- */
|
/* --- BEGIN OF HASH FUNCTIONS --- */
|
||||||
|
|
||||||
/*! @function
|
/*! @function
|
||||||
|
|
@ -311,10 +376,10 @@ static const double __ac_HASH_UPPER = 0.77;
|
||||||
@param s Pointer to a null terminated string
|
@param s Pointer to a null terminated string
|
||||||
@return The hash value
|
@return The hash value
|
||||||
*/
|
*/
|
||||||
static inline khint_t __ac_X31_hash_string(const char *s)
|
static kh_inline khint_t __ac_X31_hash_string(const char *s)
|
||||||
{
|
{
|
||||||
khint_t h = *s;
|
khint_t h = (khint_t)*s;
|
||||||
if (h) for (++s ; *s; ++s) h = (h << 5) - h + *s;
|
if (h) for (++s ; *s; ++s) h = (h << 5) - h + (khint_t)*s;
|
||||||
return h;
|
return h;
|
||||||
}
|
}
|
||||||
/*! @function
|
/*! @function
|
||||||
|
|
@ -328,9 +393,21 @@ static inline khint_t __ac_X31_hash_string(const char *s)
|
||||||
*/
|
*/
|
||||||
#define kh_str_hash_equal(a, b) (strcmp(a, b) == 0)
|
#define kh_str_hash_equal(a, b) (strcmp(a, b) == 0)
|
||||||
|
|
||||||
|
static kh_inline khint_t __ac_Wang_hash(khint_t key)
|
||||||
|
{
|
||||||
|
key += ~(key << 15);
|
||||||
|
key ^= (key >> 10);
|
||||||
|
key += (key << 3);
|
||||||
|
key ^= (key >> 6);
|
||||||
|
key += ~(key << 11);
|
||||||
|
key ^= (key >> 16);
|
||||||
|
return key;
|
||||||
|
}
|
||||||
|
#define kh_int_hash_func2(k) __ac_Wang_hash((khint_t)key)
|
||||||
|
|
||||||
/* --- END OF HASH FUNCTIONS --- */
|
/* --- END OF HASH FUNCTIONS --- */
|
||||||
|
|
||||||
/* Other necessary macros... */
|
/* Other convenient macros... */
|
||||||
|
|
||||||
/*!
|
/*!
|
||||||
@abstract Type of the hash table.
|
@abstract Type of the hash table.
|
||||||
|
|
@ -396,7 +473,6 @@ static inline khint_t __ac_X31_hash_string(const char *s)
|
||||||
*/
|
*/
|
||||||
#define kh_del(name, h, k) kh_del_##name(h, k)
|
#define kh_del(name, h, k) kh_del_##name(h, k)
|
||||||
|
|
||||||
|
|
||||||
/*! @function
|
/*! @function
|
||||||
@abstract Test whether a bucket contains data.
|
@abstract Test whether a bucket contains data.
|
||||||
@param h Pointer to the hash table [khash_t(name)*]
|
@param h Pointer to the hash table [khash_t(name)*]
|
||||||
|
|
@ -455,6 +531,34 @@ static inline khint_t __ac_X31_hash_string(const char *s)
|
||||||
*/
|
*/
|
||||||
#define kh_n_buckets(h) ((h)->n_buckets)
|
#define kh_n_buckets(h) ((h)->n_buckets)
|
||||||
|
|
||||||
|
/*! @function
|
||||||
|
@abstract Iterate over the entries in the hash table
|
||||||
|
@param h Pointer to the hash table [khash_t(name)*]
|
||||||
|
@param kvar Variable to which key will be assigned
|
||||||
|
@param vvar Variable to which value will be assigned
|
||||||
|
@param code Block of code to execute
|
||||||
|
*/
|
||||||
|
#define kh_foreach(h, kvar, vvar, code) { khint_t __i; \
|
||||||
|
for (__i = kh_begin(h); __i != kh_end(h); ++__i) { \
|
||||||
|
if (!kh_exist(h,__i)) continue; \
|
||||||
|
(kvar) = kh_key(h,__i); \
|
||||||
|
(vvar) = kh_val(h,__i); \
|
||||||
|
code; \
|
||||||
|
} }
|
||||||
|
|
||||||
|
/*! @function
|
||||||
|
@abstract Iterate over the values in the hash table
|
||||||
|
@param h Pointer to the hash table [khash_t(name)*]
|
||||||
|
@param vvar Variable to which value will be assigned
|
||||||
|
@param code Block of code to execute
|
||||||
|
*/
|
||||||
|
#define kh_foreach_value(h, vvar, code) { khint_t __i; \
|
||||||
|
for (__i = kh_begin(h); __i != kh_end(h); ++__i) { \
|
||||||
|
if (!kh_exist(h,__i)) continue; \
|
||||||
|
(vvar) = kh_val(h,__i); \
|
||||||
|
code; \
|
||||||
|
} }
|
||||||
|
|
||||||
/* More conenient interfaces */
|
/* More conenient interfaces */
|
||||||
|
|
||||||
/*! @function
|
/*! @function
|
||||||
|
|
|
||||||
79
utils.c
79
utils.c
|
|
@ -35,6 +35,12 @@
|
||||||
#include <sys/time.h>
|
#include <sys/time.h>
|
||||||
#include "utils.h"
|
#include "utils.h"
|
||||||
|
|
||||||
|
#define pair64_lt(a, b) ((a).x < (b).x || ((a).x == (b).x && (a).y < (b).y))
|
||||||
|
|
||||||
|
#include "ksort.h"
|
||||||
|
KSORT_INIT(128, pair64_t, pair64_lt)
|
||||||
|
KSORT_INIT(64, uint64_t, ks_lt_generic)
|
||||||
|
|
||||||
FILE *err_xopen_core(const char *func, const char *fn, const char *mode)
|
FILE *err_xopen_core(const char *func, const char *fn, const char *mode)
|
||||||
{
|
{
|
||||||
FILE *fp = 0;
|
FILE *fp = 0;
|
||||||
|
|
@ -46,6 +52,7 @@ FILE *err_xopen_core(const char *func, const char *fn, const char *mode)
|
||||||
}
|
}
|
||||||
return fp;
|
return fp;
|
||||||
}
|
}
|
||||||
|
|
||||||
FILE *err_xreopen_core(const char *func, const char *fn, const char *mode, FILE *fp)
|
FILE *err_xreopen_core(const char *func, const char *fn, const char *mode, FILE *fp)
|
||||||
{
|
{
|
||||||
if (freopen(fn, mode, fp) == 0) {
|
if (freopen(fn, mode, fp) == 0) {
|
||||||
|
|
@ -56,6 +63,7 @@ FILE *err_xreopen_core(const char *func, const char *fn, const char *mode, FILE
|
||||||
}
|
}
|
||||||
return fp;
|
return fp;
|
||||||
}
|
}
|
||||||
|
|
||||||
gzFile err_xzopen_core(const char *func, const char *fn, const char *mode)
|
gzFile err_xzopen_core(const char *func, const char *fn, const char *mode)
|
||||||
{
|
{
|
||||||
gzFile fp;
|
gzFile fp;
|
||||||
|
|
@ -67,6 +75,7 @@ gzFile err_xzopen_core(const char *func, const char *fn, const char *mode)
|
||||||
}
|
}
|
||||||
return fp;
|
return fp;
|
||||||
}
|
}
|
||||||
|
|
||||||
void err_fatal(const char *header, const char *fmt, ...)
|
void err_fatal(const char *header, const char *fmt, ...)
|
||||||
{
|
{
|
||||||
va_list args;
|
va_list args;
|
||||||
|
|
@ -86,66 +95,48 @@ void err_fatal_simple_core(const char *func, const char *msg)
|
||||||
|
|
||||||
size_t err_fwrite(const void *ptr, size_t size, size_t nmemb, FILE *stream)
|
size_t err_fwrite(const void *ptr, size_t size, size_t nmemb, FILE *stream)
|
||||||
{
|
{
|
||||||
size_t ret = fwrite(ptr, size, nmemb, stream);
|
size_t ret = fwrite(ptr, size, nmemb, stream);
|
||||||
if (ret != nmemb)
|
if (ret != nmemb)
|
||||||
{
|
err_fatal_simple_core("fwrite", strerror(errno));
|
||||||
err_fatal_simple_core("fwrite", strerror(errno));
|
return ret;
|
||||||
}
|
|
||||||
return ret;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
int err_printf(const char *format, ...)
|
int err_printf(const char *format, ...)
|
||||||
{
|
{
|
||||||
va_list arg;
|
va_list arg;
|
||||||
int done;
|
int done;
|
||||||
|
va_start(arg, format);
|
||||||
va_start(arg, format);
|
done = vfprintf(stdout, format, arg);
|
||||||
done = vfprintf(stdout, format, arg);
|
int saveErrno = errno;
|
||||||
int saveErrno = errno;
|
va_end(arg);
|
||||||
va_end(arg);
|
if (done < 0) err_fatal_simple_core("vfprintf(stdout)", strerror(saveErrno));
|
||||||
|
return done;
|
||||||
if (done < 0)
|
|
||||||
{
|
|
||||||
err_fatal_simple_core("vfprintf(stdout)", strerror(saveErrno));
|
|
||||||
}
|
|
||||||
return done;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
int err_fprintf(FILE *stream, const char *format, ...)
|
int err_fprintf(FILE *stream, const char *format, ...)
|
||||||
{
|
{
|
||||||
va_list arg;
|
va_list arg;
|
||||||
int done;
|
int done;
|
||||||
|
va_start(arg, format);
|
||||||
va_start(arg, format);
|
done = vfprintf(stream, format, arg);
|
||||||
done = vfprintf(stream, format, arg);
|
int saveErrno = errno;
|
||||||
int saveErrno = errno;
|
va_end(arg);
|
||||||
va_end(arg);
|
if (done < 0) err_fatal_simple_core("vfprintf", strerror(saveErrno));
|
||||||
|
return done;
|
||||||
if (done < 0)
|
|
||||||
{
|
|
||||||
err_fatal_simple_core("vfprintf", strerror(saveErrno));
|
|
||||||
}
|
|
||||||
return done;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
int err_fflush(FILE *stream)
|
int err_fflush(FILE *stream)
|
||||||
{
|
{
|
||||||
int ret = fflush(stream);
|
int ret = fflush(stream);
|
||||||
if (ret != 0)
|
if (ret != 0) err_fatal_simple_core("fflush", strerror(errno));
|
||||||
{
|
return ret;
|
||||||
err_fatal_simple_core("fflush", strerror(errno));
|
|
||||||
}
|
|
||||||
return ret;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
int err_fclose(FILE *stream)
|
int err_fclose(FILE *stream)
|
||||||
{
|
{
|
||||||
int ret = fclose(stream);
|
int ret = fclose(stream);
|
||||||
if (ret != 0)
|
if (ret != 0) err_fatal_simple_core("fclose", strerror(errno));
|
||||||
{
|
return ret;
|
||||||
err_fatal_simple_core("fclose", strerror(errno));
|
|
||||||
}
|
|
||||||
return ret;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
double cputime()
|
double cputime()
|
||||||
|
|
|
||||||
13
utils.h
13
utils.h
|
|
@ -28,6 +28,7 @@
|
||||||
#ifndef LH3_UTILS_H
|
#ifndef LH3_UTILS_H
|
||||||
#define LH3_UTILS_H
|
#define LH3_UTILS_H
|
||||||
|
|
||||||
|
#include <stdint.h>
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <zlib.h>
|
#include <zlib.h>
|
||||||
|
|
||||||
|
|
@ -38,14 +39,19 @@
|
||||||
#define ATTRIBUTE(list)
|
#define ATTRIBUTE(list)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#define err_fatal_simple(msg) err_fatal_simple_core(__func__, msg)
|
#define err_fatal_simple(msg) err_fatal_simple_core(__func__, msg)
|
||||||
#define xopen(fn, mode) err_xopen_core(__func__, fn, mode)
|
#define xopen(fn, mode) err_xopen_core(__func__, fn, mode)
|
||||||
#define xreopen(fn, mode, fp) err_xreopen_core(__func__, fn, mode, fp)
|
#define xreopen(fn, mode, fp) err_xreopen_core(__func__, fn, mode, fp)
|
||||||
#define xzopen(fn, mode) err_xzopen_core(__func__, fn, mode)
|
#define xzopen(fn, mode) err_xzopen_core(__func__, fn, mode)
|
||||||
#define xassert(cond, msg) if ((cond) == 0) err_fatal_simple_core(__func__, msg)
|
#define xassert(cond, msg) if ((cond) == 0) err_fatal_simple_core(__func__, msg)
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
uint64_t x, y;
|
||||||
|
} pair64_t;
|
||||||
|
|
||||||
|
typedef struct { size_t n, m; uint64_t *a; } uint64_v;
|
||||||
|
typedef struct { size_t n, m; pair64_t *a; } pair64_v;
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
extern "C" {
|
extern "C" {
|
||||||
#endif
|
#endif
|
||||||
|
|
@ -66,6 +72,9 @@ extern "C" {
|
||||||
double cputime();
|
double cputime();
|
||||||
double realtime();
|
double realtime();
|
||||||
|
|
||||||
|
void ks_introsort_64 (size_t n, uint64_t *a);
|
||||||
|
void ks_introsort_128(size_t n, pair64_t *a);
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue