添加log,argparse等第三方库,修改代码
This commit is contained in:
parent
1c35dafbf8
commit
27e0af955e
|
|
@ -1,6 +1,9 @@
|
||||||
# ---> C++
|
# ---> C++
|
||||||
# Prerequisites
|
# Prerequisites
|
||||||
*.d
|
*.d
|
||||||
|
/.vscode
|
||||||
|
/build
|
||||||
|
build.sh
|
||||||
|
|
||||||
# Compiled Object files
|
# Compiled Object files
|
||||||
*.slo
|
*.slo
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,8 @@
|
||||||
|
# Top-level build script for FastDup.
# CMAKE_CXX_STANDARD only accepts the value 17 from CMake 3.8 onwards, so the
# minimum version must be at least 3.8 for the settings below to be honoured.
cmake_minimum_required(VERSION 3.8)
project(FastDup)
# Build all C++ sources as C++17 and fail the configure step if the compiler
# cannot provide it (instead of silently decaying to an older standard).
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
# set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread")
# set(CMAKE_BUILD_TYPE Debug)
# set(CMAKE_BUILD_TYPE Release)
ADD_SUBDIRECTORY(src)
|
||||||
File diff suppressed because it is too large
Load Diff
|
|
@ -0,0 +1,40 @@
|
||||||
|
# General
|
||||||
|
*.a
|
||||||
|
*.dSYM/
|
||||||
|
*.la
|
||||||
|
*.lo
|
||||||
|
*.o
|
||||||
|
*.opensdf
|
||||||
|
*.orig
|
||||||
|
*.sdf
|
||||||
|
*.suo
|
||||||
|
*.swp
|
||||||
|
*.tests
|
||||||
|
*.vcxproj.filters
|
||||||
|
*.vcxproj.user
|
||||||
|
*~
|
||||||
|
.git
|
||||||
|
TAGS
|
||||||
|
|
||||||
|
# Mac/Xcode-specific
|
||||||
|
xcuserdata
|
||||||
|
contents.xcworkspacedata
|
||||||
|
.DS_Store
|
||||||
|
._*
|
||||||
|
|
||||||
|
# Test byproducts
|
||||||
|
test/kbtree_test
|
||||||
|
test/khash_keith
|
||||||
|
test/khash_keith2
|
||||||
|
test/khash_test
|
||||||
|
test/klist_test
|
||||||
|
test/kmin_test
|
||||||
|
test/kseq_bench
|
||||||
|
test/kseq_bench2
|
||||||
|
test/kseq_test
|
||||||
|
test/ksort_test
|
||||||
|
test/ksort_test-stl
|
||||||
|
test/kstring_bench
|
||||||
|
test/kstring_bench2
|
||||||
|
test/kstring_test
|
||||||
|
test/kvec_test
|
||||||
|
|
@ -0,0 +1,23 @@
|
||||||
|
The MIT License
|
||||||
|
|
||||||
|
Copyright (c) 2008- Attractive Chaos <attractor@live.co.uk>
|
||||||
|
|
||||||
|
Permission is hereby granted, free of charge, to any person obtaining
|
||||||
|
a copy of this software and associated documentation files (the
|
||||||
|
"Software"), to deal in the Software without restriction, including
|
||||||
|
without limitation the rights to use, copy, modify, merge, publish,
|
||||||
|
distribute, sublicense, and/or sell copies of the Software, and to
|
||||||
|
permit persons to whom the Software is furnished to do so, subject to
|
||||||
|
the following conditions:
|
||||||
|
|
||||||
|
The above copyright notice and this permission notice shall be
|
||||||
|
included in all copies or substantial portions of the Software.
|
||||||
|
|
||||||
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||||
|
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||||
|
BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||||
|
ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||||
|
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
|
SOFTWARE.
|
||||||
|
|
@ -0,0 +1,243 @@
|
||||||
|
# Klib: a Generic Library in C
|
||||||
|
|
||||||
|
## <a name="overview"></a>Overview
|
||||||
|
|
||||||
|
Klib is a standalone and lightweight C library distributed under [MIT/X11
|
||||||
|
license][1]. Most components are independent of external libraries, except the
|
||||||
|
standard C library, and independent of each other. To use a component of this
|
||||||
|
library, you only need to copy a couple of files to your source code tree
|
||||||
|
without worrying about library dependencies.
|
||||||
|
|
||||||
|
Klib strives for efficiency and a small memory footprint. Some components, such
|
||||||
|
as khash.h, kbtree.h, ksort.h and kvec.h, are among the most efficient
|
||||||
|
implementations of similar algorithms or data structures in all programming
|
||||||
|
languages, in terms of both speed and memory use.
|
||||||
|
|
||||||
|
New documentation is available [here](http://attractivechaos.github.io/klib/)
|
||||||
|
which includes most information in this README file.
|
||||||
|
|
||||||
|
#### Common components
|
||||||
|
|
||||||
|
* [khash.h][khash]: generic [hash table][2] with open addressing.
|
||||||
|
* [kbtree.h][kbtree]: generic search tree based on [B-tree][3].
|
||||||
|
* [kavl.h][kavl]: generic intrusive [AVL tree][wiki-avl].
|
||||||
|
* [ksort.h][ksort]: generic sort, including [introsort][4], [merge sort][5], [heap sort][6], [comb sort][7], [Knuth shuffle][8] and the [k-small][9] algorithm.
|
||||||
|
* [kseq.h][kseq]: generic stream buffer and a [FASTA][10]/[FASTQ][11] format parser.
|
||||||
|
* kvec.h: generic dynamic array.
|
||||||
|
* klist.h: generic single-linked list and [memory pool][12].
|
||||||
|
* kstring.{h,c}: basic string library.
|
||||||
|
* kmath.{h,c}: numerical routines including [MT19937-64][13] [pseudorandom generator][14], basic [nonlinear programming][15] and a few special math functions.
|
||||||
|
* [ketopt.h][ketopt]: portable command-line argument parser with getopt\_long-like API.
|
||||||
|
|
||||||
|
#### Components for more specific use cases
|
||||||
|
|
||||||
|
* ksa.c: constructing [suffix arrays][16] for strings with multiple sentinels, based on a revised [SAIS algorithm][17].
|
||||||
|
* knetfile.{h,c}: random access to remote files on HTTP or FTP.
|
||||||
|
* kopen.c: smart stream opening.
|
||||||
|
* khmm.{h,c}: basic [HMM][18] library.
|
||||||
|
* ksw.{h,c}: Striped [Smith-Waterman algorithm][19].
|
||||||
|
* knhx.{h,c}: [Newick tree format][20] parser.
|
||||||
|
|
||||||
|
|
||||||
|
## <a name="methodology"></a>Methodology
|
||||||
|
|
||||||
|
For the implementation of generic [containers][21], klib extensively uses C
|
||||||
|
macros. To use these data structures, we usually need to instantiate methods by
|
||||||
|
expanding a long macro. This makes the source code look unusual or even ugly
|
||||||
|
and adds difficulty to debugging. Unfortunately, for efficient generic
|
||||||
|
programming in C that lacks [template][22], using macros is the only
|
||||||
|
solution. Only with macros, we can write a generic container which, once
|
||||||
|
instantiated, competes with a type-specific container in efficiency. Some
|
||||||
|
generic libraries in C, such as [Glib][23], use the `void*` type to implement
|
||||||
|
containers. These implementations are usually slower and use more memory than
|
||||||
|
klib (see [this benchmark][31]).
|
||||||
|
|
||||||
|
To effectively use klib, it is important to understand how it achieves generic
|
||||||
|
programming. We will use the hash table library as an example:
|
||||||
|
|
||||||
|
#include "khash.h"
|
||||||
|
KHASH_MAP_INIT_INT(m32, char) // instantiate structs and methods
|
||||||
|
int main() {
|
||||||
|
int ret, is_missing;
|
||||||
|
khint_t k;
|
||||||
|
khash_t(m32) *h = kh_init(m32); // allocate a hash table
|
||||||
|
k = kh_put(m32, h, 5, &ret); // insert a key to the hash table
|
||||||
|
if (!ret) kh_del(m32, h, k);
|
||||||
|
kh_value(h, k) = 10; // set the value
|
||||||
|
k = kh_get(m32, h, 10); // query the hash table
|
||||||
|
is_missing = (k == kh_end(h)); // test if the key is present
|
||||||
|
k = kh_get(m32, h, 5);
|
||||||
|
kh_del(m32, h, k); // remove a key-value pair
|
||||||
|
for (k = kh_begin(h); k != kh_end(h); ++k) // traverse
|
||||||
|
if (kh_exist(h, k)) // test if a bucket contains data
|
||||||
|
kh_value(h, k) = 1;
|
||||||
|
kh_destroy(m32, h); // deallocate the hash table
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
In this example, the second line instantiates a hash table with `unsigned` as
|
||||||
|
the key type and `char` as the value type. `m32` names such a type of hash table.
|
||||||
|
All types and functions associated with this name are macros, which will be
|
||||||
|
explained later. Macro `kh_init()` initiates a hash table and `kh_destroy()`
|
||||||
|
frees it. `kh_put()` inserts a key and returns the iterator (or the position)
|
||||||
|
in the hash table. `kh_get()` and `kh_del()` get a key and delete an element,
|
||||||
|
respectively. Macro `kh_exist()` tests if an iterator (or a position) is filled
|
||||||
|
with data.
|
||||||
|
|
||||||
|
An immediate concern is that this piece of code does not look like a valid C
|
||||||
|
program (e.g. lacking semicolon, assignment to an _apparent_ function call and
|
||||||
|
_apparent_ undefined `m32` 'variable'). To understand why the code is correct,
|
||||||
|
let's go a bit further into the source code of `khash.h`, whose skeleton looks
|
||||||
|
like:
|
||||||
|
|
||||||
|
#define KHASH_INIT(name, SCOPE, key_t, val_t, is_map, _hashf, _hasheq) \
|
||||||
|
typedef struct { \
|
||||||
|
int n_buckets, size, n_occupied, upper_bound; \
|
||||||
|
unsigned *flags; \
|
||||||
|
key_t *keys; \
|
||||||
|
val_t *vals; \
|
||||||
|
} kh_##name##_t; \
|
||||||
|
SCOPE inline kh_##name##_t *init_##name() { \
|
||||||
|
return (kh_##name##_t*)calloc(1, sizeof(kh_##name##_t)); \
|
||||||
|
} \
|
||||||
|
SCOPE inline int get_##name(kh_##name##_t *h, key_t k) \
|
||||||
|
... \
|
||||||
|
SCOPE inline void destroy_##name(kh_##name##_t *h) { \
|
||||||
|
if (h) { \
|
||||||
|
free(h->keys); free(h->flags); free(h->vals); free(h); \
|
||||||
|
} \
|
||||||
|
}
|
||||||
|
|
||||||
|
#define _int_hf(key) (unsigned)(key)
|
||||||
|
#define _int_heq(a, b) (a == b)
|
||||||
|
#define khash_t(name) kh_##name##_t
|
||||||
|
#define kh_value(h, k) ((h)->vals[k])
|
||||||
|
#define kh_begin(h) 0
|
||||||
|
#define kh_end(h) ((h)->n_buckets)
|
||||||
|
#define kh_init(name) init_##name()
|
||||||
|
#define kh_get(name, h, k) get_##name(h, k)
|
||||||
|
#define kh_destroy(name, h) destroy_##name(h)
|
||||||
|
...
|
||||||
|
#define KHASH_MAP_INIT_INT(name, val_t) \
|
||||||
|
KHASH_INIT(name, static, unsigned, val_t, is_map, _int_hf, _int_heq)
|
||||||
|
|
||||||
|
`KHASH_INIT()` is a huge macro defining all the structs and methods. When this
|
||||||
|
macro is called, all the code inside it will be inserted by the [C
|
||||||
|
preprocessor][37] to the place where it is called. If the macro is called
|
||||||
|
multiple times, multiple copies of the code will be inserted. To avoid naming
|
||||||
|
conflict of hash tables with different key-value types, the library uses [token
|
||||||
|
concatenation][36], which is a preprocessor feature whereby we can substitute
|
||||||
|
part of a symbol based on the parameter of the macro. In the end, the C
|
||||||
|
preprocessor will generate the following code and feed it to the compiler
|
||||||
|
(macro `kh_exist(h,k)` is a little complex and not expanded for simplicity):
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
int n_buckets, size, n_occupied, upper_bound;
|
||||||
|
unsigned *flags;
|
||||||
|
unsigned *keys;
|
||||||
|
char *vals;
|
||||||
|
} kh_m32_t;
|
||||||
|
static inline kh_m32_t *init_m32() {
|
||||||
|
return (kh_m32_t*)calloc(1, sizeof(kh_m32_t));
|
||||||
|
}
|
||||||
|
static inline int get_m32(kh_m32_t *h, unsigned k)
|
||||||
|
...
|
||||||
|
static inline void destroy_m32(kh_m32_t *h) {
|
||||||
|
if (h) {
|
||||||
|
free(h->keys); free(h->flags); free(h->vals); free(h);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
int main() {
|
||||||
|
int ret, is_missing;
|
||||||
|
khint_t k;
|
||||||
|
kh_m32_t *h = init_m32();
|
||||||
|
k = put_m32(h, 5, &ret);
|
||||||
|
if (!ret) del_m32(h, k);
|
||||||
|
h->vals[k] = 10;
|
||||||
|
k = get_m32(h, 10);
|
||||||
|
is_missing = (k == h->n_buckets);
|
||||||
|
k = get_m32(h, 5);
|
||||||
|
del_m32(h, k);
|
||||||
|
for (k = 0; k != h->n_buckets; ++k)
|
||||||
|
if (kh_exist(h, k)) h->vals[k] = 1;
|
||||||
|
destroy_m32(h);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
This is the C program we know.
|
||||||
|
|
||||||
|
From this example, we can see that macros and the C preprocessor play a key
|
||||||
|
role in klib. Klib is fast partly because the compiler knows the key-value
|
||||||
|
type at the compile time and is able to optimize the code to the same level
|
||||||
|
as type-specific code. A generic library written with `void*` will not get such
|
||||||
|
performance boost.
|
||||||
|
|
||||||
|
Massively inserting code upon instantiation may remind us of C++'s slow
|
||||||
|
compiling speed and huge binary size when STL/boost is in use. Klib is much
|
||||||
|
better in this respect due to its small code size and component independency.
|
||||||
|
Inserting several hundred lines of code won't make compiling obviously slower.
|
||||||
|
|
||||||
|
## <a name="resources"></a>Resources
|
||||||
|
|
||||||
|
* Library documentation, if present, is available in the header files. Examples
|
||||||
|
can be found in the [test/][24] directory.
|
||||||
|
* **Obsolete** documentation of the hash table library can be found at
|
||||||
|
[SourceForge][25]. This README is partly adapted from the old documentation.
|
||||||
|
* [Blog post][26] describing the hash table library.
|
||||||
|
* [Blog post][27] on why using `void*` for generic programming may be inefficient.
|
||||||
|
* [Blog post][28] on the generic stream buffer.
|
||||||
|
* [Blog post][29] evaluating the performance of `kvec.h`.
|
||||||
|
* [Blog post][30] arguing B-tree may be a better data structure than a binary search tree.
|
||||||
|
* [Blog post][31] evaluating the performance of `khash.h` and `kbtree.h` among many other implementations.
|
||||||
|
[An older version][33] of the benchmark is also available.
|
||||||
|
* [Blog post][34] benchmarking internal sorting algorithms and implementations.
|
||||||
|
* [Blog post][32] on the k-small algorithm.
|
||||||
|
* [Blog post][35] on the Hooke-Jeeves algorithm for nonlinear programming.
|
||||||
|
|
||||||
|
[1]: http://en.wikipedia.org/wiki/MIT_License
|
||||||
|
[2]: https://en.wikipedia.org/wiki/Hash_table
|
||||||
|
[3]: http://en.wikipedia.org/wiki/B-tree
|
||||||
|
[4]: http://en.wikipedia.org/wiki/Introsort
|
||||||
|
[5]: http://en.wikipedia.org/wiki/Merge_sort
|
||||||
|
[6]: http://en.wikipedia.org/wiki/Heapsort
|
||||||
|
[7]: http://en.wikipedia.org/wiki/Comb_sort
|
||||||
|
[8]: http://en.wikipedia.org/wiki/Fisher-Yates_shuffle
|
||||||
|
[9]: http://en.wikipedia.org/wiki/Selection_algorithm
|
||||||
|
[10]: http://en.wikipedia.org/wiki/FASTA_format
|
||||||
|
[11]: http://en.wikipedia.org/wiki/FASTQ_format
|
||||||
|
[12]: http://en.wikipedia.org/wiki/Memory_pool
|
||||||
|
[13]: http://en.wikipedia.org/wiki/Mersenne_twister
|
||||||
|
[14]: http://en.wikipedia.org/wiki/Pseudorandom_generator
|
||||||
|
[15]: http://en.wikipedia.org/wiki/Nonlinear_programming
|
||||||
|
[16]: http://en.wikipedia.org/wiki/Suffix_array
|
||||||
|
[17]: https://sites.google.com/site/yuta256/sais
|
||||||
|
[18]: http://en.wikipedia.org/wiki/Hidden_Markov_model
|
||||||
|
[19]: http://en.wikipedia.org/wiki/Smith-Waterman_algorithm
|
||||||
|
[20]: http://en.wikipedia.org/wiki/Newick_format
|
||||||
|
[21]: http://en.wikipedia.org/wiki/Container_(abstract_data_type)
|
||||||
|
[22]: http://en.wikipedia.org/wiki/Template_(C%2B%2B)
|
||||||
|
[23]: http://en.wikipedia.org/wiki/GLib
|
||||||
|
[24]: https://github.com/attractivechaos/klib/tree/master/test
|
||||||
|
[25]: http://klib.sourceforge.net/
|
||||||
|
[26]: http://attractivechaos.wordpress.com/2008/09/02/implementing-generic-hash-library-in-c/
|
||||||
|
[27]: http://attractivechaos.wordpress.com/2008/10/02/using-void-in-generic-c-programming-may-be-inefficient/
|
||||||
|
[28]: http://attractivechaos.wordpress.com/2008/10/11/a-generic-buffered-stream-wrapper/
|
||||||
|
[29]: http://attractivechaos.wordpress.com/2008/09/19/c-array-vs-c-vector/
|
||||||
|
[30]: http://attractivechaos.wordpress.com/2008/09/24/b-tree-vs-binary-search-tree/
|
||||||
|
[31]: http://attractivechaos.wordpress.com/2008/10/07/another-look-at-my-old-benchmark/
|
||||||
|
[32]: http://attractivechaos.wordpress.com/2008/09/13/calculating-median/
|
||||||
|
[33]: http://attractivechaos.wordpress.com/2008/08/28/comparison-of-hash-table-libraries/
|
||||||
|
[34]: http://attractivechaos.wordpress.com/2008/08/28/comparison-of-internal-sorting-algorithms/
|
||||||
|
[35]: http://attractivechaos.wordpress.com/2008/08/24/derivative-free-optimization-dfo/
|
||||||
|
[36]: http://en.wikipedia.org/wiki/C_preprocessor#Token_concatenation
|
||||||
|
[37]: http://en.wikipedia.org/wiki/C_preprocessor
|
||||||
|
|
||||||
|
[wiki-avl]: https://en.wikipedia.org/wiki/AVL_tree
|
||||||
|
|
||||||
|
[kbtree]: http://attractivechaos.github.io/klib/#KBtree%3A%20generic%20ordered%20map:%5B%5BKBtree%3A%20generic%20ordered%20map%5D%5D
|
||||||
|
[khash]: http://attractivechaos.github.io/klib/#Khash%3A%20generic%20hash%20table:%5B%5BKhash%3A%20generic%20hash%20table%5D%5D
|
||||||
|
[kseq]: http://attractivechaos.github.io/klib/#Kseq%3A%20stream%20buffer%20and%20FASTA%2FQ%20parser:%5B%5BKseq%3A%20stream%20buffer%20and%20FASTA%2FQ%20parser%5D%5D
|
||||||
|
[ksort]: http://attractivechaos.github.io/klib/#Ksort%3A%20sorting%2C%20shuffling%2C%20heap%20and%20k-small:%5B%5BKsort%3A%20sorting%2C%20shuffling%2C%20heap%20and%20k-small%5D%5D
|
||||||
|
[kavl]: http://attractivechaos.github.io/klib/#KAVL%3A%20generic%20intrusive%20AVL%20tree
|
||||||
|
[ketopt]: http://attractivechaos.github.io/klib/#Ketopt%3A%20parsing%20command-line%20arguments
|
||||||
|
|
@ -0,0 +1,555 @@
|
||||||
|
/* The MIT License
|
||||||
|
|
||||||
|
Copyright (c) 2008 Broad Institute / Massachusetts Institute of Technology
|
||||||
|
2011 Attractive Chaos <attractor@live.co.uk>
|
||||||
|
|
||||||
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
of this software and associated documentation files (the "Software"), to deal
|
||||||
|
in the Software without restriction, including without limitation the rights
|
||||||
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
copies of the Software, and to permit persons to whom the Software is
|
||||||
|
furnished to do so, subject to the following conditions:
|
||||||
|
|
||||||
|
The above copyright notice and this permission notice shall be included in
|
||||||
|
all copies or substantial portions of the Software.
|
||||||
|
|
||||||
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <unistd.h>
|
||||||
|
#include <assert.h>
|
||||||
|
#include <sys/types.h>
|
||||||
|
#include "bgzf.h"
|
||||||
|
|
||||||
|
/*
 * Low-level file backend used by the reader.
 *
 * When _USE_KNETFILE is defined the _bgzf_* operations map onto the knet_*
 * functions from knetfile.h; otherwise they map onto the C stdio functions.
 * Either way _bgzf_file_t is the handle type those operations take.
 */
#ifdef _USE_KNETFILE
#include "knetfile.h"
typedef knetFile *_bgzf_file_t;
#define _bgzf_open(path, mode) knet_open(path, mode)
#define _bgzf_dopen(fd, mode) knet_dopen(fd, mode)
#define _bgzf_close(stream) knet_close(stream)
#define _bgzf_fileno(stream) ((stream)->fd)
#define _bgzf_tell(stream) knet_tell(stream)
#define _bgzf_seek(stream, offset, whence) knet_seek(stream, offset, whence)
#define _bgzf_read(stream, buf, len) knet_read(stream, buf, len)
#define _bgzf_write(stream, buf, len) knet_write(stream, buf, len)
#else /* !defined(_USE_KNETFILE) */
#if defined(_WIN32) || defined(_MSC_VER)
/* On Windows builds ftello/fseeko are mapped onto ftell/fseek. */
#define ftello(stream) ftell(stream)
#define fseeko(stream, offset, whence) fseek(stream, offset, whence)
#else /* !defined(_WIN32) */
extern off_t ftello(FILE *stream);
extern int fseeko(FILE *stream, off_t offset, int whence);
#endif /* defined(_WIN32) */
typedef FILE *_bgzf_file_t;
#define _bgzf_open(path, mode) fopen(path, mode)
#define _bgzf_dopen(fd, mode) fdopen(fd, mode)
#define _bgzf_close(stream) fclose(stream)
#define _bgzf_fileno(stream) fileno(stream)
#define _bgzf_tell(stream) ftello(stream)
#define _bgzf_seek(stream, offset, whence) fseeko(stream, offset, whence)
#define _bgzf_read(stream, buf, len) fread(buf, 1, len, stream)
#define _bgzf_write(stream, buf, len) fwrite(buf, 1, len, stream)
#endif /* defined(_USE_KNETFILE) */
|
||||||
|
|
||||||
|
#define BLOCK_HEADER_LENGTH 18
|
||||||
|
#define BLOCK_FOOTER_LENGTH 8
|
||||||
|
|
||||||
|
/* BGZF/GZIP header (specialized from RFC 1952; little endian):
|
||||||
|
+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+
|
||||||
|
| 31|139| 8| 4| 0| 0|255| 6| 66| 67| 2|BLK_LEN|
|
||||||
|
+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+
|
||||||
|
*/
|
||||||
|
/* Template for the 18-byte block header, plus one trailing zero byte (the
 * terminator of the string literal this table was originally written as). */
static const uint8_t g_magic[19] = {
	31, 139,     /* gzip magic numbers */
	8,           /* compression method */
	4,           /* flags */
	0, 0, 0, 0,  /* bytes 4-7 */
	0,           /* byte 8 */
	255,         /* byte 9 */
	6, 0,        /* little-endian 16-bit value 6 */
	'B', 'C',    /* subfield identifier */
	2, 0,        /* little-endian 16-bit value 2 */
	0, 0,        /* placeholder for the block length, filled in per block */
	0            /* trailing zero */
};
|
||||||
|
|
||||||
|
#ifdef BGZF_CACHE
/* One cached, already-inflated block. */
typedef struct {
	int size;            /* value of fp->block_length when the block was cached */
	uint8_t *block;      /* heap copy of the uncompressed block buffer */
	int64_t end_offset;  /* file offset to seek to after serving this block */
} cache_t;
#include "khash.h"
/* Instantiates the hash table type "cache": 64-bit integer key -> cache_t. */
KHASH_MAP_INIT_INT64(cache, cache_t)
#endif /* BGZF_CACHE */
|
||||||
|
|
||||||
|
/* Write `value` into buffer[0..1], least-significant byte first. */
static inline void packInt16(uint8_t *buffer, uint16_t value)
{
	int idx;
	for (idx = 0; idx < 2; ++idx)
		buffer[idx] = (uint8_t)(value >> (8 * idx));
}
|
||||||
|
|
||||||
|
/* Read a 16-bit little-endian value from buffer[0..1]; result is in 0..65535. */
static inline int unpackInt16(const uint8_t *buffer)
{
	const int low = buffer[0];
	const int high = buffer[1];
	return (high << 8) | low;
}
|
||||||
|
|
||||||
|
/* Write `value` into buffer[0..3], least-significant byte first. */
static inline void packInt32(uint8_t *buffer, uint32_t value)
{
	int idx;
	for (idx = 0; idx < 4; ++idx, value >>= 8)
		buffer[idx] = (uint8_t)value;
}
|
||||||
|
|
||||||
|
static BGZF *bgzf_read_init()
|
||||||
|
{
|
||||||
|
BGZF *fp;
|
||||||
|
fp = calloc(1, sizeof(BGZF));
|
||||||
|
fp->open_mode = 'r';
|
||||||
|
fp->uncompressed_block = malloc(BGZF_MAX_BLOCK_SIZE);
|
||||||
|
fp->compressed_block = malloc(BGZF_MAX_BLOCK_SIZE);
|
||||||
|
#ifdef BGZF_CACHE
|
||||||
|
fp->cache = kh_init(cache);
|
||||||
|
#endif
|
||||||
|
return fp;
|
||||||
|
}
|
||||||
|
|
||||||
|
static BGZF *bgzf_write_init(int compress_level) // compress_level==-1 for the default level
|
||||||
|
{
|
||||||
|
BGZF *fp;
|
||||||
|
fp = calloc(1, sizeof(BGZF));
|
||||||
|
fp->open_mode = 'w';
|
||||||
|
fp->uncompressed_block = malloc(BGZF_MAX_BLOCK_SIZE);
|
||||||
|
fp->compressed_block = malloc(BGZF_MAX_BLOCK_SIZE);
|
||||||
|
fp->compress_level = compress_level < 0? Z_DEFAULT_COMPRESSION : compress_level; // Z_DEFAULT_COMPRESSION==-1
|
||||||
|
if (fp->compress_level > 9) fp->compress_level = Z_DEFAULT_COMPRESSION;
|
||||||
|
return fp;
|
||||||
|
}
|
||||||
|
/*
 * Derive the compression level from an fopen-style mode string.
 *
 * The first decimal digit found in `mode` gives the level (0-9). If the
 * string contains 'u' the level is forced to 0 regardless of any digit.
 * With neither a digit nor 'u' the result is -1.
 */
static int mode2level(const char *__restrict mode)
{
	const char *p = mode;
	int compress_level = -1;
	/* advance to the first digit, or to the terminating NUL if there is none */
	while (*p && !(*p >= '0' && *p <= '9')) ++p;
	if (*p) compress_level = (int)*p - '0';
	if (strchr(mode, 'u')) compress_level = 0;
	return compress_level;
}
|
||||||
|
|
||||||
|
BGZF *bgzf_open(const char *path, const char *mode)
|
||||||
|
{
|
||||||
|
BGZF *fp = 0;
|
||||||
|
if (strchr(mode, 'r') || strchr(mode, 'R')) {
|
||||||
|
_bgzf_file_t fpr;
|
||||||
|
if ((fpr = _bgzf_open(path, "r")) == 0) return 0;
|
||||||
|
fp = bgzf_read_init();
|
||||||
|
fp->fp = fpr;
|
||||||
|
} else if (strchr(mode, 'w') || strchr(mode, 'W')) {
|
||||||
|
FILE *fpw;
|
||||||
|
if ((fpw = fopen(path, "w")) == 0) return 0;
|
||||||
|
fp = bgzf_write_init(mode2level(mode));
|
||||||
|
fp->fp = fpw;
|
||||||
|
}
|
||||||
|
return fp;
|
||||||
|
}
|
||||||
|
|
||||||
|
BGZF *bgzf_dopen(int fd, const char *mode)
|
||||||
|
{
|
||||||
|
BGZF *fp = 0;
|
||||||
|
if (strchr(mode, 'r') || strchr(mode, 'R')) {
|
||||||
|
_bgzf_file_t fpr;
|
||||||
|
if ((fpr = _bgzf_dopen(fd, "r")) == 0) return 0;
|
||||||
|
fp = bgzf_read_init();
|
||||||
|
fp->fp = fpr;
|
||||||
|
} else if (strchr(mode, 'w') || strchr(mode, 'W')) {
|
||||||
|
FILE *fpw;
|
||||||
|
if ((fpw = fdopen(fd, "w")) == 0) return 0;
|
||||||
|
fp = bgzf_write_init(mode2level(mode));
|
||||||
|
fp->fp = fpw;
|
||||||
|
}
|
||||||
|
return fp;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Deflate the block in fp->uncompressed_block into fp->compressed_block. Also adds an extra field that stores the compressed block length.
|
||||||
|
static int deflate_block(BGZF *fp, int block_length)
|
||||||
|
{
|
||||||
|
uint8_t *buffer = fp->compressed_block;
|
||||||
|
int buffer_size = BGZF_BLOCK_SIZE;
|
||||||
|
int input_length = block_length;
|
||||||
|
int compressed_length = 0;
|
||||||
|
int remaining;
|
||||||
|
uint32_t crc;
|
||||||
|
|
||||||
|
assert(block_length <= BGZF_BLOCK_SIZE); // guaranteed by the caller
|
||||||
|
memcpy(buffer, g_magic, BLOCK_HEADER_LENGTH); // the last two bytes are a place holder for the length of the block
|
||||||
|
while (1) { // loop to retry for blocks that do not compress enough
|
||||||
|
int status;
|
||||||
|
z_stream zs;
|
||||||
|
zs.zalloc = NULL;
|
||||||
|
zs.zfree = NULL;
|
||||||
|
zs.next_in = fp->uncompressed_block;
|
||||||
|
zs.avail_in = input_length;
|
||||||
|
zs.next_out = (void*)&buffer[BLOCK_HEADER_LENGTH];
|
||||||
|
zs.avail_out = buffer_size - BLOCK_HEADER_LENGTH - BLOCK_FOOTER_LENGTH;
|
||||||
|
status = deflateInit2(&zs, fp->compress_level, Z_DEFLATED, -15, 8, Z_DEFAULT_STRATEGY); // -15 to disable zlib header/footer
|
||||||
|
if (status != Z_OK) {
|
||||||
|
fp->errcode |= BGZF_ERR_ZLIB;
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
status = deflate(&zs, Z_FINISH);
|
||||||
|
if (status != Z_STREAM_END) { // not compressed enough
|
||||||
|
deflateEnd(&zs); // reset the stream
|
||||||
|
if (status == Z_OK) { // reduce the size and recompress
|
||||||
|
input_length -= 1024;
|
||||||
|
assert(input_length > 0); // logically, this should not happen
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
fp->errcode |= BGZF_ERR_ZLIB;
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (deflateEnd(&zs) != Z_OK) {
|
||||||
|
fp->errcode |= BGZF_ERR_ZLIB;
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
compressed_length = zs.total_out;
|
||||||
|
compressed_length += BLOCK_HEADER_LENGTH + BLOCK_FOOTER_LENGTH;
|
||||||
|
assert(compressed_length <= BGZF_BLOCK_SIZE);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
assert(compressed_length > 0);
|
||||||
|
packInt16((uint8_t*)&buffer[16], compressed_length - 1); // write the compressed_length; -1 to fit 2 bytes
|
||||||
|
crc = crc32(0L, NULL, 0L);
|
||||||
|
crc = crc32(crc, fp->uncompressed_block, input_length);
|
||||||
|
packInt32((uint8_t*)&buffer[compressed_length-8], crc);
|
||||||
|
packInt32((uint8_t*)&buffer[compressed_length-4], input_length);
|
||||||
|
|
||||||
|
remaining = block_length - input_length;
|
||||||
|
if (remaining > 0) {
|
||||||
|
assert(remaining <= input_length);
|
||||||
|
memcpy(fp->uncompressed_block, fp->uncompressed_block + input_length, remaining);
|
||||||
|
}
|
||||||
|
fp->block_offset = remaining;
|
||||||
|
return compressed_length;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Inflate the block in fp->compressed_block into fp->uncompressed_block
|
||||||
|
static int inflate_block(BGZF* fp, int block_length)
|
||||||
|
{
|
||||||
|
z_stream zs;
|
||||||
|
zs.zalloc = NULL;
|
||||||
|
zs.zfree = NULL;
|
||||||
|
zs.next_in = fp->compressed_block + 18;
|
||||||
|
zs.avail_in = block_length - 16;
|
||||||
|
zs.next_out = fp->uncompressed_block;
|
||||||
|
zs.avail_out = BGZF_BLOCK_SIZE;
|
||||||
|
|
||||||
|
if (inflateInit2(&zs, -15) != Z_OK) {
|
||||||
|
fp->errcode |= BGZF_ERR_ZLIB;
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (inflate(&zs, Z_FINISH) != Z_STREAM_END) {
|
||||||
|
inflateEnd(&zs);
|
||||||
|
fp->errcode |= BGZF_ERR_ZLIB;
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (inflateEnd(&zs) != Z_OK) {
|
||||||
|
fp->errcode |= BGZF_ERR_ZLIB;
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
return zs.total_out;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Check whether the bytes at `header` start a block of the expected form:
 * gzip magic 31,139; method 8; flag bit 4 set; 16-bit little-endian value 6
 * at offset 10; the two-byte identifier "BC" at offsets 12-13; and 16-bit
 * little-endian value 2 at offset 14.
 *
 * Returns 1 when every field matches and 0 otherwise. Reads header[0..15].
 */
static int check_header(const uint8_t *header)
{
	if (header[0] != 31 || header[1] != 139 || header[2] != 8) return 0;
	if ((header[3] & 4) == 0) return 0;
	if (unpackInt16((uint8_t*)&header[10]) != 6) return 0;
	if (header[12] != 'B' || header[13] != 'C') return 0;
	if (unpackInt16((uint8_t*)&header[14]) != 2) return 0;
	return 1;
}
|
||||||
|
|
||||||
|
#ifdef BGZF_CACHE
|
||||||
|
/*
 * Release the block cache of a handle opened for reading: frees every cached
 * block buffer and then the hash table itself. Does nothing for handles whose
 * open_mode is not 'r'.
 */
static void free_cache(BGZF *fp)
{
	khash_t(cache) *table = (khash_t(cache)*)fp->cache;
	khint_t slot;
	if (fp->open_mode == 'r') {
		slot = kh_begin(table);
		while (slot < kh_end(table)) {
			/* only occupied buckets own a block buffer */
			if (kh_exist(table, slot)) free(kh_val(table, slot).block);
			++slot;
		}
		kh_destroy(cache, table);
	}
}
|
||||||
|
|
||||||
|
/*
 * Try to serve the block starting at file offset `block_address` from the
 * cache.
 *
 * On a hit the cached data is copied into fp->uncompressed_block,
 * fp->block_address and fp->block_length are updated, fp->block_offset is
 * reset to 0 unless the previous block_length was 0, and the underlying file
 * is repositioned to the cached entry's end_offset.
 *
 * Returns 0 when the address is not cached, otherwise the cached block's
 * size.
 * NOTE(review): a cached entry whose size is 0 also returns 0 after the state
 * above has been updated; confirm the caller's handling of that case.
 */
static int load_block_from_cache(BGZF *fp, int64_t block_address)
{
	khash_t(cache) *table = (khash_t(cache)*)fp->cache;
	const khint_t slot = kh_get(cache, table, block_address);
	cache_t *entry;
	if (slot == kh_end(table)) return 0; /* not cached */
	entry = &kh_val(table, slot);
	if (fp->block_length != 0) fp->block_offset = 0;
	fp->block_address = block_address;
	fp->block_length = entry->size;
	memcpy(fp->uncompressed_block, entry->block, BGZF_BLOCK_SIZE);
	_bgzf_seek((_bgzf_file_t)fp->fp, entry->end_offset, SEEK_SET);
	return entry->size;
}
|
||||||
|
|
||||||
|
/*
 * Store a copy of the current uncompressed block in the cache, keyed by
 * fp->block_address.
 *
 * size: number of compressed bytes the block occupies in the file; it is
 * added to fp->block_address to record where the next block starts.
 *
 * Caching is best-effort: nothing is stored when the configured cache size
 * cannot hold a single block, when memory for the copy cannot be allocated,
 * or when the key cannot be inserted. When the cache is full one existing
 * entry is evicted first.
 */
static void cache_block(BGZF *fp, int size)
{
	int ret;
	khint_t k;
	cache_t *p;
	uint8_t *block;
	khash_t(cache) *h = (khash_t(cache)*)fp->cache;
	if (BGZF_BLOCK_SIZE >= fp->cache_size) return;
	/* Allocate the copy up front so an out-of-memory condition leaves the
	 * table untouched instead of leaving an entry with a NULL block. */
	block = (uint8_t*)malloc(BGZF_BLOCK_SIZE);
	if (block == NULL) return;
	if ((kh_size(h) + 1) * BGZF_BLOCK_SIZE > fp->cache_size) {
		/* A better way would be to remove the oldest block in the
		 * cache, but here we remove a random one for simplicity. This
		 * should not have a big impact on performance. */
		for (k = kh_begin(h); k < kh_end(h); ++k)
			if (kh_exist(h, k)) break;
		if (k < kh_end(h)) {
			free(kh_val(h, k).block);
			kh_del(cache, h, k);
		}
	}
	k = kh_put(cache, h, fp->block_address, &ret);
	if (ret <= 0) { /* 0: key already present (a bug if it happens); <0: insertion failed */
		free(block);
		return;
	}
	p = &kh_val(h, k);
	p->size = fp->block_length;
	p->end_offset = fp->block_address + size;
	p->block = block;
	memcpy(p->block, fp->uncompressed_block, BGZF_BLOCK_SIZE);
}
|
||||||
|
#else
|
||||||
|
/* Caching compiled out: nothing to release. */
static void free_cache(BGZF *fp)
{
	(void)fp;
}

/* Caching compiled out: every lookup is a miss (returns 0). */
static int load_block_from_cache(BGZF *fp, int64_t block_address)
{
	(void)fp;
	(void)block_address;
	return 0;
}

/* Caching compiled out: blocks are never stored. */
static void cache_block(BGZF *fp, int size)
{
	(void)fp;
	(void)size;
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/*
 * Read the next block from the underlying file and inflate it into
 * fp->uncompressed_block, consulting the block cache first.
 *
 * On success fp->block_address is the file offset the block started at and
 * fp->block_length the number of uncompressed bytes; fp->block_offset is
 * reset to 0 unless the previous block_length was 0. At end of file
 * fp->block_length is set to 0.
 *
 * Returns 0 on success or end of file, and -1 on error with fp->errcode
 * updated: BGZF_ERR_HEADER for a short or malformed header (including an
 * impossible block length), BGZF_ERR_IO for a truncated block, or whatever
 * inflate_block() reports.
 */
int bgzf_read_block(BGZF *fp)
{
	uint8_t header[BLOCK_HEADER_LENGTH], *compressed_block;
	int count, size = 0, block_length, remaining;
	int64_t block_address;
	block_address = _bgzf_tell((_bgzf_file_t)fp->fp);
	if (load_block_from_cache(fp, block_address)) return 0;
	count = _bgzf_read((_bgzf_file_t)fp->fp, header, sizeof(header));
	if (count == 0) { // no data read
		fp->block_length = 0;
		return 0;
	}
	if (count != sizeof(header) || !check_header(header)) {
		fp->errcode |= BGZF_ERR_HEADER;
		return -1;
	}
	size = count;
	block_length = unpackInt16((uint8_t*)&header[16]) + 1; // +1 because when writing this number, we used "-1"
	// The length comes straight from the file. Anything too small to hold the
	// header and footer is corrupt, and would make `remaining` negative, i.e.
	// a huge byte count once converted for the read call below.
	if (block_length < BLOCK_HEADER_LENGTH + BLOCK_FOOTER_LENGTH) {
		fp->errcode |= BGZF_ERR_HEADER;
		return -1;
	}
	compressed_block = (uint8_t*)fp->compressed_block;
	memcpy(compressed_block, header, BLOCK_HEADER_LENGTH);
	remaining = block_length - BLOCK_HEADER_LENGTH;
	count = _bgzf_read((_bgzf_file_t)fp->fp, &compressed_block[BLOCK_HEADER_LENGTH], remaining);
	if (count != remaining) {
		fp->errcode |= BGZF_ERR_IO;
		return -1;
	}
	size += count;
	if ((count = inflate_block(fp, block_length)) < 0) return -1;
	if (fp->block_length != 0) fp->block_offset = 0; // Do not reset offset if this read follows a seek.
	fp->block_address = block_address;
	fp->block_length = count;
	cache_block(fp, size);
	return 0;
}
|
||||||
|
|
||||||
|
/*
 * Copy up to `length` uncompressed bytes from the stream into `data`,
 * loading further blocks as the current one is exhausted.
 *
 * Returns the number of bytes copied (fewer than `length` once no more data
 * is available, 0 when `length` is not positive) or -1 if a block could not
 * be read.
 */
ssize_t bgzf_read(BGZF *fp, void *data, ssize_t length)
{
    uint8_t *dst = data;
    ssize_t done;

    if (length <= 0) return 0;
    assert(fp->open_mode == 'r');

    for (done = 0; done < length; ) {
        uint8_t *block;
        int chunk;
        int avail = fp->block_length - fp->block_offset;

        if (avail <= 0) {
            /* current block is used up: fetch the next one */
            if (bgzf_read_block(fp) != 0) return -1;
            avail = fp->block_length - fp->block_offset;
            if (avail <= 0) break; /* nothing more to read */
        }

        chunk = length - done < avail? length - done : avail;
        block = fp->uncompressed_block;
        memcpy(dst, block + fp->block_offset, chunk);

        dst += chunk;
        done += chunk;
        fp->block_offset += chunk;
    }

    /* Block fully consumed: point the handle at the start of the next block. */
    if (fp->block_offset == fp->block_length) {
        fp->block_address = _bgzf_tell((_bgzf_file_t)fp->fp);
        fp->block_length = 0;
        fp->block_offset = 0;
    }
    return done;
}
|
||||||
|
|
||||||
|
/*
 * Compress and write out whatever is pending in the uncompressed buffer.
 *
 * Loops while fp->block_offset stays positive after a deflate_block() call.
 * Returns 0 on success, -1 if compression fails or the write is short (the
 * latter also sets BGZF_ERR_IO).
 */
int bgzf_flush(BGZF *fp)
{
    assert(fp->open_mode == 'w');
    while (fp->block_offset > 0) {
        size_t written;
        int packed = deflate_block(fp, fp->block_offset);

        if (packed < 0) return -1;

        written = fwrite(fp->compressed_block, 1, packed, fp->fp);
        if (written != packed) {
            fp->errcode |= BGZF_ERR_IO; // possibly truncated file
            return -1;
        }
        fp->block_address += packed;
    }
    return 0;
}
|
||||||
|
|
||||||
|
/*
 * Flush the pending data only if appending `size` more bytes would overflow
 * the current block.
 *
 * Returns the result of bgzf_flush() when a flush was performed, and 0 when
 * no flush was necessary (previously -1, which was indistinguishable from a
 * flush failure).
 */
int bgzf_flush_try(BGZF *fp, ssize_t size)
{
    if (fp->block_offset + size > BGZF_BLOCK_SIZE)
        return bgzf_flush(fp);
    return 0;
}
|
||||||
|
|
||||||
|
/*
 * Append `length` bytes from `data` to the stream, flushing each time the
 * uncompressed buffer fills up.
 *
 * Returns the number of bytes accepted, or -1 if flushing a full block
 * fails. The running total is kept in ssize_t so that lengths above INT_MAX
 * do not overflow.
 */
ssize_t bgzf_write(BGZF *fp, const void *data, ssize_t length)
{
    const uint8_t *input = data;
    ssize_t bytes_written = 0;
    assert(fp->open_mode == 'w');
    while (bytes_written < length) {
        uint8_t *buffer = fp->uncompressed_block;
        ssize_t room = BGZF_BLOCK_SIZE - fp->block_offset;
        ssize_t left = length - bytes_written;
        /* bounded by BGZF_BLOCK_SIZE, so it always fits in an int */
        int copy_length = room < left? room : left;
        memcpy(buffer + fp->block_offset, input, copy_length);
        fp->block_offset += copy_length;
        input += copy_length;
        bytes_written += copy_length;
        if (fp->block_offset == BGZF_BLOCK_SIZE && bgzf_flush(fp) != 0) return -1;
    }
    return bytes_written;
}
|
||||||
|
|
||||||
|
/*
 * Finish and close a BGZF stream, then release the handle.
 *
 * In write mode the pending data is flushed and an empty block is appended
 * before the file is closed. Returns 0 on success and -1 on failure; as
 * before, the handle and its buffers are released only on the success path.
 */
int bgzf_close(BGZF* fp)
{
    int ret, block_length;
    if (fp == 0) return -1;
    if (fp->open_mode == 'w') {
        if (bgzf_flush(fp) != 0) return -1;
        block_length = deflate_block(fp, 0); // write an empty block
        if (block_length < 0) return -1; // a negative length must never reach fwrite()
        if (fwrite(fp->compressed_block, 1, block_length, fp->fp) != (size_t)block_length
            || fflush(fp->fp) != 0) {
            fp->errcode |= BGZF_ERR_IO;
            return -1;
        }
    }
    ret = fp->open_mode == 'w'? fclose(fp->fp) : _bgzf_close(fp->fp);
    if (ret != 0) return -1;
    free(fp->uncompressed_block);
    free(fp->compressed_block);
    free_cache(fp);
    free(fp);
    return 0;
}
|
||||||
|
|
||||||
|
/* Record the requested cache size on the handle; a null handle is ignored. */
void bgzf_set_cache_size(BGZF *fp, int cache_size)
{
    if (!fp) return;
    fp->cache_size = cache_size;
}
|
||||||
|
|
||||||
|
/*
 * Check whether the file ends with the 28-byte empty BGZF block.
 *
 * The current file position is saved and restored around the check.
 * Returns 1 if the marker is present; 0 if it is absent, the seek to the
 * end fails, or fewer than 28 bytes could be read.
 */
int bgzf_check_EOF(BGZF *fp)
{
    static const uint8_t magic[28] = "\037\213\010\4\0\0\0\0\0\377\6\0\102\103\2\0\033\0\3\0\0\0\0\0\0\0\0\0";
    uint8_t buf[28];
    off_t offset;
    int n;
    offset = _bgzf_tell((_bgzf_file_t)fp->fp);
    if (_bgzf_seek(fp->fp, -28, SEEK_END) < 0) return 0;
    n = _bgzf_read(fp->fp, buf, 28);
    _bgzf_seek(fp->fp, offset, SEEK_SET);
    if (n != 28) return 0; // short read: buf is not fully initialised
    return (memcmp(magic, buf, 28) == 0)? 1 : 0;
}
|
||||||
|
|
||||||
|
/*
 * Reposition a stream opened for reading to the virtual offset `pos`.
 *
 * The upper bits of `pos` (pos >> 16) are the file offset of a block and the
 * low 16 bits are the offset inside it. Only SEEK_SET is accepted.
 * Returns 0 on success; -1 with BGZF_ERR_MISUSE for a bad mode or `where`,
 * or with BGZF_ERR_IO when the underlying seek fails.
 */
int64_t bgzf_seek(BGZF* fp, int64_t pos, int where)
{
    const int within_block = pos & 0xFFFF;
    const int64_t file_offset = pos >> 16;

    if (fp->open_mode != 'r' || where != SEEK_SET) {
        fp->errcode |= BGZF_ERR_MISUSE;
        return -1;
    }
    if (_bgzf_seek(fp->fp, file_offset, SEEK_SET) < 0) {
        fp->errcode |= BGZF_ERR_IO;
        return -1;
    }

    fp->block_address = file_offset;
    fp->block_offset = within_block;
    fp->block_length = 0; // indicates current block has not been loaded
    return 0;
}
|
||||||
|
|
||||||
|
int bgzf_is_bgzf(const char *fn)
|
||||||
|
{
|
||||||
|
uint8_t buf[16];
|
||||||
|
int n;
|
||||||
|
_bgzf_file_t fp;
|
||||||
|
if ((fp = _bgzf_open(fn, "r")) == 0) return 0;
|
||||||
|
n = _bgzf_read(fp, buf, 16);
|
||||||
|
_bgzf_close(fp);
|
||||||
|
if (n != 16) return 0;
|
||||||
|
return memcmp(g_magic, buf, 16) == 0? 1 : 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Read a single byte from the stream.
 *
 * Returns the byte as a non-negative int, -1 when no more data is
 * available, or -2 when the next block cannot be read.
 */
int bgzf_getc(BGZF *fp)
{
    const unsigned char *bytes;
    int ch;

    if (fp->block_offset >= fp->block_length) {
        if (bgzf_read_block(fp) != 0) return -2; /* error */
        if (fp->block_length == 0) return -1; /* end-of-file */
    }

    bytes = (const unsigned char*)fp->uncompressed_block;
    ch = bytes[fp->block_offset];
    fp->block_offset++;

    /* Last byte of the block taken: move on to the next block's address. */
    if (fp->block_offset == fp->block_length) {
        fp->block_address = _bgzf_tell((_bgzf_file_t)fp->fp);
        fp->block_length = 0;
        fp->block_offset = 0;
    }
    return ch;
}
|
||||||
|
|
||||||
|
#ifndef kroundup32
|
||||||
|
#define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x))
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/*
 * Read bytes up to (not including) the next `delim` into `str`, replacing
 * its previous content and NUL-terminating it.
 *
 * Returns the length of the line (0 for an empty line); -1 when no data is
 * left and nothing was read; -2 when a block cannot be read; -3 when the
 * string buffer cannot be grown. On -2 and -3 the content of `str` is not
 * terminated and must not be used.
 */
int bgzf_getline(BGZF *fp, int delim, kstring_t *str)
{
    int l, state = 0;
    unsigned char *buf = (unsigned char*)fp->uncompressed_block;
    str->l = 0;
    do {
        if (fp->block_offset >= fp->block_length) {
            if (bgzf_read_block(fp) != 0) { state = -2; break; }
            if (fp->block_length == 0) { state = -1; break; }
        }
        for (l = fp->block_offset; l < fp->block_length && buf[l] != delim; ++l);
        if (l < fp->block_length) state = 1;
        l -= fp->block_offset;
        if (str->l + l + 1 >= str->m) {
            size_t new_m = str->l + l + 2;
            char *grown;
            kroundup32(new_m);
            /* Keep the old buffer reachable if realloc() fails. */
            grown = (char*)realloc(str->s, new_m);
            if (grown == 0) { state = -3; break; }
            str->s = grown;
            str->m = new_m;
        }
        memcpy(str->s + str->l, buf + fp->block_offset, l);
        str->l += l;
        fp->block_offset += l + 1;
        if (fp->block_offset >= fp->block_length) {
            fp->block_address = _bgzf_tell((_bgzf_file_t)fp->fp);
            fp->block_offset = 0;
            fp->block_length = 0;
        }
    } while (state == 0);
    if (state < -1) return state; // do not report a partial line as success
    if (str->l == 0 && state < 0) return state;
    str->s[str->l] = 0;
    return str->l;
}
|
||||||
|
|
@ -0,0 +1,196 @@
|
||||||
|
/* The MIT License
|
||||||
|
|
||||||
|
Copyright (c) 2008 Broad Institute / Massachusetts Institute of Technology
|
||||||
|
2011 Attractive Chaos <attractor@live.co.uk>
|
||||||
|
|
||||||
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
of this software and associated documentation files (the "Software"), to deal
|
||||||
|
in the Software without restriction, including without limitation the rights
|
||||||
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
copies of the Software, and to permit persons to whom the Software is
|
||||||
|
furnished to do so, subject to the following conditions:
|
||||||
|
|
||||||
|
The above copyright notice and this permission notice shall be included in
|
||||||
|
all copies or substantial portions of the Software.
|
||||||
|
|
||||||
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* The BGZF library was originally written by Bob Handsaker from the Broad
|
||||||
|
* Institute. It was later improved by the SAMtools developers. */
|
||||||
|
|
||||||
|
#ifndef __BGZF_H
|
||||||
|
#define __BGZF_H
|
||||||
|
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <zlib.h>
|
||||||
|
|
||||||
|
#define BGZF_BLOCK_SIZE 0x10000
|
||||||
|
#define BGZF_MAX_BLOCK_SIZE 0x10000
|
||||||
|
|
||||||
|
#define BGZF_ERR_ZLIB 1
|
||||||
|
#define BGZF_ERR_HEADER 2
|
||||||
|
#define BGZF_ERR_IO 4
|
||||||
|
#define BGZF_ERR_MISUSE 8
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
int open_mode:8, compress_level:8, errcode:16;
|
||||||
|
int cache_size;
|
||||||
|
int block_length, block_offset;
|
||||||
|
int64_t block_address;
|
||||||
|
void *uncompressed_block, *compressed_block;
|
||||||
|
void *cache; // a pointer to a hash table
|
||||||
|
void *fp; // actual file handler; FILE* on writing; FILE* or knetFile* on reading
|
||||||
|
} BGZF;
|
||||||
|
|
||||||
|
#ifndef KSTRING_T
|
||||||
|
#define KSTRING_T kstring_t
|
||||||
|
/* Growable character buffer. */
typedef struct __kstring_t {
    size_t l; /* number of bytes currently stored */
    size_t m; /* number of bytes allocated for s */
    char *s;  /* the buffer itself */
} kstring_t;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/******************
|
||||||
|
* Basic routines *
|
||||||
|
******************/
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Open an existing file descriptor for reading or writing.
|
||||||
|
*
|
||||||
|
* @param fd file descriptor
|
||||||
|
* @param mode mode matching /[rwu0-9]+/: 'r' for reading, 'w' for writing and a digit specifies
|
||||||
|
* the zlib compression level; if both 'r' and 'w' are present, 'w' is ignored.
|
||||||
|
* @return BGZF file handler; 0 on error
|
||||||
|
*/
|
||||||
|
BGZF* bgzf_dopen(int fd, const char *mode);
|
||||||
|
|
||||||
|
#define bgzf_fdopen(fd, mode) bgzf_dopen((fd), (mode)) // for backward compatibility
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Open the specified file for reading or writing.
|
||||||
|
*/
|
||||||
|
BGZF* bgzf_open(const char* path, const char *mode);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Close the BGZF and free all associated resources.
|
||||||
|
*
|
||||||
|
* @param fp BGZF file handler
|
||||||
|
* @return 0 on success and -1 on error
|
||||||
|
*/
|
||||||
|
int bgzf_close(BGZF *fp);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Read up to _length_ bytes from the file storing into _data_.
|
||||||
|
*
|
||||||
|
* @param fp BGZF file handler
|
||||||
|
* @param data data array to read into
|
||||||
|
* @param length size of data to read
|
||||||
|
* @return number of bytes actually read; 0 on end-of-file and -1 on error
|
||||||
|
*/
|
||||||
|
ssize_t bgzf_read(BGZF *fp, void *data, ssize_t length);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Write _length_ bytes from _data_ to the file.
|
||||||
|
*
|
||||||
|
* @param fp BGZF file handler
|
||||||
|
* @param data data array to write
|
||||||
|
* @param length size of data to write
|
||||||
|
* @return number of bytes actually written; -1 on error
|
||||||
|
*/
|
||||||
|
ssize_t bgzf_write(BGZF *fp, const void *data, ssize_t length);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Write the data in the buffer to the file.
|
||||||
|
*/
|
||||||
|
int bgzf_flush(BGZF *fp);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Return a virtual file pointer to the current location in the file.
|
||||||
|
* No interpretation of the value should be made, other than a subsequent
|
||||||
|
* call to bgzf_seek can be used to position the file at the same point.
|
||||||
|
* Return value is non-negative on success.
|
||||||
|
*/
|
||||||
|
#define bgzf_tell(fp) ((fp->block_address << 16) | (fp->block_offset & 0xFFFF))
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Set the file to read from the location specified by _pos_.
|
||||||
|
*
|
||||||
|
* @param fp BGZF file handler
|
||||||
|
* @param pos virtual file offset returned by bgzf_tell()
|
||||||
|
* @param whence must be SEEK_SET
|
||||||
|
* @return 0 on success and -1 on error
|
||||||
|
*/
|
||||||
|
int64_t bgzf_seek(BGZF *fp, int64_t pos, int whence);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Check if the BGZF end-of-file (EOF) marker is present
|
||||||
|
*
|
||||||
|
* @param fp BGZF file handler opened for reading
|
||||||
|
* @return 1 if EOF is present; 0 if not or on I/O error
|
||||||
|
*/
|
||||||
|
int bgzf_check_EOF(BGZF *fp);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Check if a file is in the BGZF format
|
||||||
|
*
|
||||||
|
* @param fn file name
|
||||||
|
* @return 1 if _fn_ is BGZF; 0 if not or on I/O error
|
||||||
|
*/
|
||||||
|
int bgzf_is_bgzf(const char *fn);
|
||||||
|
|
||||||
|
/*********************
|
||||||
|
* Advanced routines *
|
||||||
|
*********************/
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Set the cache size. Only effective when compiled with -DBGZF_CACHE.
|
||||||
|
*
|
||||||
|
* @param fp BGZF file handler
|
||||||
|
* @param size size of cache in bytes; 0 to disable caching (default)
|
||||||
|
*/
|
||||||
|
void bgzf_set_cache_size(BGZF *fp, int size);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Flush the file if the remaining buffer size is smaller than _size_
|
||||||
|
*/
|
||||||
|
int bgzf_flush_try(BGZF *fp, ssize_t size);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Read one byte from a BGZF file. It is faster than bgzf_read()
|
||||||
|
* @param fp BGZF file handler
|
||||||
|
* @return byte read; -1 on end-of-file and -2 on error
|
||||||
|
*/
|
||||||
|
int bgzf_getc(BGZF *fp);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Read one line from a BGZF file. It is faster than bgzf_getc()
|
||||||
|
*
|
||||||
|
* @param fp BGZF file handler
|
||||||
|
* @param delim delimiter
|
||||||
|
* @param str string to write to; must be initialized
|
||||||
|
* @return length of the string (0 for an empty line); -1 on end-of-file; -2 or less on error
|
||||||
|
*/
|
||||||
|
int bgzf_getline(BGZF *fp, int delim, kstring_t *str);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Read the next BGZF block.
|
||||||
|
*/
|
||||||
|
int bgzf_read_block(BGZF *fp);
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
@ -0,0 +1,202 @@
|
||||||
|
#ifndef KAVL_HPP
|
||||||
|
#define KAVL_HPP
|
||||||
|
|
||||||
|
#include <functional>
|
||||||
|
|
||||||
|
namespace klib {
|
||||||
|
|
||||||
|
/**
 * AVL tree of unique values ordered by `Less`.
 *
 * Every node stores the number of nodes in its subtree, which lets find()
 * and insert() also report how many stored values are <= the query.
 * T must be default-constructible and copy-assignable (nodes are created
 * with `new Node` and then assigned).
 *
 * The tree owns its nodes and is therefore not copyable: the implicit copy
 * operations would share `root` between two objects and free it twice.
 */
template<class T, typename Less = std::less<T> >
class Avl {
    static const int MAX_DEPTH = 64; // capacity of the per-call path arrays
    struct Node {
        T data;
        signed char balance; // height(right) - height(left)
        unsigned size;       // number of nodes in this subtree
        Node *p[2];          // p[0]: left child, p[1]: right child
    };
    Node *root;
    // Declared but never defined: copying is disabled.
    Avl(const Avl &);
    Avl &operator=(const Avl &);
    // Three-way comparison built from Less: <0, 0 or >0.
    inline int cmp_func(const T &x, const T &y) {
        return Less()(y, x) - Less()(x, y);
    }
    // Size of the subtree under child `dir` of p; 0 if that child is absent.
    inline unsigned child_size(Node *p, int dir) {
        return p->p[dir]? p->p[dir]->size : 0;
    }
    // one rotation: (a,(b,c)q)p => ((a,b)p,c)q
    inline Node *rotate1(Node *p, int dir) { // dir=0 to left; dir=1 to right
        int opp = 1 - dir; // opposite direction
        Node *q = p->p[opp];
        unsigned size_p = p->size;
        p->size -= q->size - child_size(q, dir);
        q->size = size_p;
        p->p[opp] = q->p[dir];
        q->p[dir] = p;
        return q;
    }
    // two consecutive rotations: (a,((b,c)r,d)q)p => ((a,b)p,(c,d)q)r
    inline Node *rotate2(Node *p, int dir) {
        int b1, opp = 1 - dir;
        Node *q = p->p[opp], *r = q->p[dir];
        unsigned size_x_dir = child_size(r, dir);
        r->size = p->size;
        p->size -= q->size - size_x_dir;
        q->size -= size_x_dir + 1;
        p->p[opp] = r->p[dir];
        r->p[dir] = p;
        q->p[dir] = r->p[opp];
        r->p[opp] = q;
        b1 = dir == 0? +1 : -1;
        if (r->balance == b1) q->balance = 0, p->balance = -b1;
        else if (r->balance == 0) q->balance = p->balance = 0;
        else q->balance = b1, p->balance = 0;
        r->balance = 0;
        return r;
    }
    // Free every node without recursion: left children are rotated up until
    // the current node has none, then the node is deleted.
    void destroy(Node *r) {
        Node *p, *q;
        for (p = r; p; p = q) {
            if (p->p[0] == 0) {
                q = p->p[1];
                delete p;
            } else {
                q = p->p[0];
                p->p[0] = q->p[1];
                q->p[1] = p;
            }
        }
    }
public:
    Avl() : root(NULL) {}
    ~Avl() { destroy(root); }
    /// Number of stored values.
    unsigned size() const { return root? root->size : 0; }
    /**
     * Look up `data`.
     * @param data value to search for
     * @param cnt_ if not NULL, receives the number of stored values <= data
     * @return pointer to the stored value, or NULL if absent
     */
    T *find(const T &data, unsigned *cnt_ = NULL) {
        Node *p = root;
        unsigned cnt = 0;
        while (p != 0) {
            int cmp = cmp_func(data, p->data);
            if (cmp >= 0) cnt += child_size(p, 0) + 1;
            if (cmp < 0) p = p->p[0];
            else if (cmp > 0) p = p->p[1];
            else break;
        }
        if (cnt_) *cnt_ = cnt;
        return p? &p->data : NULL;
    }
    /**
     * Insert `data` unless an equal value is already stored.
     * @param data   value to insert
     * @param is_new if not NULL, set to true when a node was added
     * @param cnt_   if not NULL, receives the number of values <= data that
     *               were stored before this call
     * @return pointer to the stored value (new or pre-existing)
     */
    T *insert(const T &data, bool *is_new = NULL, unsigned *cnt_ = NULL) {
        unsigned char stack[MAX_DEPTH];
        Node *path[MAX_DEPTH];
        Node *bp, *bq;
        Node *x, *p, *q, *r = 0; // _r_ is potentially the new root
        int i, which = 0, top, b1, path_len;
        unsigned cnt = 0;
        bp = root, bq = 0;
        if (is_new) *is_new = false;
        // find the insertion location
        for (p = bp, q = bq, top = path_len = 0; p; q = p, p = p->p[which]) {
            int cmp = cmp_func(data, p->data);
            if (cmp >= 0) cnt += child_size(p, 0) + 1;
            if (cmp == 0) {
                if (cnt_) *cnt_ = cnt;
                return &p->data;
            }
            if (p->balance != 0)
                bq = q, bp = p, top = 0; // deepest unbalanced ancestor so far
            stack[top++] = which = (cmp > 0);
            path[path_len++] = p;
        }
        if (cnt_) *cnt_ = cnt;
        x = new Node;
        x->data = data, x->balance = 0, x->size = 1, x->p[0] = x->p[1] = 0;
        if (is_new) *is_new = true;
        if (q == 0) root = x;
        else q->p[which] = x;
        if (bp == 0) return &x->data;
        for (i = 0; i < path_len; ++i) ++path[i]->size;
        for (p = bp, top = 0; p != x; p = p->p[stack[top]], ++top) /* update balance factors */
            if (stack[top] == 0) --p->balance;
            else ++p->balance;
        if (bp->balance > -2 && bp->balance < 2) return &x->data; /* no re-balance needed */
        // re-balance
        which = (bp->balance < 0);
        b1 = which == 0? +1 : -1;
        q = bp->p[1 - which];
        if (q->balance == b1) {
            r = rotate1(bp, which);
            q->balance = bp->balance = 0;
        } else r = rotate2(bp, which);
        if (bq == 0) root = r;
        else bq->p[bp != bq->p[0]] = r;
        return &x->data;
    }
    /**
     * Remove the value equal to `data`.
     * @return true if a value was removed, false if none was found
     */
    bool erase(const T &data) {
        Node *p, *path[MAX_DEPTH], fake;
        unsigned char dir[MAX_DEPTH];
        int i, d = 0, cmp;
        // `fake` acts as a parent of the root so the root needs no special case
        fake.p[0] = root, fake.p[1] = 0;
        for (cmp = -1, p = &fake; cmp; cmp = cmp_func(data, p->data)) {
            int which = (cmp > 0);
            dir[d] = which;
            path[d++] = p;
            p = p->p[which];
            if (p == 0) return false;
        }
        for (i = 1; i < d; ++i) --path[i]->size;
        if (p->p[1] == 0) { // ((1,.)2,3)4 => (1,3)4; p=2
            path[d-1]->p[dir[d-1]] = p->p[0];
        } else {
            Node *q = p->p[1];
            if (q->p[0] == 0) { // ((1,2)3,4)5 => ((1)2,4)5; p=3
                q->p[0] = p->p[0];
                q->balance = p->balance;
                path[d-1]->p[dir[d-1]] = q;
                path[d] = q, dir[d++] = 1;
                q->size = p->size - 1;
            } else { // ((1,((.,2)3,4)5)6,7)8 => ((1,(2,4)5)3,7)8; p=6
                Node *r;
                int e = d++; // backup _d_
                for (;;) {
                    dir[d] = 0;
                    path[d++] = q;
                    r = q->p[0];
                    if (r->p[0] == 0) break;
                    q = r;
                }
                r->p[0] = p->p[0];
                q->p[0] = r->p[1];
                r->p[1] = p->p[1];
                r->balance = p->balance;
                path[e-1]->p[dir[e-1]] = r;
                path[e] = r, dir[e] = 1;
                for (i = e + 1; i < d; ++i) --path[i]->size;
                r->size = p->size - 1;
            }
        }
        // walk back up the recorded path, restoring the balance invariant
        while (--d > 0) {
            Node *q = path[d];
            int which, other, b1 = 1, b2 = 2;
            which = dir[d], other = 1 - which;
            if (which) b1 = -b1, b2 = -b2;
            q->balance += b1;
            if (q->balance == b1) break;
            else if (q->balance == b2) {
                Node *r = q->p[other];
                if (r->balance == -b1) {
                    path[d-1]->p[dir[d-1]] = rotate2(q, which);
                } else {
                    path[d-1]->p[dir[d-1]] = rotate1(q, which);
                    if (r->balance == 0) {
                        r->balance = -b1;
                        q->balance = b1;
                        break;
                    } else r->balance = q->balance = 0;
                }
            }
        }
        root = fake.p[0];
        delete p;
        return true;
    }
};
|
||||||
|
|
||||||
|
} // end of namespace klib
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
@ -0,0 +1,163 @@
|
||||||
|
#ifndef KHASH_HPP
|
||||||
|
#define KHASH_HPP
|
||||||
|
|
||||||
|
#include <memory>
|
||||||
|
#include <functional>
|
||||||
|
#include <cstdlib> // for malloc() etc
|
||||||
|
#include <cstring> // for memset()
|
||||||
|
|
||||||
|
#include <stdint.h> // for uint32_t
|
||||||
|
|
||||||
|
namespace klib {
|
||||||
|
|
||||||
|
#ifndef kroundup32 // FIXME: doesn't work for 64-bit integers
#define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x))
#endif

// Two flag bits per bucket, 16 buckets per uint32_t: bit 1 = empty, bit 0 = deleted.
#define __ac_isempty(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&2)
#define __ac_isdel(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&1)
#define __ac_iseither(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&3)
#define __ac_set_isdel_false(flag, i) (flag[i>>4]&=~(1ul<<((i&0xfU)<<1)))
#define __ac_set_isempty_false(flag, i) (flag[i>>4]&=~(2ul<<((i&0xfU)<<1)))
#define __ac_set_isboth_false(flag, i) (flag[i>>4]&=~(3ul<<((i&0xfU)<<1)))
#define __ac_set_isdel_true(flag, i) (flag[i>>4]|=1ul<<((i&0xfU)<<1))

// Number of uint32_t words needed to hold the flags of m buckets.
#define __ac_fsize(m) ((m) < 16? 1 : (m)>>4)

/**
 * Open-addressing hash set of keys of type T.
 *
 * Buckets are addressed by index (khint_t); end() is the "not found" index.
 * The key array is managed with malloc/realloc/free and keys are moved by
 * plain assignment, so T is expected to be a trivially copyable type.
 *
 * The table owns its buffers and is therefore not copyable: the implicit
 * copy operations would make two objects free the same memory.
 */
template<class T, class Hash, class Eq = std::equal_to<T>, typename khint_t = uint32_t>
class KHash {
    khint_t n_buckets, count, n_occupied, upper_bound;
    uint32_t *flags;
    T *keys;
    // Declared but never defined: copying is disabled.
    KHash(const KHash &);
    KHash &operator=(const KHash &);
public:
    KHash() : n_buckets(0), count(0), n_occupied(0), upper_bound(0), flags(NULL), keys(NULL) {}
    ~KHash() { std::free(flags); std::free(keys); }
    khint_t capacity(void) const { return n_buckets; }
    khint_t size(void) const { return count; }
    khint_t begin(void) const { return 0; }
    khint_t end(void) const { return n_buckets; }

    /// True if bucket x holds a live key (neither empty nor deleted).
    bool exist(khint_t x) const { return !__ac_iseither(flags, x); }
    /// Key stored in bucket x; only meaningful when exist(x) is true.
    T &at(khint_t x) { return keys[x]; }

    /// Bucket index of `key`, or end() if it is not in the table.
    khint_t get(const T &key) const {
        if (n_buckets) {
            khint_t k, i, last, mask, step = 0;
            mask = n_buckets - 1;
            k = Hash()(key); i = k & mask;
            last = i;
            while (!__ac_isempty(flags, i) && (__ac_isdel(flags, i) || !Eq()(keys[i], key))) {
                i = (i + (++step)) & mask;
                if (i == last) return n_buckets;
            }
            return __ac_iseither(flags, i)? n_buckets : i;
        } else return 0;
    }

    /**
     * Rehash into a table of at least new_n_buckets buckets (rounded up to a
     * power of two, minimum 4). Does nothing if the current keys would not
     * fit. Returns 0 on success and -1 if memory allocation fails.
     */
    int resize(khint_t new_n_buckets) {
        uint32_t *new_flags = 0;
        khint_t j = 1;
        {
            kroundup32(new_n_buckets);
            if (new_n_buckets < 4) new_n_buckets = 4;
            if (count >= (new_n_buckets>>1) + (new_n_buckets>>2)) j = 0; /* requested count is too small */
            else { /* hash table count to be changed (shrink or expand); rehash */
                new_flags = (uint32_t*)std::malloc(__ac_fsize(new_n_buckets) * sizeof(uint32_t));
                if (!new_flags) return -1;
                ::memset(new_flags, 0xaa, __ac_fsize(new_n_buckets) * sizeof(uint32_t));
                if (n_buckets < new_n_buckets) { /* expand */
                    T *new_keys = (T*)std::realloc((void *)keys, new_n_buckets * sizeof(T));
                    if (!new_keys) { std::free(new_flags); return -1; }
                    keys = new_keys;
                } /* otherwise shrink */
            }
        }
        if (j) { /* rehashing is needed */
            for (j = 0; j != n_buckets; ++j) {
                if (__ac_iseither(flags, j) == 0) {
                    T key = keys[j];
                    khint_t new_mask;
                    new_mask = new_n_buckets - 1;
                    __ac_set_isdel_true(flags, j);
                    while (1) { /* kick-out process; sort of like in Cuckoo hashing */
                        khint_t k, i, step = 0;
                        k = Hash()(key);
                        i = k & new_mask;
                        while (!__ac_isempty(new_flags, i)) i = (i + (++step)) & new_mask;
                        __ac_set_isempty_false(new_flags, i);
                        if (i < n_buckets && __ac_iseither(flags, i) == 0) { /* kick out the existing element */
                            { T tmp = keys[i]; keys[i] = key; key = tmp; }
                            __ac_set_isdel_true(flags, i); /* mark it as deleted in the old hash table */
                        } else { /* write the element and jump out of the loop */
                            keys[i] = key;
                            break;
                        }
                    }
                }
            }
            if (n_buckets > new_n_buckets) /* shrink the hash table */
                keys = (T*)std::realloc((void *)keys, new_n_buckets * sizeof(T));
            std::free(flags); /* free the working space */
            flags = new_flags;
            n_buckets = new_n_buckets;
            n_occupied = count;
            upper_bound = (n_buckets>>1) + (n_buckets>>2);
        }
        return 0;
    }

    /**
     * Insert `key` if it is absent.
     * @param key key to insert
     * @param ret set to -1 if resizing failed, 0 if the key was already
     *            present, 1 if it went into an empty bucket, 2 if it reused
     *            a deleted bucket
     * @return bucket index of the key, or end() when *ret is -1
     */
    khint_t put(const T &key, int *ret) {
        khint_t x;
        if (n_occupied >= upper_bound) { /* update the hash table */
            if (n_buckets > (count<<1)) {
                if (resize(n_buckets - 1) < 0) { /* clear "deleted" elements */
                    *ret = -1; return n_buckets;
                }
            } else if (resize(n_buckets + 1) < 0) { /* expand the hash table */
                *ret = -1; return n_buckets;
            }
        } /* TODO: to implement automatically shrinking; resize() already support shrinking */
        {
            khint_t k, i, site, last, mask = n_buckets - 1, step = 0;
            x = site = n_buckets; k = Hash()(key); i = k & mask;
            if (__ac_isempty(flags, i)) x = i; /* for speed up */
            else {
                last = i;
                while (!__ac_isempty(flags, i) && (__ac_isdel(flags, i) || !Eq()(keys[i], key))) {
                    if (__ac_isdel(flags, i)) site = i;
                    i = (i + (++step)) & mask;
                    if (i == last) { x = site; break; }
                }
                if (x == n_buckets) {
                    if (__ac_isempty(flags, i) && site != n_buckets) x = site;
                    else x = i;
                }
            }
        }
        if (__ac_isempty(flags, x)) { /* not present at all */
            keys[x] = key;
            __ac_set_isboth_false(flags, x);
            ++count; ++n_occupied;
            *ret = 1;
        } else if (__ac_isdel(flags, x)) { /* deleted */
            keys[x] = key;
            __ac_set_isboth_false(flags, x);
            ++count;
            *ret = 2;
        } else *ret = 0; /* Don't touch keys[x] if present and not deleted */
        return x;
    }

    /// Mark bucket x as deleted; ignored for end() and for buckets without a live key.
    void del(khint_t x) {
        if (x != n_buckets && !__ac_iseither(flags, x)) {
            __ac_set_isdel_true(flags, x);
            --count;
        }
    }
};
|
||||||
|
|
||||||
|
} // end of namespace klib
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
@ -0,0 +1,258 @@
|
||||||
|
#ifndef __AC_KHASHL_HPP
|
||||||
|
#define __AC_KHASHL_HPP
|
||||||
|
|
||||||
|
#include <functional> // for std::equal_to
|
||||||
|
#include <cstdlib> // for malloc() etc
|
||||||
|
#include <cstring> // for memset()
|
||||||
|
#include <stdint.h> // for uint32_t
|
||||||
|
|
||||||
|
/* // ==> Code example <==
|
||||||
|
#include <cstdio>
|
||||||
|
#include "khashl.hpp"
|
||||||
|
|
||||||
|
int main(void)
|
||||||
|
{
|
||||||
|
klib::KHashMap<uint32_t, int, std::hash<uint32_t> > h; // NB: C++98 doesn't have std::hash
|
||||||
|
uint32_t k;
|
||||||
|
int absent;
|
||||||
|
h[43] = 1, h[53] = 2, h[63] = 3, h[73] = 4; // one way to insert
|
||||||
|
k = h.put(53, &absent), h.value(k) = -2; // another way to insert
|
||||||
|
if (!absent) printf("already in the table\n"); // which allows to test presence
|
||||||
|
if (h.get(33) == h.end()) printf("not found!\n"); // test presence without insertion
|
||||||
|
h.del(h.get(43)); // deletion
|
||||||
|
for (k = 0; k != h.end(); ++k) // traversal
|
||||||
|
if (h.occupied(k)) // some buckets are not occupied; skip them
|
||||||
|
printf("%u => %d\n", h.key(k), h.value(k));
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
*/
|
||||||
|
|
||||||
|
namespace klib {
|
||||||
|
|
||||||
|
/***********
|
||||||
|
* HashSet *
|
||||||
|
***********/
|
||||||
|
|
||||||
|
/**
 * Open-addressing hash set using linear probing.
 *
 * Storage consists of two malloc()-managed arrays: `keys` (one T per bucket)
 * and `used` (one occupancy bit per bucket). The bucket count is always a
 * power of two (1 << bits). Buckets are grown with realloc() and elements are
 * moved by plain assignment, so T must tolerate being relocated bytewise.
 *
 * The class declares no copy constructor or copy assignment; the implicit
 * ones copy the raw pointers while the destructor frees them, so a copy must
 * not be destroyed alongside the original.
 *
 * get()/put() return bucket indices; end() is the "not found" index.
 */
template<class T, class Hash, class Eq = std::equal_to<T>, typename khint_t = uint32_t>
class KHashSet {
	khint_t bits, count; /* log2 of the bucket count; number of stored keys */
	uint32_t *used;      /* occupancy bitmap, one bit per bucket */
	T *keys;             /* bucket array */

	static inline uint32_t __kh_used(const uint32_t *flag, khint_t i) { return flag[i>>5] >> (i&0x1fU) & 1U; }
	static inline void __kh_set_used(uint32_t *flag, khint_t i) { flag[i>>5] |= 1U<<(i&0x1fU); }
	static inline void __kh_set_unused(uint32_t *flag, khint_t i) { flag[i>>5] &= ~(1U<<(i&0x1fU)); }
	/* number of 32-bit words needed for a bitmap covering m buckets */
	static inline khint_t __kh_fsize(khint_t m) { return m<32? 1 : m>>5; }
	/* multiplicative hash-to-bucket mapping; the overload is selected by the hash width */
	static inline khint_t __kh_h2b(uint32_t hash, khint_t bits) { return hash * 2654435769U >> (32 - bits); }
	static inline khint_t __kh_h2b(uint64_t hash, khint_t bits) { return hash * 11400714819323198485ULL >> (64 - bits); }

public:
	KHashSet() : bits(0), count(0), used(0), keys(0) {}
	~KHashSet() { std::free(used); std::free(keys); }

	/* Number of buckets; 0 until the first allocation. */
	inline khint_t n_buckets() const { return used? khint_t(1) << bits : 0; }
	/* Index returned by get() when a key is absent. */
	inline khint_t end() const { return n_buckets(); }
	/* Number of stored keys. */
	inline khint_t size() const { return count; }
	/* Key stored in bucket x; only meaningful when occupied(x). */
	inline T &key(khint_t x) { return keys[x]; }
	inline bool occupied(khint_t x) const { return (__kh_used(used, x) != 0); }

	/* Drop all keys but keep the allocated buckets. */
	void clear(void) {
		if (!used) return;
		std::memset(used, 0, __kh_fsize(n_buckets()) * sizeof(uint32_t));
		count = 0;
	}

	/* Return the bucket holding `key`, or end() when it is absent. */
	khint_t get(const T &key) const {
		khint_t i, last, mask, nb;
		if (keys == 0) return 0; /* no table yet: end() is 0 as well */
		nb = n_buckets();
		mask = nb - khint_t(1);
		i = last = __kh_h2b(Hash()(key), bits);
		while (__kh_used(used, i) && !Eq()(keys[i], key)) {
			i = (i + khint_t(1)) & mask;
			if (i == last) return nb; /* probed every bucket */
		}
		return !__kh_used(used, i)? nb : i;
	}

	/**
	 * Rehash into at least `new_nb` buckets (rounded up to a power of two, minimum 4).
	 * @return 0 on success or when the requested size cannot hold the current
	 *         keys at 75% load (table left unchanged); -1 when out of memory
	 *         (table left unchanged).
	 */
	int resize(khint_t new_nb) {
		uint32_t *new_used = 0;
		khint_t j = 0, x = new_nb, nb, new_bits, new_mask;
		while ((x >>= khint_t(1)) != 0) ++j;   /* j = floor(log2(new_nb)) */
		if (new_nb & (new_nb - 1)) ++j;        /* round up when not a power of two */
		new_bits = j > 2? j : 2;
		new_nb = khint_t(1) << new_bits;
		if (count > (new_nb>>1) + (new_nb>>2)) return 0; /* requested size is too small */
		new_used = (uint32_t*)std::malloc(__kh_fsize(new_nb) * sizeof(uint32_t));
		if (!new_used) return -1; /* not enough memory; must be checked before touching the buffer */
		std::memset(new_used, 0, __kh_fsize(new_nb) * sizeof(uint32_t));
		nb = n_buckets();
		if (nb < new_nb) { /* expand */
			T *new_keys = (T*)std::realloc(keys, new_nb * sizeof(T));
			if (!new_keys) { std::free(new_used); return -1; }
			keys = new_keys;
		} /* otherwise shrink */
		new_mask = new_nb - 1;
		for (j = 0; j != nb; ++j) {
			if (!__kh_used(used, j)) continue;
			T key = keys[j];
			__kh_set_unused(used, j);
			while (1) { /* kick-out process; sort of like in Cuckoo hashing */
				khint_t i;
				i = __kh_h2b(Hash()(key), new_bits);
				while (__kh_used(new_used, i)) i = (i + khint_t(1)) & new_mask;
				__kh_set_used(new_used, i);
				if (i < nb && __kh_used(used, i)) { /* kick out the existing element */
					{ T tmp = keys[i]; keys[i] = key; key = tmp; }
					__kh_set_unused(used, i); /* mark it as deleted in the old hash table */
				} else { /* write the element and jump out of the loop */
					keys[i] = key;
					break;
				}
			}
		}
		if (nb > new_nb) { /* shrink the hash table */
			T *shrunk = (T*)std::realloc(keys, new_nb * sizeof(T));
			if (shrunk) keys = shrunk; /* on failure the old, larger buffer is still valid */
		}
		std::free(used); /* free the working space */
		used = new_used, bits = new_bits;
		return 0;
	}

	/**
	 * Insert `key` unless an equal key is already present.
	 * @param absent_ optional; set to 1 if the key was inserted, 0 if it was
	 *                already present, -1 if growing the table failed.
	 * @return the bucket of the key, or the old end() when growing failed.
	 */
	khint_t put(const T &key, int *absent_ = 0) {
		khint_t nb, i, last, mask;
		int absent = -1;
		nb = n_buckets();
		if (count >= (nb>>1) + (nb>>2)) { /* rehashing */
			if (resize(nb + khint_t(1)) < 0) {
				if (absent_) *absent_ = -1;
				return nb;
			}
			nb = n_buckets();
		} /* TODO: implement automatic shrinking; resize() already supports shrinking */
		mask = nb - 1;
		i = last = __kh_h2b(Hash()(key), bits);
		while (__kh_used(used, i) && !Eq()(keys[i], key)) {
			i = (i + 1U) & mask;
			if (i == last) break;
		}
		if (!__kh_used(used, i)) { /* not present at all */
			keys[i] = key;
			__kh_set_used(used, i);
			++count, absent = 1;
		} else absent = 0; /* Don't touch keys[i] if present */
		if (absent_) *absent_ = absent;
		return i;
	}

	/**
	 * Delete the key in bucket `i`, moving later keys of the same probe run
	 * back so no tombstone is needed.
	 * @return 1 if a key was removed; 0 if `i` is out of range or the bucket is
	 *         not occupied.
	 */
	int del(khint_t i) {
		khint_t j = i, k, mask, nb = n_buckets();
		if (keys == 0 || i >= nb) return 0;
		if (!__kh_used(used, i)) return 0; /* nothing stored here; keep count intact */
		mask = nb - khint_t(1);
		while (1) {
			j = (j + khint_t(1)) & mask;
			if (j == i || !__kh_used(used, j)) break; /* j==i only when the table is completely full */
			k = __kh_h2b(Hash()(keys[j]), bits);
			if ((j > i && (k <= i || k > j)) || (j < i && (k <= i && k > j)))
				keys[i] = keys[j], i = j;
		}
		__kh_set_unused(used, i);
		--count;
		return 1;
	}
};
|
||||||
|
|
||||||
|
/***********
|
||||||
|
* HashMap *
|
||||||
|
***********/
|
||||||
|
|
||||||
|
/* Key/value pair stored in every bucket of KHashMap. Kept as a plain
 * aggregate so that it can be brace-initialized. */
template<class KType, class VType>
struct KHashMapBucket {
	KType key;
	VType val;
};

/* Hash functor for map buckets: applies Hash to the key only and converts the
 * result to khint_t. */
template<class T, class Hash, typename khint_t>
struct KHashMapHash {
	khint_t operator() (const T &bucket) const {
		Hash hasher = Hash();
		return hasher(bucket.key);
	}
};

/* Equality functor for map buckets: two buckets are equal when Eq says their
 * keys are; the values are ignored. */
template<class T, class Eq>
struct KHashMapEq {
	bool operator() (const T &lhs, const T &rhs) const {
		Eq same = Eq();
		return same(lhs.key, rhs.key);
	}
};
|
||||||
|
|
||||||
|
template<class KType, class VType, class Hash, class Eq=std::equal_to<KType>, typename khint_t=uint32_t>
|
||||||
|
class KHashMap : public KHashSet<KHashMapBucket<KType, VType>,
|
||||||
|
KHashMapHash<KHashMapBucket<KType, VType>, Hash, khint_t>,
|
||||||
|
KHashMapEq<KHashMapBucket<KType, VType>, Eq>, khint_t>
|
||||||
|
{
|
||||||
|
typedef KHashMapBucket<KType, VType> bucket_t;
|
||||||
|
typedef KHashSet<bucket_t, KHashMapHash<bucket_t, Hash, khint_t>, KHashMapEq<bucket_t, Eq>, khint_t> hashset_t;
|
||||||
|
public:
|
||||||
|
khint_t get(const KType &key) const {
|
||||||
|
bucket_t t = { key, VType() };
|
||||||
|
return hashset_t::get(t);
|
||||||
|
}
|
||||||
|
khint_t put(const KType &key, int *absent) {
|
||||||
|
bucket_t t = { key, VType() };
|
||||||
|
return hashset_t::put(t, absent);
|
||||||
|
}
|
||||||
|
inline KType &key(khint_t i) { return hashset_t::key(i).key; }
|
||||||
|
inline VType &value(khint_t i) { return hashset_t::key(i).val; }
|
||||||
|
inline VType &operator[] (const KType &key) {
|
||||||
|
bucket_t t = { key, VType() };
|
||||||
|
return value(hashset_t::put(t));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
/****************************
|
||||||
|
* HashSet with cached hash *
|
||||||
|
****************************/
|
||||||
|
|
||||||
|
/* Set bucket that stores the key together with its precomputed hash. */
template<class KType, typename khint_t>
struct KHashSetCachedBucket {
	KType key;
	khint_t hash;
};

/* Hash functor for cached buckets: returns the stored hash without rehashing. */
template<class T, typename khint_t>
struct KHashCachedHash {
	khint_t operator() (const T &bucket) const {
		return bucket.hash;
	}
};

/* Equality functor for cached buckets: the stored hashes are compared first
 * and Eq is consulted on the keys only when they match. */
template<class T, class Eq>
struct KHashCachedEq {
	bool operator() (const T &lhs, const T &rhs) const {
		if (!(lhs.hash == rhs.hash)) return false;
		return Eq()(lhs.key, rhs.key);
	}
};
|
||||||
|
|
||||||
|
template<class KType, class Hash, class Eq = std::equal_to<KType>, typename khint_t = uint32_t>
|
||||||
|
class KHashSetCached : public KHashSet<KHashSetCachedBucket<KType, khint_t>,
|
||||||
|
KHashCachedHash<KHashSetCachedBucket<KType, khint_t>, khint_t>,
|
||||||
|
KHashCachedEq<KHashSetCachedBucket<KType, khint_t>, Eq>, khint_t>
|
||||||
|
{
|
||||||
|
typedef KHashSetCachedBucket<KType, khint_t> bucket_t;
|
||||||
|
typedef KHashSet<bucket_t, KHashCachedHash<bucket_t, khint_t>, KHashCachedEq<bucket_t, Eq>, khint_t> hashset_t;
|
||||||
|
public:
|
||||||
|
khint_t get(const KType &key) const {
|
||||||
|
bucket_t t = { key, Hash()(key) };
|
||||||
|
return hashset_t::get(t);
|
||||||
|
}
|
||||||
|
khint_t put(const KType &key, int *absent) {
|
||||||
|
bucket_t t = { key, Hash()(key) };
|
||||||
|
return hashset_t::put(t, absent);
|
||||||
|
}
|
||||||
|
inline KType &key(khint_t i) { return hashset_t::key(i).key; }
|
||||||
|
};
|
||||||
|
|
||||||
|
/****************************
|
||||||
|
* HashMap with cached hash *
|
||||||
|
****************************/
|
||||||
|
|
||||||
|
template<class KType, class VType, typename khint_t>
|
||||||
|
struct KHashMapCachedBucket { KType key; VType val; khint_t hash; };
|
||||||
|
|
||||||
|
template<class KType, class VType, class Hash, class Eq = std::equal_to<KType>, typename khint_t = uint32_t>
|
||||||
|
class KHashMapCached : public KHashSet<KHashMapCachedBucket<KType, VType, khint_t>,
|
||||||
|
KHashCachedHash<KHashMapCachedBucket<KType, VType, khint_t>, khint_t>,
|
||||||
|
KHashCachedEq<KHashMapCachedBucket<KType, VType, khint_t>, Eq>, khint_t>
|
||||||
|
{
|
||||||
|
typedef KHashMapCachedBucket<KType, VType, khint_t> bucket_t;
|
||||||
|
typedef KHashSet<bucket_t, KHashCachedHash<bucket_t, khint_t>, KHashCachedEq<bucket_t, Eq>, khint_t> hashset_t;
|
||||||
|
public:
|
||||||
|
khint_t get(const KType &key) const {
|
||||||
|
bucket_t t = { key, VType(), Hash()(key) };
|
||||||
|
return hashset_t::get(t);
|
||||||
|
}
|
||||||
|
khint_t put(const KType &key, int *absent) {
|
||||||
|
bucket_t t = { key, VType(), Hash()(key) };
|
||||||
|
return hashset_t::put(t, absent);
|
||||||
|
}
|
||||||
|
inline KType &key(khint_t i) { return hashset_t::key(i).key; }
|
||||||
|
inline VType &value(khint_t i) { return hashset_t::key(i).val; }
|
||||||
|
inline VType &operator[] (const KType &key) {
|
||||||
|
bucket_t t = { key, VType(), Hash()(key) };
|
||||||
|
return value(hashset_t::put(t));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif /* __AC_KHASHL_HPP */
|
||||||
|
|
@ -0,0 +1,224 @@
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include "kalloc.h"
|
||||||
|
|
||||||
|
/* In kalloc, a *core* is a large chunk of contiguous memory. Each core is
|
||||||
|
* associated with a master header, which keeps the size of the current core
|
||||||
|
* and the pointer to next core. Kalloc allocates small *blocks* of memory from
|
||||||
|
* the cores and organizes free memory blocks in a circular single-linked list.
|
||||||
|
*
|
||||||
|
* In the following diagram, "@" stands for the header of a free block (of type
|
||||||
|
* header_t), "#" for the header of an allocated block (of type size_t), "-"
|
||||||
|
* for free memory, and "+" for allocated memory.
|
||||||
|
*
|
||||||
|
* master This region is core 1. master This region is core 2.
|
||||||
|
* | |
|
||||||
|
* *@-------#++++++#++++++++++++@-------- *@----------#++++++++++++#+++++++@------------
|
||||||
|
* | | | |
|
||||||
|
* p=p->ptr->ptr->ptr->ptr p->ptr p->ptr->ptr p->ptr->ptr->ptr
|
||||||
|
*/
|
||||||
|
/* Header of a free block. It doubles as the allocation unit: block sizes are
 * counted in multiples of sizeof(header_t). */
typedef struct header_t {
	size_t size;           /* size of the block, in header_t units */
	struct header_t *ptr;  /* next block in the list */
} header_t;

/* State of one kalloc arena. */
typedef struct {
	void *par;             /* parent arena given to km_init2(); NULL means malloc()/free() */
	size_t min_core_size;  /* cores are requested in multiples of this many units */
	header_t base;         /* zero-sized block always kept in the loop */
	header_t *loop_head;   /* entry point into the circular list of free blocks */
	header_t *core_head;   /* head of the list of cores */
} kmem_t;
|
||||||
|
|
||||||
|
/* Report an unrecoverable allocator error: write the message and a newline to
 * stderr, then abort the process. Does not return. */
static void panic(const char *s)
{
	(void)fprintf(stderr, "%s\n", s);
	abort();
}
|
||||||
|
|
||||||
|
void *km_init2(void *km_par, size_t min_core_size)
|
||||||
|
{
|
||||||
|
kmem_t *km;
|
||||||
|
km = (kmem_t*)kcalloc(km_par, 1, sizeof(kmem_t));
|
||||||
|
km->par = km_par;
|
||||||
|
if (km_par) km->min_core_size = min_core_size > 0? min_core_size : ((kmem_t*)km_par)->min_core_size - 2;
|
||||||
|
else km->min_core_size = min_core_size > 0? min_core_size : 0x80000;
|
||||||
|
return (void*)km;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Create a top-level arena (no parent) with the default core size. */
void *km_init(void)
{
	return km_init2(NULL, 0);
}
|
||||||
|
|
||||||
|
void km_destroy(void *_km)
|
||||||
|
{
|
||||||
|
kmem_t *km = (kmem_t*)_km;
|
||||||
|
void *km_par;
|
||||||
|
header_t *p, *q;
|
||||||
|
if (km == NULL) return;
|
||||||
|
km_par = km->par;
|
||||||
|
for (p = km->core_head; p != NULL;) {
|
||||||
|
q = p->ptr;
|
||||||
|
kfree(km_par, p);
|
||||||
|
p = q;
|
||||||
|
}
|
||||||
|
kfree(km_par, km);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Obtain a new core able to hold at least `nu` units and add its free space
 * to the arena's free list. Panics when the parent allocator returns NULL.
 * Returns the arena's loop head after the new block has been linked in. */
static header_t *morecore(kmem_t *km, size_t nu)
{
	header_t *core;
	size_t n_bytes, *blk;
	/* round (nu + 1) up to a multiple of min_core_size; the +1 is for the core header */
	nu = (nu + 1 + (km->min_core_size - 1)) / km->min_core_size * km->min_core_size;
	n_bytes = nu * sizeof(header_t);
	core = (header_t*)kmalloc(km->par, n_bytes);
	if (!core) panic("[morecore] insufficient memory");
	/* push the core onto the core list */
	core->ptr = km->core_head;
	core->size = nu;
	km->core_head = core;
	/* the unit after the core header starts a block of nu-1 units */
	blk = (size_t*)(core + 1);
	*blk = nu - 1;
	kfree(km, blk + 1); /* hands the block to the free list; the core header is not looped */
	return km->loop_head;
}
|
||||||
|
|
||||||
|
void kfree(void *_km, void *ap) /* kfree() also adds a new core to the circular list */
|
||||||
|
{
|
||||||
|
header_t *p, *q;
|
||||||
|
kmem_t *km = (kmem_t*)_km;
|
||||||
|
|
||||||
|
if (!ap) return;
|
||||||
|
if (km == NULL) {
|
||||||
|
free(ap);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
p = (header_t*)((size_t*)ap - 1);
|
||||||
|
p->size = *((size_t*)ap - 1);
|
||||||
|
/* Find the pointer that points to the block to be freed. The following loop can stop on two conditions:
|
||||||
|
*
|
||||||
|
* a) "p>q && p<q->ptr": @------#++++++++#+++++++@------- @---------------#+++++++@-------
|
||||||
|
* (can also be in | | | -> | |
|
||||||
|
* two cores) q p q->ptr q q->ptr
|
||||||
|
*
|
||||||
|
* @-------- #+++++++++@-------- @-------- @------------------
|
||||||
|
* | | | -> | |
|
||||||
|
* q p q->ptr q q->ptr
|
||||||
|
*
|
||||||
|
* b) "q>=q->ptr && (p>q || p<q->ptr)": @-------#+++++ @--------#+++++++ @-------#+++++ @----------------
|
||||||
|
* | | | -> | |
|
||||||
|
* q->ptr q p q->ptr q
|
||||||
|
*
|
||||||
|
* #+++++++@----- #++++++++@------- @------------- #++++++++@-------
|
||||||
|
* | | | -> | |
|
||||||
|
* p q->ptr q q->ptr q
|
||||||
|
*/
|
||||||
|
for (q = km->loop_head; !(p > q && p < q->ptr); q = q->ptr)
|
||||||
|
if (q >= q->ptr && (p > q || p < q->ptr)) break;
|
||||||
|
if (p + p->size == q->ptr) { /* two adjacent blocks, merge p and q->ptr (the 2nd and 4th cases) */
|
||||||
|
p->size += q->ptr->size;
|
||||||
|
p->ptr = q->ptr->ptr;
|
||||||
|
} else if (p + p->size > q->ptr && q->ptr >= p) {
|
||||||
|
panic("[kfree] The end of the allocated block enters a free block.");
|
||||||
|
} else p->ptr = q->ptr; /* backup q->ptr */
|
||||||
|
|
||||||
|
if (q + q->size == p) { /* two adjacent blocks, merge q and p (the other two cases) */
|
||||||
|
q->size += p->size;
|
||||||
|
q->ptr = p->ptr;
|
||||||
|
km->loop_head = q;
|
||||||
|
} else if (q + q->size > p && p >= q) {
|
||||||
|
panic("[kfree] The end of a free block enters the allocated block.");
|
||||||
|
} else km->loop_head = p, q->ptr = p; /* in two cores, cannot be merged; create a new block in the list */
|
||||||
|
}
|
||||||
|
|
||||||
|
void *kmalloc(void *_km, size_t n_bytes)
|
||||||
|
{
|
||||||
|
kmem_t *km = (kmem_t*)_km;
|
||||||
|
size_t n_units;
|
||||||
|
header_t *p, *q;
|
||||||
|
|
||||||
|
if (n_bytes == 0) return 0;
|
||||||
|
if (km == NULL) return malloc(n_bytes);
|
||||||
|
n_units = (n_bytes + sizeof(size_t) + sizeof(header_t) - 1) / sizeof(header_t); /* header+n_bytes requires at least this number of units */
|
||||||
|
|
||||||
|
if (!(q = km->loop_head)) /* the first time when kmalloc() is called, intialize it */
|
||||||
|
q = km->loop_head = km->base.ptr = &km->base;
|
||||||
|
for (p = q->ptr;; q = p, p = p->ptr) { /* search for a suitable block */
|
||||||
|
if (p->size >= n_units) { /* p->size if the size of current block. This line means the current block is large enough. */
|
||||||
|
if (p->size == n_units) q->ptr = p->ptr; /* no need to split the block */
|
||||||
|
else { /* split the block. NB: memory is allocated at the end of the block! */
|
||||||
|
p->size -= n_units; /* reduce the size of the free block */
|
||||||
|
p += p->size; /* p points to the allocated block */
|
||||||
|
*(size_t*)p = n_units; /* set the size */
|
||||||
|
}
|
||||||
|
km->loop_head = q; /* set the end of chain */
|
||||||
|
return (size_t*)p + 1;
|
||||||
|
}
|
||||||
|
if (p == km->loop_head) { /* then ask for more "cores" */
|
||||||
|
if ((p = morecore(km, n_units)) == 0) return 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void *kcalloc(void *_km, size_t count, size_t size)
|
||||||
|
{
|
||||||
|
kmem_t *km = (kmem_t*)_km;
|
||||||
|
void *p;
|
||||||
|
if (size == 0 || count == 0) return 0;
|
||||||
|
if (km == NULL) return calloc(count, size);
|
||||||
|
p = kmalloc(km, count * size);
|
||||||
|
memset(p, 0, count * size);
|
||||||
|
return p;
|
||||||
|
}
|
||||||
|
|
||||||
|
void *krealloc(void *_km, void *ap, size_t n_bytes) // TODO: this can be made more efficient in principle
|
||||||
|
{
|
||||||
|
kmem_t *km = (kmem_t*)_km;
|
||||||
|
size_t cap, *p, *q;
|
||||||
|
|
||||||
|
if (n_bytes == 0) {
|
||||||
|
kfree(km, ap); return 0;
|
||||||
|
}
|
||||||
|
if (km == NULL) return realloc(ap, n_bytes);
|
||||||
|
if (ap == NULL) return kmalloc(km, n_bytes);
|
||||||
|
p = (size_t*)ap - 1;
|
||||||
|
cap = (*p) * sizeof(header_t) - sizeof(size_t);
|
||||||
|
if (cap >= n_bytes) return ap; /* TODO: this prevents shrinking */
|
||||||
|
q = (size_t*)kmalloc(km, n_bytes);
|
||||||
|
memcpy(q, ap, cap);
|
||||||
|
kfree(km, ap);
|
||||||
|
return q;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Move the block `ap` into a freshly allocated block of `n_bytes` from arena
 * `km`: n_bytes are copied from `ap`, which is then freed. When `km` or `ap`
 * is 0 nothing is done and `ap` is returned as is. */
void *krelocate(void *km, void *ap, size_t n_bytes)
{
	void *moved;
	if (km == 0 || ap == 0) return ap;
	moved = kmalloc(km, n_bytes);
	memcpy(moved, ap, n_bytes);
	kfree(km, ap);
	return moved;
}
|
||||||
|
|
||||||
|
/* Collect usage statistics for arena `_km` into `*s`. `*s` is zeroed first and
 * stays zeroed when the arena is NULL or has never allocated anything.
 * All byte figures are unit counts multiplied by sizeof(header_t). Panics if
 * a free block is found to overlap the next one. */
void km_stat(const void *_km, km_stat_t *s)
{
	kmem_t *km = (kmem_t*)_km;
	header_t *p;
	memset(s, 0, sizeof(km_stat_t));
	if (km == NULL || km->loop_head == NULL) return;
	/* walk the circular free list exactly once */
	p = km->loop_head;
	do {
		s->available += p->size * sizeof(header_t);
		if (p->size != 0) ++s->n_blocks; /* the zero-sized base block is not counted */
		if (p->ptr > p && p + p->size > p->ptr)
			panic("[km_stat] The end of a free block enters another free block.");
		p = p->ptr;
	} while (p != km->loop_head);
	/* walk the list of cores */
	for (p = km->core_head; p != NULL; p = p->ptr) {
		size_t size = p->size * sizeof(header_t);
		++s->n_cores;
		s->capacity += size;
		if (size > s->largest) s->largest = size;
	}
}
|
||||||
|
|
||||||
|
void km_stat_print(const void *km)
|
||||||
|
{
|
||||||
|
km_stat_t st;
|
||||||
|
km_stat(km, &st);
|
||||||
|
fprintf(stderr, "[km_stat] cap=%ld, avail=%ld, largest=%ld, n_core=%ld, n_block=%ld\n",
|
||||||
|
st.capacity, st.available, st.largest, st.n_blocks, st.n_cores);
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,87 @@
|
||||||
|
#ifndef _KALLOC_H_
|
||||||
|
#define _KALLOC_H_
|
||||||
|
|
||||||
|
#include <stddef.h> /* for size_t */
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* Arena statistics as filled in by km_stat(). */
typedef struct {
	size_t capacity;   /* total bytes of all cores */
	size_t available;  /* total bytes on the free list */
	size_t n_blocks;   /* number of non-empty free blocks */
	size_t n_cores;    /* number of cores */
	size_t largest;    /* size in bytes of the largest core */
} km_stat_t;
|
||||||
|
|
||||||
|
void *kmalloc(void *km, size_t size);
|
||||||
|
void *krealloc(void *km, void *ptr, size_t size);
|
||||||
|
void *krelocate(void *km, void *ap, size_t n_bytes);
|
||||||
|
void *kcalloc(void *km, size_t count, size_t size);
|
||||||
|
void kfree(void *km, void *ptr);
|
||||||
|
|
||||||
|
void *km_init(void);
|
||||||
|
void *km_init2(void *km_par, size_t min_core_size);
|
||||||
|
void km_destroy(void *km);
|
||||||
|
void km_stat(const void *_km, km_stat_t *s);
|
||||||
|
void km_stat_print(const void *km);
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* Typed wrappers: allocate / zero-allocate / resize an array of `cnt` objects
 * of `type` and cast the result to `type*`. */
#define Kmalloc(km, type, cnt) \
	((type*)kmalloc((km), (cnt) * sizeof(type)))
#define Kcalloc(km, type, cnt) \
	((type*)kcalloc((km), (cnt), sizeof(type)))
#define Krealloc(km, type, ptr, cnt) \
	((type*)krealloc((km), (ptr), (cnt) * sizeof(type)))

/* Grow array `a`: the capacity `m` becomes 16 when it was below 4 and
 * 1.5 * m otherwise; `a` is then reallocated to `m` elements. Both `a` and
 * `m` are assigned to. */
#define Kexpand(km, type, a, m) do { \
		(m) = (m) >= 4? (m) + ((m)>>1) : 16; \
		(a) = Krealloc(km, type, (a), (m)); \
	} while (0)

/* Variants that deduce the element type from `ptr` via __typeof__ and assign
 * the result to `ptr`. */
#define KMALLOC(km, ptr, len) \
	((ptr) = (__typeof__(ptr))kmalloc((km), (len) * sizeof(*(ptr))))
#define KCALLOC(km, ptr, len) \
	((ptr) = (__typeof__(ptr))kcalloc((km), (len), sizeof(*(ptr))))
#define KREALLOC(km, ptr, len) \
	((ptr) = (__typeof__(ptr))krealloc((km), (ptr), (len) * sizeof(*(ptr))))

/* Same growth policy as Kexpand, with the element type deduced from `a`. */
#define KEXPAND(km, a, m) do { \
		(m) = (m) >= 4? (m) + ((m)>>1) : 16; \
		KREALLOC((km), (a), (m)); \
	} while (0)
|
||||||
|
|
||||||
|
/* klib_unused: expands to the "unused" attribute on clang >= 3 and GCC >= 3,
 * and to nothing elsewhere. An existing definition is left untouched. */
#ifndef klib_unused
#  if (defined(__clang__) && __clang_major__ >= 3) || (defined(__GNUC__) && __GNUC__ >= 3)
#    define klib_unused __attribute__ ((__unused__))
#  else
#    define klib_unused
#  endif
#endif /* klib_unused */
|
||||||
|
|
||||||
|
/* Instantiate a simple object pool for `kmptype_t`, named kmp_<name>_t:
 *   kmp_init_<name>(km)      create a pool allocating from arena `km`;
 *                            returns NULL if the pool cannot be allocated
 *   kmp_destroy_<name>(mp)   free all cached objects and the pool itself
 *   kmp_alloc_<name>(mp)     reuse the most recently returned object, or
 *                            zero-allocate a new one when none is cached
 *   kmp_free_<name>(mp, p)   hand `p` back to the pool for later reuse
 * `cnt` counts objects currently handed out, `n` the cached objects in `buf`
 * and `max` the capacity of `buf`. SCOPE is prepended to every function. */
#define KALLOC_POOL_INIT2(SCOPE, name, kmptype_t) \
	typedef struct { \
		size_t cnt, n, max; \
		kmptype_t **buf; \
		void *km; \
	} kmp_##name##_t; \
	SCOPE kmp_##name##_t *kmp_init_##name(void *km) { \
		kmp_##name##_t *mp; \
		mp = Kcalloc(km, kmp_##name##_t, 1); \
		if (mp == 0) return 0; /* allocation failed; nothing to initialize */ \
		mp->km = km; \
		return mp; \
	} \
	SCOPE void kmp_destroy_##name(kmp_##name##_t *mp) { \
		size_t k; \
		for (k = 0; k < mp->n; ++k) kfree(mp->km, mp->buf[k]); \
		kfree(mp->km, mp->buf); kfree(mp->km, mp); \
	} \
	SCOPE kmptype_t *kmp_alloc_##name(kmp_##name##_t *mp) { \
		++mp->cnt; \
		if (mp->n == 0) return (kmptype_t*)kcalloc(mp->km, 1, sizeof(kmptype_t)); \
		return mp->buf[--mp->n]; \
	} \
	SCOPE void kmp_free_##name(kmp_##name##_t *mp, kmptype_t *p) { \
		--mp->cnt; \
		if (mp->n == mp->max) Kexpand(mp->km, kmptype_t*, mp->buf, mp->max); \
		mp->buf[mp->n++] = p; \
	}

/* Pool instantiation with internal linkage (static inline, marked unused). */
#define KALLOC_POOL_INIT(name, kmptype_t) \
	KALLOC_POOL_INIT2(static inline klib_unused, name, kmptype_t)
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
@ -0,0 +1,306 @@
|
||||||
|
/* The MIT License
|
||||||
|
|
||||||
|
Copyright (c) 2021 by Attractive Chaos <attractor@live.co.uk>
|
||||||
|
|
||||||
|
Permission is hereby granted, free of charge, to any person obtaining
|
||||||
|
a copy of this software and associated documentation files (the
|
||||||
|
"Software"), to deal in the Software without restriction, including
|
||||||
|
without limitation the rights to use, copy, modify, merge, publish,
|
||||||
|
distribute, sublicense, and/or sell copies of the Software, and to
|
||||||
|
permit persons to whom the Software is furnished to do so, subject to
|
||||||
|
the following conditions:
|
||||||
|
|
||||||
|
The above copyright notice and this permission notice shall be
|
||||||
|
included in all copies or substantial portions of the Software.
|
||||||
|
|
||||||
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||||
|
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||||
|
BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||||
|
ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||||
|
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
|
SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* An example:
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include "kavl-lite.h"
|
||||||
|
|
||||||
|
struct my_node {
|
||||||
|
char key;
|
||||||
|
KAVLL_HEAD(struct my_node) head;
|
||||||
|
};
|
||||||
|
#define my_cmp(p, q) (((q)->key < (p)->key) - ((p)->key < (q)->key))
|
||||||
|
KAVLL_INIT(my, struct my_node, head, my_cmp)
|
||||||
|
|
||||||
|
int main(void) {
|
||||||
|
const char *str = "MNOLKQOPHIA"; // from wiki, except a duplicate
|
||||||
|
struct my_node *root = 0;
|
||||||
|
int i, l = strlen(str);
|
||||||
|
for (i = 0; i < l; ++i) { // insert in the input order
|
||||||
|
struct my_node *q, *p = malloc(sizeof(*p));
|
||||||
|
p->key = str[i];
|
||||||
|
q = my_insert(&root, p);
|
||||||
|
if (p != q) free(p); // if already present, free
|
||||||
|
}
|
||||||
|
my_itr_t itr;
|
||||||
|
my_itr_first(root, &itr); // place at first
|
||||||
|
do { // traverse
|
||||||
|
const struct my_node *p = kavll_at(&itr);
|
||||||
|
putchar(p->key);
|
||||||
|
free((void*)p); // free node
|
||||||
|
} while (my_itr_next(&itr));
|
||||||
|
putchar('\n');
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef KAVL_LITE_H
|
||||||
|
#define KAVL_LITE_H
|
||||||
|
|
||||||
|
#ifdef __STRICT_ANSI__
|
||||||
|
#define inline __inline__
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#define KAVLL_MAX_DEPTH 64
|
||||||
|
|
||||||
|
/* Intrusive AVL link to embed in a node of type `__type`:
 * p[0]/p[1] are the left/right children and `balance` is the balance factor. */
#define KAVLL_HEAD(__type) \
	struct { \
		__type *p[2]; \
		signed char balance; /* balance factor */ \
	}
|
||||||
|
|
||||||
|
/* Generates <pre>_find(root, x): descend from `root`, going left when
 * __cmp(x, node) < 0 and right when it is > 0. Returns the node that compares
 * equal to `x`, or 0 when there is none. */
#define __KAVLL_FIND(pre, __scope, __type, __head, __cmp) \
	__scope __type *pre##_find(const __type *root, const __type *x) { \
		const __type *p; \
		for (p = root; p != 0; ) { \
			int cmp; \
			cmp = __cmp(x, p); \
			if (cmp == 0) break; \
			p = p->__head.p[cmp > 0]; /* 0 = left, 1 = right */ \
		} \
		return (__type*)p; \
	}
|
||||||
|
|
||||||
|
/* Generates the two rotation helpers used for rebalancing.
 *  <pre>_rotate1(p, dir): single rotation, (a,(b,c)q)p => ((a,b)p,c)q;
 *      dir=0 rotates to the left, dir=1 to the right. Balance factors are
 *      left for the caller to update.
 *  <pre>_rotate2(p, dir): double rotation, (a,((b,c)r,d)q)p => ((a,b)p,(c,d)q)r;
 *      the balance factors of p, q and r are updated here.
 * Both return the new root of the rotated subtree. */
#define __KAVLL_ROTATE(pre, __type, __head) \
	static inline __type *pre##_rotate1(__type *p, int dir) { \
		const int opp = 1 - dir; /* opposite direction */ \
		__type *child = p->__head.p[opp]; \
		p->__head.p[opp] = child->__head.p[dir]; \
		child->__head.p[dir] = p; \
		return child; \
	} \
	static inline __type *pre##_rotate2(__type *p, int dir) { \
		const int opp = 1 - dir; \
		const int b1 = dir == 0? +1 : -1; \
		__type *child = p->__head.p[opp]; \
		__type *top = child->__head.p[dir]; \
		p->__head.p[opp] = top->__head.p[dir]; \
		top->__head.p[dir] = p; \
		child->__head.p[dir] = top->__head.p[opp]; \
		top->__head.p[opp] = child; \
		if (top->__head.balance == b1) { \
			child->__head.balance = 0; \
			p->__head.balance = -b1; \
		} else if (top->__head.balance == 0) { \
			child->__head.balance = p->__head.balance = 0; \
		} else { \
			child->__head.balance = b1; \
			p->__head.balance = 0; \
		} \
		top->__head.balance = 0; \
		return top; \
	}
|
||||||
|
|
||||||
|
/* Generates <pre>_insert(root_, x): insert node `x` into the tree at *root_.
 * Returns `x` after insertion, or the already present node that compares
 * equal to `x` (in which case the tree is left unchanged).
 *
 * While descending, `bp` tracks the deepest node on the path with a non-zero
 * balance factor (and `bq` its parent); only the path from `bp` down to `x`
 * (recorded in `stack`) needs its balance factors adjusted, and `bp` is the
 * only node that may need a rotation. */
#define __KAVLL_INSERT(pre, __scope, __type, __head, __cmp) \
	__scope __type *pre##_insert(__type **root_, __type *x) { \
		unsigned char stack[KAVLL_MAX_DEPTH]; /* directions taken below bp */ \
		__type *bp, *bq; \
		__type *p, *q, *r = 0; /* _r_ is potentially the new root */ \
		int which = 0, top, b1; \
		bp = *root_, bq = 0; \
		/* find the insertion location */ \
		for (p = bp, q = bq, top = 0; p; q = p, p = p->__head.p[which]) { \
			int cmp; \
			cmp = __cmp(x, p); \
			if (cmp == 0) return p; \
			if (p->__head.balance != 0) \
				bq = q, bp = p, top = 0; \
			stack[top++] = which = (cmp > 0); \
		} \
		x->__head.balance = 0, x->__head.p[0] = x->__head.p[1] = 0; \
		if (q == 0) *root_ = x; \
		else q->__head.p[which] = x; \
		if (bp == 0) return x; \
		for (p = bp, top = 0; p != x; p = p->__head.p[stack[top]], ++top) /* update balance factors */ \
			if (stack[top] == 0) --p->__head.balance; \
			else ++p->__head.balance; \
		if (bp->__head.balance > -2 && bp->__head.balance < 2) return x; /* no re-balance needed */ \
		/* re-balance */ \
		which = (bp->__head.balance < 0); \
		b1 = which == 0? +1 : -1; \
		q = bp->__head.p[1 - which]; \
		if (q->__head.balance == b1) { \
			r = pre##_rotate1(bp, which); \
			q->__head.balance = bp->__head.balance = 0; \
		} else r = pre##_rotate2(bp, which); \
		if (bq == 0) *root_ = r; \
		else bq->__head.p[bp != bq->__head.p[0]] = r; \
		return x; \
	}
|
||||||
|
|
||||||
|
/* Generates <pre>_erase(root_, x): unlink a node from the tree at *root_.
 * With a non-NULL `x` the node comparing equal to `x` is removed; with
 * x == NULL the leftmost (first) node is removed. Returns the removed node,
 * or 0 when the tree is empty or no node matches.
 *
 * `fake` is a temporary pseudo-root whose left child is the real root, so the
 * real root can be handled like any other node; path[]/dir[] record the
 * descent for the rebalancing pass. */
#define __KAVLL_ERASE(pre, __scope, __type, __head, __cmp) \
	__scope __type *pre##_erase(__type **root_, const __type *x) { \
		__type *p, *path[KAVLL_MAX_DEPTH], fake; \
		unsigned char dir[KAVLL_MAX_DEPTH]; \
		int d = 0, cmp; \
		if (*root_ == 0) return 0; /* empty tree: nothing to unlink, and path[] would underflow below */ \
		fake.__head.p[0] = *root_, fake.__head.p[1] = 0; \
		if (x) { \
			for (cmp = -1, p = &fake; cmp; cmp = __cmp(x, p)) { \
				int which = (cmp > 0); \
				dir[d] = which; \
				path[d++] = p; \
				p = p->__head.p[which]; \
				if (p == 0) return 0; \
			} \
		} else { \
			for (p = &fake; p; p = p->__head.p[0]) \
				dir[d] = 0, path[d++] = p; \
			p = path[--d]; \
		} \
		if (p->__head.p[1] == 0) { /* ((1,.)2,3)4 => (1,3)4; p=2 */ \
			path[d-1]->__head.p[dir[d-1]] = p->__head.p[0]; \
		} else { \
			__type *q = p->__head.p[1]; \
			if (q->__head.p[0] == 0) { /* ((1,2)3,4)5 => ((1)2,4)5; p=3 */ \
				q->__head.p[0] = p->__head.p[0]; \
				q->__head.balance = p->__head.balance; \
				path[d-1]->__head.p[dir[d-1]] = q; \
				path[d] = q, dir[d++] = 1; \
			} else { /* ((1,((.,2)3,4)5)6,7)8 => ((1,(2,4)5)3,7)8; p=6 */ \
				__type *r; \
				int e = d++; /* backup _d_ */ \
				for (;;) { \
					dir[d] = 0; \
					path[d++] = q; \
					r = q->__head.p[0]; \
					if (r->__head.p[0] == 0) break; \
					q = r; \
				} \
				r->__head.p[0] = p->__head.p[0]; \
				q->__head.p[0] = r->__head.p[1]; \
				r->__head.p[1] = p->__head.p[1]; \
				r->__head.balance = p->__head.balance; \
				path[e-1]->__head.p[dir[e-1]] = r; \
				path[e] = r, dir[e] = 1; \
			} \
		} \
		while (--d > 0) { /* walk back up and restore the balance */ \
			__type *q = path[d]; \
			int which, other, b1 = 1, b2 = 2; \
			which = dir[d], other = 1 - which; \
			if (which) b1 = -b1, b2 = -b2; \
			q->__head.balance += b1; \
			if (q->__head.balance == b1) break; \
			else if (q->__head.balance == b2) { \
				__type *r = q->__head.p[other]; \
				if (r->__head.balance == -b1) { \
					path[d-1]->__head.p[dir[d-1]] = pre##_rotate2(q, which); \
				} else { \
					path[d-1]->__head.p[dir[d-1]] = pre##_rotate1(q, which); \
					if (r->__head.balance == 0) { \
						r->__head.balance = -b1; \
						q->__head.balance = b1; \
						break; \
					} else r->__head.balance = q->__head.balance = 0; \
				} \
			} \
		} \
		*root_ = fake.__head.p[0]; \
		return p; \
	}
|
||||||
|
|
||||||
|
#define kavll_free(__type, __head, __root, __free) do { \
|
||||||
|
__type *_p, *_q; \
|
||||||
|
for (_p = __root; _p; _p = _q) { \
|
||||||
|
if (_p->__head.p[0] == 0) { \
|
||||||
|
_q = _p->__head.p[1]; \
|
||||||
|
__free(_p); \
|
||||||
|
} else { \
|
||||||
|
_q = _p->__head.p[0]; \
|
||||||
|
_p->__head.p[0] = _q->__head.p[1]; \
|
||||||
|
_q->__head.p[1] = _p; \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
} while (0)
|
||||||
|
|
||||||
|
#define kavll_size(__type, __head, __root, __cnt) do { \
|
||||||
|
__type *_p, *_q; \
|
||||||
|
*(__cnt) = 0; \
|
||||||
|
for (_p = __root; _p; _p = _q) { \
|
||||||
|
if (_p->__head.p[0] == 0) { \
|
||||||
|
_q = _p->__head.p[1]; \
|
||||||
|
++*(__cnt); \
|
||||||
|
} else { \
|
||||||
|
_q = _p->__head.p[0]; \
|
||||||
|
_p->__head.p[0] = _q->__head.p[1]; \
|
||||||
|
_q->__head.p[1] = _p; \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
} while (0)
|
||||||
|
|
||||||
|
#define __KAVLL_ITR(pre, __scope, __type, __head, __cmp) \
|
||||||
|
typedef struct pre##_itr_t { \
|
||||||
|
const __type *stack[KAVLL_MAX_DEPTH], **top, *right; /* _right_ points to the right child of *top */ \
|
||||||
|
} pre##_itr_t; \
|
||||||
|
__scope void pre##_itr_first(const __type *root, struct pre##_itr_t *itr) { \
|
||||||
|
const __type *p; \
|
||||||
|
for (itr->top = itr->stack - 1, p = root; p; p = p->__head.p[0]) \
|
||||||
|
*++itr->top = p; \
|
||||||
|
itr->right = (*itr->top)->__head.p[1]; \
|
||||||
|
} \
|
||||||
|
__scope int pre##_itr_find(const __type *root, const __type *x, struct pre##_itr_t *itr) { \
|
||||||
|
const __type *p = root; \
|
||||||
|
itr->top = itr->stack - 1; \
|
||||||
|
while (p != 0) { \
|
||||||
|
int cmp; \
|
||||||
|
cmp = __cmp(x, p); \
|
||||||
|
if (cmp < 0) *++itr->top = p, p = p->__head.p[0]; \
|
||||||
|
else if (cmp > 0) p = p->__head.p[1]; \
|
||||||
|
else break; \
|
||||||
|
} \
|
||||||
|
if (p) { \
|
||||||
|
*++itr->top = p; \
|
||||||
|
itr->right = p->__head.p[1]; \
|
||||||
|
return 1; \
|
||||||
|
} else if (itr->top >= itr->stack) { \
|
||||||
|
itr->right = (*itr->top)->__head.p[1]; \
|
||||||
|
return 0; \
|
||||||
|
} else return 0; \
|
||||||
|
} \
|
||||||
|
__scope int pre##_itr_next(struct pre##_itr_t *itr) { \
|
||||||
|
for (;;) { \
|
||||||
|
const __type *p; \
|
||||||
|
for (p = itr->right, --itr->top; p; p = p->__head.p[0]) \
|
||||||
|
*++itr->top = p; \
|
||||||
|
if (itr->top < itr->stack) return 0; \
|
||||||
|
itr->right = (*itr->top)->__head.p[1]; \
|
||||||
|
return 1; \
|
||||||
|
} \
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Node the iterator currently points at, or 0 when its stack is empty (top < stack). */
#define kavll_at(itr) ((itr)->top < (itr)->stack? 0 : *(itr)->top)
|
||||||
|
|
||||||
|
/*
 * Instantiate the whole kavl-lite API (find, rotate, insert, erase, iterator)
 * for one node type.
 *
 *   pre      prefix of the generated identifiers (pre##_erase, pre##_itr_t, ...)
 *   __scope  storage-class/linkage specifier placed before each function
 *   __type   node type
 *   __head   name of the member of __type that holds p[] and balance
 *   __cmp    comparator taking two node pointers, returning <0, 0 or >0
 */
#define KAVLL_INIT2(pre, __scope, __type, __head, __cmp) \
	__KAVLL_FIND(pre, __scope, __type, __head, __cmp) \
	__KAVLL_ROTATE(pre, __type, __head) \
	__KAVLL_INSERT(pre, __scope, __type, __head, __cmp) \
	__KAVLL_ERASE(pre, __scope, __type, __head, __cmp) \
	__KAVLL_ITR(pre, __scope, __type, __head, __cmp)
|
||||||
|
|
||||||
|
/* Same as KAVLL_INIT2() with an empty __scope, i.e. no specifier is placed before the functions. */
#define KAVLL_INIT(pre, __type, __head, __cmp) \
	KAVLL_INIT2(pre,, __type, __head, __cmp)
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
@ -0,0 +1,400 @@
|
||||||
|
/* The MIT License
|
||||||
|
|
||||||
|
Copyright (c) 2018 by Attractive Chaos <attractor@live.co.uk>
|
||||||
|
|
||||||
|
Permission is hereby granted, free of charge, to any person obtaining
|
||||||
|
a copy of this software and associated documentation files (the
|
||||||
|
"Software"), to deal in the Software without restriction, including
|
||||||
|
without limitation the rights to use, copy, modify, merge, publish,
|
||||||
|
distribute, sublicense, and/or sell copies of the Software, and to
|
||||||
|
permit persons to whom the Software is furnished to do so, subject to
|
||||||
|
the following conditions:
|
||||||
|
|
||||||
|
The above copyright notice and this permission notice shall be
|
||||||
|
included in all copies or substantial portions of the Software.
|
||||||
|
|
||||||
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||||
|
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||||
|
BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||||
|
ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||||
|
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
|
SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* An example:
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include "kavl.h"
|
||||||
|
|
||||||
|
struct my_node {
|
||||||
|
char key;
|
||||||
|
KAVL_HEAD(struct my_node) head;
|
||||||
|
};
|
||||||
|
#define my_cmp(p, q) (((q)->key < (p)->key) - ((p)->key < (q)->key))
|
||||||
|
KAVL_INIT(my, struct my_node, head, my_cmp)
|
||||||
|
|
||||||
|
int main(void) {
|
||||||
|
const char *str = "MNOLKQOPHIA"; // from wiki, except a duplicate
|
||||||
|
struct my_node *root = 0;
|
||||||
|
int i, l = strlen(str);
|
||||||
|
for (i = 0; i < l; ++i) { // insert in the input order
|
||||||
|
struct my_node *q, *p = malloc(sizeof(*p));
|
||||||
|
p->key = str[i];
|
||||||
|
q = kavl_insert(my, &root, p, 0);
|
||||||
|
if (p != q) free(p); // if already present, free
|
||||||
|
}
|
||||||
|
kavl_itr_t(my) itr;
|
||||||
|
kavl_itr_first(my, root, &itr); // place at first
|
||||||
|
do { // traverse
|
||||||
|
const struct my_node *p = kavl_at(&itr);
|
||||||
|
putchar(p->key);
|
||||||
|
free((void*)p); // free node
|
||||||
|
} while (kavl_itr_next(my, &itr));
|
||||||
|
putchar('\n');
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef KAVL_H
|
||||||
|
#define KAVL_H
|
||||||
|
|
||||||
|
#ifdef __STRICT_ANSI__
|
||||||
|
#define inline __inline__
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#define KAVL_MAX_DEPTH 64
|
||||||
|
|
||||||
|
#define kavl_size(head, p) ((p)? (p)->head.size : 0)
|
||||||
|
#define kavl_size_child(head, q, i) ((q)->head.p[(i)]? (q)->head.p[(i)]->head.size : 0)
|
||||||
|
|
||||||
|
#define KAVL_HEAD(__type) \
|
||||||
|
struct { \
|
||||||
|
__type *p[2]; \
|
||||||
|
signed char balance; /* balance factor */ \
|
||||||
|
unsigned size; /* #elements in subtree */ \
|
||||||
|
}
|
||||||
|
|
||||||
|
#define __KAVL_FIND(suf, __scope, __type, __head, __cmp) \
|
||||||
|
__scope __type *kavl_find_##suf(const __type *root, const __type *x, unsigned *cnt_) { \
|
||||||
|
const __type *p = root; \
|
||||||
|
unsigned cnt = 0; \
|
||||||
|
while (p != 0) { \
|
||||||
|
int cmp; \
|
||||||
|
cmp = __cmp(x, p); \
|
||||||
|
if (cmp >= 0) cnt += kavl_size_child(__head, p, 0) + 1; \
|
||||||
|
if (cmp < 0) p = p->__head.p[0]; \
|
||||||
|
else if (cmp > 0) p = p->__head.p[1]; \
|
||||||
|
else break; \
|
||||||
|
} \
|
||||||
|
if (cnt_) *cnt_ = cnt; \
|
||||||
|
return (__type*)p; \
|
||||||
|
}
|
||||||
|
|
||||||
|
#define __KAVL_ROTATE(suf, __type, __head) \
|
||||||
|
/* one rotation: (a,(b,c)q)p => ((a,b)p,c)q */ \
|
||||||
|
static inline __type *kavl_rotate1_##suf(__type *p, int dir) { /* dir=0 to left; dir=1 to right */ \
|
||||||
|
int opp = 1 - dir; /* opposite direction */ \
|
||||||
|
__type *q = p->__head.p[opp]; \
|
||||||
|
unsigned size_p = p->__head.size; \
|
||||||
|
p->__head.size -= q->__head.size - kavl_size_child(__head, q, dir); \
|
||||||
|
q->__head.size = size_p; \
|
||||||
|
p->__head.p[opp] = q->__head.p[dir]; \
|
||||||
|
q->__head.p[dir] = p; \
|
||||||
|
return q; \
|
||||||
|
} \
|
||||||
|
/* two consecutive rotations: (a,((b,c)r,d)q)p => ((a,b)p,(c,d)q)r */ \
|
||||||
|
static inline __type *kavl_rotate2_##suf(__type *p, int dir) { \
|
||||||
|
int b1, opp = 1 - dir; \
|
||||||
|
__type *q = p->__head.p[opp], *r = q->__head.p[dir]; \
|
||||||
|
unsigned size_x_dir = kavl_size_child(__head, r, dir); \
|
||||||
|
r->__head.size = p->__head.size; \
|
||||||
|
p->__head.size -= q->__head.size - size_x_dir; \
|
||||||
|
q->__head.size -= size_x_dir + 1; \
|
||||||
|
p->__head.p[opp] = r->__head.p[dir]; \
|
||||||
|
r->__head.p[dir] = p; \
|
||||||
|
q->__head.p[dir] = r->__head.p[opp]; \
|
||||||
|
r->__head.p[opp] = q; \
|
||||||
|
b1 = dir == 0? +1 : -1; \
|
||||||
|
if (r->__head.balance == b1) q->__head.balance = 0, p->__head.balance = -b1; \
|
||||||
|
else if (r->__head.balance == 0) q->__head.balance = p->__head.balance = 0; \
|
||||||
|
else q->__head.balance = b1, p->__head.balance = 0; \
|
||||||
|
r->__head.balance = 0; \
|
||||||
|
return r; \
|
||||||
|
}
|
||||||
|
|
||||||
|
#define __KAVL_INSERT(suf, __scope, __type, __head, __cmp) \
|
||||||
|
__scope __type *kavl_insert_##suf(__type **root_, __type *x, unsigned *cnt_) { \
|
||||||
|
unsigned char stack[KAVL_MAX_DEPTH]; \
|
||||||
|
__type *path[KAVL_MAX_DEPTH]; \
|
||||||
|
__type *bp, *bq; \
|
||||||
|
__type *p, *q, *r = 0; /* _r_ is potentially the new root */ \
|
||||||
|
int i, which = 0, top, b1, path_len; \
|
||||||
|
unsigned cnt = 0; \
|
||||||
|
bp = *root_, bq = 0; \
|
||||||
|
/* find the insertion location */ \
|
||||||
|
for (p = bp, q = bq, top = path_len = 0; p; q = p, p = p->__head.p[which]) { \
|
||||||
|
int cmp; \
|
||||||
|
cmp = __cmp(x, p); \
|
||||||
|
if (cmp >= 0) cnt += kavl_size_child(__head, p, 0) + 1; \
|
||||||
|
if (cmp == 0) { \
|
||||||
|
if (cnt_) *cnt_ = cnt; \
|
||||||
|
return p; \
|
||||||
|
} \
|
||||||
|
if (p->__head.balance != 0) \
|
||||||
|
bq = q, bp = p, top = 0; \
|
||||||
|
stack[top++] = which = (cmp > 0); \
|
||||||
|
path[path_len++] = p; \
|
||||||
|
} \
|
||||||
|
if (cnt_) *cnt_ = cnt; \
|
||||||
|
x->__head.balance = 0, x->__head.size = 1, x->__head.p[0] = x->__head.p[1] = 0; \
|
||||||
|
if (q == 0) *root_ = x; \
|
||||||
|
else q->__head.p[which] = x; \
|
||||||
|
if (bp == 0) return x; \
|
||||||
|
for (i = 0; i < path_len; ++i) ++path[i]->__head.size; \
|
||||||
|
for (p = bp, top = 0; p != x; p = p->__head.p[stack[top]], ++top) /* update balance factors */ \
|
||||||
|
if (stack[top] == 0) --p->__head.balance; \
|
||||||
|
else ++p->__head.balance; \
|
||||||
|
if (bp->__head.balance > -2 && bp->__head.balance < 2) return x; /* no re-balance needed */ \
|
||||||
|
/* re-balance */ \
|
||||||
|
which = (bp->__head.balance < 0); \
|
||||||
|
b1 = which == 0? +1 : -1; \
|
||||||
|
q = bp->__head.p[1 - which]; \
|
||||||
|
if (q->__head.balance == b1) { \
|
||||||
|
r = kavl_rotate1_##suf(bp, which); \
|
||||||
|
q->__head.balance = bp->__head.balance = 0; \
|
||||||
|
} else r = kavl_rotate2_##suf(bp, which); \
|
||||||
|
if (bq == 0) *root_ = r; \
|
||||||
|
else bq->__head.p[bp != bq->__head.p[0]] = r; \
|
||||||
|
return x; \
|
||||||
|
}
|
||||||
|
|
||||||
|
#define __KAVL_ERASE(suf, __scope, __type, __head, __cmp) \
|
||||||
|
__scope __type *kavl_erase_##suf(__type **root_, const __type *x, unsigned *cnt_) { \
|
||||||
|
__type *p, *path[KAVL_MAX_DEPTH], fake; \
|
||||||
|
unsigned char dir[KAVL_MAX_DEPTH]; \
|
||||||
|
int i, d = 0, cmp; \
|
||||||
|
unsigned cnt = 0; \
|
||||||
|
fake.__head.p[0] = *root_, fake.__head.p[1] = 0; \
|
||||||
|
if (cnt_) *cnt_ = 0; \
|
||||||
|
if (x) { \
|
||||||
|
for (cmp = -1, p = &fake; cmp; cmp = __cmp(x, p)) { \
|
||||||
|
int which = (cmp > 0); \
|
||||||
|
if (cmp > 0) cnt += kavl_size_child(__head, p, 0) + 1; \
|
||||||
|
dir[d] = which; \
|
||||||
|
path[d++] = p; \
|
||||||
|
p = p->__head.p[which]; \
|
||||||
|
if (p == 0) { \
|
||||||
|
if (cnt_) *cnt_ = 0; \
|
||||||
|
return 0; \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
cnt += kavl_size_child(__head, p, 0) + 1; /* because p==x is not counted */ \
|
||||||
|
} else { \
|
||||||
|
for (p = &fake, cnt = 1; p; p = p->__head.p[0]) \
|
||||||
|
dir[d] = 0, path[d++] = p; \
|
||||||
|
p = path[--d]; \
|
||||||
|
} \
|
||||||
|
if (cnt_) *cnt_ = cnt; \
|
||||||
|
for (i = 1; i < d; ++i) --path[i]->__head.size; \
|
||||||
|
if (p->__head.p[1] == 0) { /* ((1,.)2,3)4 => (1,3)4; p=2 */ \
|
||||||
|
path[d-1]->__head.p[dir[d-1]] = p->__head.p[0]; \
|
||||||
|
} else { \
|
||||||
|
__type *q = p->__head.p[1]; \
|
||||||
|
if (q->__head.p[0] == 0) { /* ((1,2)3,4)5 => ((1)2,4)5; p=3 */ \
|
||||||
|
q->__head.p[0] = p->__head.p[0]; \
|
||||||
|
q->__head.balance = p->__head.balance; \
|
||||||
|
path[d-1]->__head.p[dir[d-1]] = q; \
|
||||||
|
path[d] = q, dir[d++] = 1; \
|
||||||
|
q->__head.size = p->__head.size - 1; \
|
||||||
|
} else { /* ((1,((.,2)3,4)5)6,7)8 => ((1,(2,4)5)3,7)8; p=6 */ \
|
||||||
|
__type *r; \
|
||||||
|
int e = d++; /* backup _d_ */\
|
||||||
|
for (;;) { \
|
||||||
|
dir[d] = 0; \
|
||||||
|
path[d++] = q; \
|
||||||
|
r = q->__head.p[0]; \
|
||||||
|
if (r->__head.p[0] == 0) break; \
|
||||||
|
q = r; \
|
||||||
|
} \
|
||||||
|
r->__head.p[0] = p->__head.p[0]; \
|
||||||
|
q->__head.p[0] = r->__head.p[1]; \
|
||||||
|
r->__head.p[1] = p->__head.p[1]; \
|
||||||
|
r->__head.balance = p->__head.balance; \
|
||||||
|
path[e-1]->__head.p[dir[e-1]] = r; \
|
||||||
|
path[e] = r, dir[e] = 1; \
|
||||||
|
for (i = e + 1; i < d; ++i) --path[i]->__head.size; \
|
||||||
|
r->__head.size = p->__head.size - 1; \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
while (--d > 0) { \
|
||||||
|
__type *q = path[d]; \
|
||||||
|
int which, other, b1 = 1, b2 = 2; \
|
||||||
|
which = dir[d], other = 1 - which; \
|
||||||
|
if (which) b1 = -b1, b2 = -b2; \
|
||||||
|
q->__head.balance += b1; \
|
||||||
|
if (q->__head.balance == b1) break; \
|
||||||
|
else if (q->__head.balance == b2) { \
|
||||||
|
__type *r = q->__head.p[other]; \
|
||||||
|
if (r->__head.balance == -b1) { \
|
||||||
|
path[d-1]->__head.p[dir[d-1]] = kavl_rotate2_##suf(q, which); \
|
||||||
|
} else { \
|
||||||
|
path[d-1]->__head.p[dir[d-1]] = kavl_rotate1_##suf(q, which); \
|
||||||
|
if (r->__head.balance == 0) { \
|
||||||
|
r->__head.balance = -b1; \
|
||||||
|
q->__head.balance = b1; \
|
||||||
|
break; \
|
||||||
|
} else r->__head.balance = q->__head.balance = 0; \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
*root_ = fake.__head.p[0]; \
|
||||||
|
return p; \
|
||||||
|
}
|
||||||
|
|
||||||
|
#define kavl_free(__type, __head, __root, __free) do { \
|
||||||
|
__type *_p, *_q; \
|
||||||
|
for (_p = __root; _p; _p = _q) { \
|
||||||
|
if (_p->__head.p[0] == 0) { \
|
||||||
|
_q = _p->__head.p[1]; \
|
||||||
|
__free(_p); \
|
||||||
|
} else { \
|
||||||
|
_q = _p->__head.p[0]; \
|
||||||
|
_p->__head.p[0] = _q->__head.p[1]; \
|
||||||
|
_q->__head.p[1] = _p; \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
} while (0)
|
||||||
|
|
||||||
|
#define __KAVL_ITR(suf, __scope, __type, __head, __cmp) \
|
||||||
|
struct kavl_itr_##suf { \
|
||||||
|
const __type *stack[KAVL_MAX_DEPTH], **top, *right; /* _right_ points to the right child of *top */ \
|
||||||
|
}; \
|
||||||
|
__scope void kavl_itr_first_##suf(const __type *root, struct kavl_itr_##suf *itr) { \
|
||||||
|
const __type *p; \
|
||||||
|
for (itr->top = itr->stack - 1, p = root; p; p = p->__head.p[0]) \
|
||||||
|
*++itr->top = p; \
|
||||||
|
itr->right = (*itr->top)->__head.p[1]; \
|
||||||
|
} \
|
||||||
|
__scope int kavl_itr_find_##suf(const __type *root, const __type *x, struct kavl_itr_##suf *itr) { \
|
||||||
|
const __type *p = root; \
|
||||||
|
itr->top = itr->stack - 1; \
|
||||||
|
while (p != 0) { \
|
||||||
|
int cmp; \
|
||||||
|
cmp = __cmp(x, p); \
|
||||||
|
if (cmp < 0) *++itr->top = p, p = p->__head.p[0]; \
|
||||||
|
else if (cmp > 0) p = p->__head.p[1]; \
|
||||||
|
else break; \
|
||||||
|
} \
|
||||||
|
if (p) { \
|
||||||
|
*++itr->top = p; \
|
||||||
|
itr->right = p->__head.p[1]; \
|
||||||
|
return 1; \
|
||||||
|
} else if (itr->top >= itr->stack) { \
|
||||||
|
itr->right = (*itr->top)->__head.p[1]; \
|
||||||
|
return 0; \
|
||||||
|
} else return 0; \
|
||||||
|
} \
|
||||||
|
__scope int kavl_itr_next_##suf(struct kavl_itr_##suf *itr) { \
|
||||||
|
for (;;) { \
|
||||||
|
const __type *p; \
|
||||||
|
for (p = itr->right, --itr->top; p; p = p->__head.p[0]) \
|
||||||
|
*++itr->top = p; \
|
||||||
|
if (itr->top < itr->stack) return 0; \
|
||||||
|
itr->right = (*itr->top)->__head.p[1]; \
|
||||||
|
return 1; \
|
||||||
|
} \
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Insert a node to the tree
|
||||||
|
*
|
||||||
|
* @param suf name suffix used in KAVL_INIT()
|
||||||
|
* @param proot pointer to the root of the tree (in/out: root may change)
|
||||||
|
* @param x node to insert (in)
|
||||||
|
* @param cnt number of nodes smaller than or equal to _x_; can be NULL (out)
|
||||||
|
*
|
||||||
|
* @return _x_ if not present in the tree, or the node equal to x.
|
||||||
|
*/
|
||||||
|
#define kavl_insert(suf, proot, x, cnt) kavl_insert_##suf(proot, x, cnt)
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Find a node in the tree
|
||||||
|
*
|
||||||
|
* @param suf name suffix used in KAVL_INIT()
|
||||||
|
* @param root root of the tree
|
||||||
|
* @param x node value to find (in)
|
||||||
|
* @param cnt number of nodes smaller than or equal to _x_; can be NULL (out)
|
||||||
|
*
|
||||||
|
* @return node equal to _x_ if present, or NULL if absent
|
||||||
|
*/
|
||||||
|
#define kavl_find(suf, root, x, cnt) kavl_find_##suf(root, x, cnt)
|
||||||
|
|
||||||
|
/**
 * Delete a node from the tree
 *
 * @param suf     name suffix used in KAVL_INIT()
 * @param proot   pointer to the root of the tree (in/out: root may change)
 * @param x       node value to delete; if NULL, delete the first node (in)
 * @param cnt     number of nodes smaller than or equal to the deleted node,
 *                counted before the deletion; 0 if _x_ is absent; can be NULL (out)
 *
 * @return node removed from the tree if present, or NULL if absent
 */
|
||||||
|
#define kavl_erase(suf, proot, x, cnt) kavl_erase_##suf(proot, x, cnt)
|
||||||
|
#define kavl_erase_first(suf, proot) kavl_erase_##suf(proot, 0, 0)
|
||||||
|
|
||||||
|
#define kavl_itr_t(suf) struct kavl_itr_##suf
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Place the iterator at the smallest object
|
||||||
|
*
|
||||||
|
* @param suf name suffix used in KAVL_INIT()
|
||||||
|
* @param root root of the tree
|
||||||
|
* @param itr iterator
|
||||||
|
*/
|
||||||
|
#define kavl_itr_first(suf, root, itr) kavl_itr_first_##suf(root, itr)
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Place the iterator at the object equal to or greater than the query
|
||||||
|
*
|
||||||
|
* @param suf name suffix used in KAVL_INIT()
|
||||||
|
* @param root root of the tree
|
||||||
|
* @param x query (in)
|
||||||
|
* @param itr iterator (out)
|
||||||
|
*
|
||||||
|
* @return 1 if find; 0 otherwise. kavl_at(itr) is NULL if and only if query is
|
||||||
|
* larger than all objects in the tree
|
||||||
|
*/
|
||||||
|
#define kavl_itr_find(suf, root, x, itr) kavl_itr_find_##suf(root, x, itr)
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Move to the next object in order
|
||||||
|
*
|
||||||
|
* @param itr iterator (modified)
|
||||||
|
*
|
||||||
|
* @return 1 if there is a next object; 0 otherwise
|
||||||
|
*/
|
||||||
|
#define kavl_itr_next(suf, itr) kavl_itr_next_##suf(itr)
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Return the pointer at the iterator
|
||||||
|
*
|
||||||
|
* @param itr iterator
|
||||||
|
*
|
||||||
|
* @return pointer if present; NULL otherwise
|
||||||
|
*/
|
||||||
|
#define kavl_at(itr) ((itr)->top < (itr)->stack? 0 : *(itr)->top)
|
||||||
|
|
||||||
|
/*
 * Instantiate the whole kavl API (find, rotate, insert, erase, iterator) for
 * one node type.
 *
 *   suf      suffix of the generated identifiers (kavl_insert_##suf, ...)
 *   __scope  storage-class/linkage specifier placed before each function
 *   __type   node type
 *   __head   name of the KAVL_HEAD member inside __type
 *   __cmp    comparator taking two node pointers, returning <0, 0 or >0
 */
#define KAVL_INIT2(suf, __scope, __type, __head, __cmp) \
	__KAVL_FIND(suf, __scope, __type, __head, __cmp) \
	__KAVL_ROTATE(suf, __type, __head) \
	__KAVL_INSERT(suf, __scope, __type, __head, __cmp) \
	__KAVL_ERASE(suf, __scope, __type, __head, __cmp) \
	__KAVL_ITR(suf, __scope, __type, __head, __cmp)
|
||||||
|
|
||||||
|
/* Same as KAVL_INIT2() with an empty __scope, i.e. no specifier is placed before the functions. */
#define KAVL_INIT(suf, __type, __head, __cmp) \
	KAVL_INIT2(suf,, __type, __head, __cmp)
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
@ -0,0 +1,30 @@
|
||||||
|
#ifndef KBIT_H
|
||||||
|
#define KBIT_H
|
||||||
|
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
|
/*
 * Number of set bits in y (standard popcount; from wikipedia).
 * Bits are summed in 2-bit, then 4-bit, then 8-bit fields; the final multiply
 * adds the eight byte counts into the top byte.
 */
static inline uint64_t kbi_popcount64(uint64_t y)
{
	y -= ((y >> 1) & 0x5555555555555555ull);
	y = (y & 0x3333333333333333ull) + (y >> 2 & 0x3333333333333333ull);
	return ((y + (y >> 4)) & 0xf0f0f0f0f0f0f0full) * 0x101010101010101ull >> 56;
}
|
||||||
|
|
||||||
|
/*
 * Count how many of the 32 two-bit fields of y are equal to c (from BWA).
 * Only the low two bits of c are used. The intended use is counting A/C/G/T
 * in a 2-bit encoded integer.
 */
static inline uint64_t kbi_DNAcount64(uint64_t y, int c)
{
	/* reduce nucleotide counting to bits counting: after this, the low bit of
	 * each 2-bit field is 1 exactly when that field equals c */
	y = ((c&2)? y : ~y) >> 1 & ((c&1)? y : ~y) & 0x5555555555555555ull;
	/* count the number of 1s in y */
	y = (y & 0x3333333333333333ull) + (y >> 2 & 0x3333333333333333ull);
	return ((y + (y >> 4)) & 0xf0f0f0f0f0f0f0full) * 0x101010101010101ull >> 56;
}
|
||||||
|
|
||||||
|
/*
 * Round a 32-bit unsigned integer up to the next power of two, in place
 * (from "bit twiddling hacks"). A value that already is a power of two is
 * left unchanged. x must be a modifiable lvalue; it is evaluated several times.
 */
#ifndef kroundup32
#define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x))
#endif
|
||||||
|
|
||||||
|
/*
 * Swap two integer lvalues with three XORs (from "bit twiddling hacks").
 * a and b must be distinct objects: if both name the same object, the first
 * XOR sets it to zero and the value is lost.
 */
#ifndef kbi_swap
#define kbi_swap(a, b) (((a) ^= (b)), ((b) ^= (a)), ((a) ^= (b)))
#endif
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
@ -0,0 +1,437 @@
|
||||||
|
/*-
|
||||||
|
* Copyright 1997-1999, 2001, John-Mark Gurney.
|
||||||
|
* 2008-2009, Attractive Chaos <attractor@live.co.uk>
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions
|
||||||
|
* are met:
|
||||||
|
*
|
||||||
|
* 1. Redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer.
|
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||||
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||||
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||||
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||||
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||||
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||||
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||||
|
* SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef __AC_KBTREE_H
|
||||||
|
#define __AC_KBTREE_H
|
||||||
|
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
|
#define KB_MAX_DEPTH 64
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
int32_t is_internal:1, n:31;
|
||||||
|
} kbnode_t;
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
kbnode_t *x;
|
||||||
|
int i;
|
||||||
|
} kbpos_t;
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
kbpos_t stack[KB_MAX_DEPTH], *p;
|
||||||
|
} kbitr_t;
|
||||||
|
|
||||||
|
#define __KB_KEY(type, x) ((type*)((char*)x + 4))
|
||||||
|
#define __KB_PTR(btr, x) ((kbnode_t**)((char*)x + btr->off_ptr))
|
||||||
|
|
||||||
|
/*
 * Declare the tree handle type kbtree_##name##_t.
 *   root          root node
 *   off_ptr       byte offset of the child-pointer array inside a node
 *   ilen, elen    allocation sizes used for internal / leaf nodes
 *   t, n          t is derived from the requested node size in kb_init; n = 2*t - 1
 *                 is the key count at which a node is split
 *   n_keys        incremented by every put
 *   n_nodes       incremented for every node allocated
 * off_key is not referenced by the code visible in this header section.
 */
#define __KB_TREE_T(name) \
	typedef struct { \
		kbnode_t *root; \
		int off_key, off_ptr, ilen, elen; \
		int n, t; \
		int n_keys, n_nodes; \
	} kbtree_##name##_t;
|
||||||
|
|
||||||
|
#define __KB_INIT(name, key_t) \
|
||||||
|
kbtree_##name##_t *kb_init_##name(int size) \
|
||||||
|
{ \
|
||||||
|
kbtree_##name##_t *b; \
|
||||||
|
b = (kbtree_##name##_t*)calloc(1, sizeof(kbtree_##name##_t)); \
|
||||||
|
b->t = ((size - 4 - sizeof(void*)) / (sizeof(void*) + sizeof(key_t)) + 1) >> 1; \
|
||||||
|
if (b->t < 2) { \
|
||||||
|
free(b); return 0; \
|
||||||
|
} \
|
||||||
|
b->n = 2 * b->t - 1; \
|
||||||
|
b->off_ptr = 4 + b->n * sizeof(key_t); \
|
||||||
|
b->ilen = (4 + sizeof(void*) + b->n * (sizeof(void*) + sizeof(key_t)) + 3) >> 2 << 2; \
|
||||||
|
b->elen = (b->off_ptr + 3) >> 2 << 2; \
|
||||||
|
b->root = (kbnode_t*)calloc(1, b->ilen); \
|
||||||
|
++b->n_nodes; \
|
||||||
|
return b; \
|
||||||
|
}
|
||||||
|
|
||||||
|
#define __kb_destroy(b) do { \
|
||||||
|
int i, max = 8; \
|
||||||
|
kbnode_t *x, **top, **stack = 0; \
|
||||||
|
if (b) { \
|
||||||
|
top = stack = (kbnode_t**)calloc(max, sizeof(kbnode_t*)); \
|
||||||
|
*top++ = (b)->root; \
|
||||||
|
while (top != stack) { \
|
||||||
|
x = *--top; \
|
||||||
|
if (x->is_internal == 0) { free(x); continue; } \
|
||||||
|
for (i = 0; i <= x->n; ++i) \
|
||||||
|
if (__KB_PTR(b, x)[i]) { \
|
||||||
|
if (top - stack == max) { \
|
||||||
|
max <<= 1; \
|
||||||
|
stack = (kbnode_t**)realloc(stack, max * sizeof(kbnode_t*)); \
|
||||||
|
top = stack + (max>>1); \
|
||||||
|
} \
|
||||||
|
*top++ = __KB_PTR(b, x)[i]; \
|
||||||
|
} \
|
||||||
|
free(x); \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
free(b); free(stack); \
|
||||||
|
} while (0)
|
||||||
|
|
||||||
|
#define __KB_GET_AUX1(name, key_t, __cmp) \
|
||||||
|
static inline int __kb_getp_aux_##name(const kbnode_t * __restrict x, const key_t * __restrict k, int *r) \
|
||||||
|
{ \
|
||||||
|
int tr, *rr, begin = 0, end = x->n; \
|
||||||
|
if (x->n == 0) return -1; \
|
||||||
|
rr = r? r : &tr; \
|
||||||
|
while (begin < end) { \
|
||||||
|
int mid = (begin + end) >> 1; \
|
||||||
|
if (__cmp(__KB_KEY(key_t, x)[mid], *k) < 0) begin = mid + 1; \
|
||||||
|
else end = mid; \
|
||||||
|
} \
|
||||||
|
if (begin == x->n) { *rr = 1; return x->n - 1; } \
|
||||||
|
if ((*rr = __cmp(*k, __KB_KEY(key_t, x)[begin])) < 0) --begin; \
|
||||||
|
return begin; \
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Generate the lookup functions.
 *   kb_getp_##name(b, k)  descend from the root, searching each node with
 *                         __kb_getp_aux_##name(); returns a pointer to the
 *                         stored key equal to *k, or NULL if no such key exists
 *   kb_get_##name(b, k)   same, taking the key by value
 */
#define __KB_GET(name, key_t) \
	static key_t *kb_getp_##name(kbtree_##name##_t *b, const key_t * __restrict k) \
	{ \
		int i, r = 0; \
		kbnode_t *x = b->root; \
		while (x) { \
			i = __kb_getp_aux_##name(x, k, &r); \
			if (i >= 0 && r == 0) return &__KB_KEY(key_t, x)[i]; \
			if (x->is_internal == 0) return 0; \
			x = __KB_PTR(b, x)[i + 1]; /* child between key i and key i+1 */ \
		} \
		return 0; \
	} \
	static inline key_t *kb_get_##name(kbtree_##name##_t *b, const key_t k) \
	{ \
		return kb_getp_##name(b, &k); \
	}
|
||||||
|
|
||||||
|
/*
 * Generate the neighbour-lookup functions.
 *   kb_intervalp_##name(b, k, lower, upper)
 *       If a key equal to *k is stored, both *lower and *upper point to it.
 *       Otherwise, on the way down, *lower is updated to the last key <= *k
 *       seen in each node and *upper to the key following it in that node;
 *       either stays NULL if no such key was encountered.
 *   kb_interval_##name(b, k, lower, upper)  same, taking the key by value
 */
#define __KB_INTERVAL(name, key_t) \
	static void kb_intervalp_##name(kbtree_##name##_t *b, const key_t * __restrict k, key_t **lower, key_t **upper) \
	{ \
		int i, r = 0; \
		kbnode_t *x = b->root; \
		*lower = *upper = 0; \
		while (x) { \
			i = __kb_getp_aux_##name(x, k, &r); \
			if (i >= 0 && r == 0) { \
				*lower = *upper = &__KB_KEY(key_t, x)[i]; \
				return; \
			} \
			if (i >= 0) *lower = &__KB_KEY(key_t, x)[i]; \
			if (i < x->n - 1) *upper = &__KB_KEY(key_t, x)[i + 1]; \
			if (x->is_internal == 0) return; \
			x = __KB_PTR(b, x)[i + 1]; \
		} \
	} \
	static inline void kb_interval_##name(kbtree_##name##_t *b, const key_t k, key_t **lower, key_t **upper) \
	{ \
		kb_intervalp_##name(b, &k, lower, upper); \
	}
|
||||||
|
|
||||||
|
/*
 * Generate the insertion functions.
 *   __kb_split_##name(b, x, i, y)  split the full child y (= child i of the
 *                                  internal node x) into y and a new sibling z,
 *                                  moving y's middle key up into x at index i
 *   __kb_putp_aux_##name(b, x, k)  insert *k below x, which must not be full;
 *                                  full children are split before descending
 *   kb_putp_##name(b, k)           insert *k into the tree, growing a new root
 *                                  first if the current root is full; returns a
 *                                  pointer to the stored copy of the key
 *   kb_put_##name(b, k)            same, taking the key by value
 * No check for an already-present equal key is made here, and n_keys is
 * incremented on every call. NOTE(review): the calloc() results in this block
 * are used without a NULL check.
 */
#define __KB_PUT(name, key_t, __cmp) \
	/* x must be an internal node */ \
	static void __kb_split_##name(kbtree_##name##_t *b, kbnode_t *x, int i, kbnode_t *y) \
	{ \
		kbnode_t *z; \
		z = (kbnode_t*)calloc(1, y->is_internal? b->ilen : b->elen); \
		++b->n_nodes; \
		z->is_internal = y->is_internal; \
		z->n = b->t - 1; \
		/* upper t-1 keys (and, for internal nodes, upper t children) move to z */ \
		memcpy(__KB_KEY(key_t, z), __KB_KEY(key_t, y) + b->t, sizeof(key_t) * (b->t - 1)); \
		if (y->is_internal) memcpy(__KB_PTR(b, z), __KB_PTR(b, y) + b->t, sizeof(void*) * b->t); \
		y->n = b->t - 1; \
		/* open a slot in x for the new child z and for the promoted key */ \
		memmove(__KB_PTR(b, x) + i + 2, __KB_PTR(b, x) + i + 1, sizeof(void*) * (x->n - i)); \
		__KB_PTR(b, x)[i + 1] = z; \
		memmove(__KB_KEY(key_t, x) + i + 1, __KB_KEY(key_t, x) + i, sizeof(key_t) * (x->n - i)); \
		__KB_KEY(key_t, x)[i] = __KB_KEY(key_t, y)[b->t - 1]; \
		++x->n; \
	} \
	static key_t *__kb_putp_aux_##name(kbtree_##name##_t *b, kbnode_t *x, const key_t * __restrict k) \
	{ \
		int i = x->n - 1; \
		key_t *ret; \
		if (x->is_internal == 0) { \
			i = __kb_getp_aux_##name(x, k, 0); \
			if (i != x->n - 1) \
				memmove(__KB_KEY(key_t, x) + i + 2, __KB_KEY(key_t, x) + i + 1, (x->n - i - 1) * sizeof(key_t)); \
			ret = &__KB_KEY(key_t, x)[i + 1]; \
			*ret = *k; \
			++x->n; \
		} else { \
			i = __kb_getp_aux_##name(x, k, 0) + 1; \
			if (__KB_PTR(b, x)[i]->n == 2 * b->t - 1) { \
				__kb_split_##name(b, x, i, __KB_PTR(b, x)[i]); \
				if (__cmp(*k, __KB_KEY(key_t, x)[i]) > 0) ++i; \
			} \
			ret = __kb_putp_aux_##name(b, __KB_PTR(b, x)[i], k); \
		} \
		return ret; \
	} \
	static key_t *kb_putp_##name(kbtree_##name##_t *b, const key_t * __restrict k) \
	{ \
		kbnode_t *r, *s; \
		++b->n_keys; \
		r = b->root; \
		if (r->n == 2 * b->t - 1) { \
			++b->n_nodes; \
			s = (kbnode_t*)calloc(1, b->ilen); \
			b->root = s; s->is_internal = 1; s->n = 0; \
			__KB_PTR(b, s)[0] = r; \
			__kb_split_##name(b, s, 0, r); \
			r = s; \
		} \
		return __kb_putp_aux_##name(b, r, k); \
	} \
	static inline void kb_put_##name(kbtree_##name##_t *b, const key_t k) \
	{ \
		kb_putp_##name(b, &k); \
	}
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * __KB_DEL(name, key_t): deletion routines for the B-tree instance `name`.
 *
 * __kb_delp_aux_<name>(b, x, k, s)
 *     Recursive worker on the subtree rooted at x. Returns a copy of the key
 *     that was removed (or *k when x is NULL).
 *       s == 0  remove the key comparing equal to *k;
 *       s == 1  remove the last key of the subtree (k is not dereferenced);
 *       s == 2  remove the first key of the subtree (k is not dereferenced).
 *     Before descending into a child holding only t-1 keys, the child is
 *     refilled by borrowing from a sibling or by merging with one.
 *     The leaf case removes whatever index the search produced without
 *     checking for equality, so the key is expected to be present.
 *
 * kb_delp_<name>(b, k)
 *     Public entry point: deletes *k, decrements n_keys and drops an emptied
 *     internal root. Returns the removed key.
 *
 * kb_del_<name>(b, k)
 *     By-value convenience wrapper around kb_delp_<name>().
 *
 * Fixes relative to the previous revision:
 *   - when the hit key's two children are merged, the separator moved down is
 *     the key stored in the tree, not the caller's *k, so the value returned
 *     is the stored key like in the other branches;
 *   - every node released by a merge is subtracted from b->n_nodes, matching
 *     the bookkeeping done when the root is created or collapsed.
 */
#define __KB_DEL(name, key_t) \
    static key_t __kb_delp_aux_##name(kbtree_##name##_t *b, kbnode_t *x, const key_t * __restrict k, int s) \
    { \
        int yn, zn, i, r = 0; \
        kbnode_t *xp, *y, *z; \
        key_t kp; \
        if (x == 0) return *k; \
        if (s) { /* s can only be 0, 1 or 2 */ \
            r = x->is_internal == 0? 0 : s == 1? 1 : -1; \
            i = s == 1? x->n - 1 : -1; \
        } else i = __kb_getp_aux_##name(x, k, &r); \
        if (x->is_internal == 0) { /* leaf: remove key i and close the gap */ \
            if (s == 2) ++i; \
            kp = __KB_KEY(key_t, x)[i]; \
            memmove(__KB_KEY(key_t, x) + i, __KB_KEY(key_t, x) + i + 1, (x->n - i - 1) * sizeof(key_t)); \
            --x->n; \
            return kp; \
        } \
        if (r == 0) { /* the key sits in this internal node */ \
            if ((yn = __KB_PTR(b, x)[i]->n) >= b->t) { /* replace by the predecessor */ \
                xp = __KB_PTR(b, x)[i]; \
                kp = __KB_KEY(key_t, x)[i]; \
                __KB_KEY(key_t, x)[i] = __kb_delp_aux_##name(b, xp, 0, 1); \
                return kp; \
            } else if ((zn = __KB_PTR(b, x)[i + 1]->n) >= b->t) { /* replace by the successor */ \
                xp = __KB_PTR(b, x)[i + 1]; \
                kp = __KB_KEY(key_t, x)[i]; \
                __KB_KEY(key_t, x)[i] = __kb_delp_aux_##name(b, xp, 0, 2); \
                return kp; \
            } else if (yn == b->t - 1 && zn == b->t - 1) { /* merge both children around the key */ \
                y = __KB_PTR(b, x)[i]; z = __KB_PTR(b, x)[i + 1]; \
                __KB_KEY(key_t, y)[y->n++] = __KB_KEY(key_t, x)[i]; /* stored key, not *k */ \
                memmove(__KB_KEY(key_t, y) + y->n, __KB_KEY(key_t, z), z->n * sizeof(key_t)); \
                if (y->is_internal) memmove(__KB_PTR(b, y) + y->n, __KB_PTR(b, z), (z->n + 1) * sizeof(void*)); \
                y->n += z->n; \
                memmove(__KB_KEY(key_t, x) + i, __KB_KEY(key_t, x) + i + 1, (x->n - i - 1) * sizeof(key_t)); \
                memmove(__KB_PTR(b, x) + i + 1, __KB_PTR(b, x) + i + 2, (x->n - i - 1) * sizeof(void*)); \
                --x->n; \
                --b->n_nodes; \
                free(z); \
                return __kb_delp_aux_##name(b, y, k, s); \
            } \
        } \
        ++i; /* index of the child to descend into */ \
        if ((xp = __KB_PTR(b, x)[i])->n == b->t - 1) { /* child is minimal: refill it first */ \
            if (i > 0 && (y = __KB_PTR(b, x)[i - 1])->n >= b->t) { /* borrow from the left sibling */ \
                memmove(__KB_KEY(key_t, xp) + 1, __KB_KEY(key_t, xp), xp->n * sizeof(key_t)); \
                if (xp->is_internal) memmove(__KB_PTR(b, xp) + 1, __KB_PTR(b, xp), (xp->n + 1) * sizeof(void*)); \
                __KB_KEY(key_t, xp)[0] = __KB_KEY(key_t, x)[i - 1]; \
                __KB_KEY(key_t, x)[i - 1] = __KB_KEY(key_t, y)[y->n - 1]; \
                if (xp->is_internal) __KB_PTR(b, xp)[0] = __KB_PTR(b, y)[y->n]; \
                --y->n; ++xp->n; \
            } else if (i < x->n && (y = __KB_PTR(b, x)[i + 1])->n >= b->t) { /* borrow from the right sibling */ \
                __KB_KEY(key_t, xp)[xp->n++] = __KB_KEY(key_t, x)[i]; \
                __KB_KEY(key_t, x)[i] = __KB_KEY(key_t, y)[0]; \
                if (xp->is_internal) __KB_PTR(b, xp)[xp->n] = __KB_PTR(b, y)[0]; \
                --y->n; \
                memmove(__KB_KEY(key_t, y), __KB_KEY(key_t, y) + 1, y->n * sizeof(key_t)); \
                if (y->is_internal) memmove(__KB_PTR(b, y), __KB_PTR(b, y) + 1, (y->n + 1) * sizeof(void*)); \
            } else if (i > 0 && (y = __KB_PTR(b, x)[i - 1])->n == b->t - 1) { /* merge into the left sibling */ \
                __KB_KEY(key_t, y)[y->n++] = __KB_KEY(key_t, x)[i - 1]; \
                memmove(__KB_KEY(key_t, y) + y->n, __KB_KEY(key_t, xp), xp->n * sizeof(key_t)); \
                if (y->is_internal) memmove(__KB_PTR(b, y) + y->n, __KB_PTR(b, xp), (xp->n + 1) * sizeof(void*)); \
                y->n += xp->n; \
                memmove(__KB_KEY(key_t, x) + i - 1, __KB_KEY(key_t, x) + i, (x->n - i) * sizeof(key_t)); \
                memmove(__KB_PTR(b, x) + i, __KB_PTR(b, x) + i + 1, (x->n - i) * sizeof(void*)); \
                --x->n; \
                --b->n_nodes; \
                free(xp); \
                xp = y; \
            } else if (i < x->n && (y = __KB_PTR(b, x)[i + 1])->n == b->t - 1) { /* merge the right sibling in */ \
                __KB_KEY(key_t, xp)[xp->n++] = __KB_KEY(key_t, x)[i]; \
                memmove(__KB_KEY(key_t, xp) + xp->n, __KB_KEY(key_t, y), y->n * sizeof(key_t)); \
                if (xp->is_internal) memmove(__KB_PTR(b, xp) + xp->n, __KB_PTR(b, y), (y->n + 1) * sizeof(void*)); \
                xp->n += y->n; \
                memmove(__KB_KEY(key_t, x) + i, __KB_KEY(key_t, x) + i + 1, (x->n - i - 1) * sizeof(key_t)); \
                memmove(__KB_PTR(b, x) + i + 1, __KB_PTR(b, x) + i + 2, (x->n - i - 1) * sizeof(void*)); \
                --x->n; \
                --b->n_nodes; \
                free(y); \
            } \
        } \
        return __kb_delp_aux_##name(b, xp, k, s); \
    } \
    static key_t kb_delp_##name(kbtree_##name##_t *b, const key_t * __restrict k) \
    { \
        kbnode_t *x; \
        key_t ret; \
        ret = __kb_delp_aux_##name(b, b->root, k, 0); \
        --b->n_keys; \
        if (b->root->n == 0 && b->root->is_internal) { /* root emptied by a merge: its only child becomes the root */ \
            --b->n_nodes; \
            x = b->root; \
            b->root = __KB_PTR(b, x)[0]; \
            free(x); \
        } \
        return ret; \
    } \
    static inline key_t kb_del_##name(kbtree_##name##_t *b, const key_t k) \
    { \
        return kb_delp_##name(b, &k); \
    }
|
||||||
|
|
||||||
|
/*
 * __KB_ITR(name, key_t): iterator routines for the B-tree instance `name`.
 *
 * The iterator keeps a stack of (node, index) frames; itr->p points at the
 * current frame and the current key is key[itr->p->i] of node itr->p->x.
 * For a frame below the top, the index is the child currently being walked,
 * which is also the index of the key that follows that child.
 *
 * kb_itr_first_<name>(b, itr)
 *     Positions itr on the first key (left-most leaf, index 0). itr->p is
 *     set to NULL when the tree holds no keys.
 *
 * kb_itr_get_<name>(b, k, itr)
 *     Searches for *k. Returns 0 and leaves itr on the matching key when it
 *     is found; returns -1 otherwise.
 *     Fix: the hit index is now stored in the current frame, and each frame
 *     that is descended through records the child index taken, so that
 *     kb_itr_key() and kb_itr_next() work after a successful lookup.
 *
 * kb_itr_next_<name>(b, itr)
 *     Advances to the next key. Returns 1 when itr is on a valid key and 0
 *     once the traversal is exhausted.
 */
#define __KB_ITR(name, key_t) \
    static inline void kb_itr_first_##name(kbtree_##name##_t *b, kbitr_t *itr) \
    { \
        itr->p = 0; \
        if (b->n_keys == 0) return; \
        itr->p = itr->stack; \
        itr->p->x = b->root; itr->p->i = 0; \
        while (itr->p->x->is_internal && __KB_PTR(b, itr->p->x)[0] != 0) { \
            kbnode_t *x = itr->p->x; \
            ++itr->p; \
            itr->p->x = __KB_PTR(b, x)[0]; itr->p->i = 0; \
        } \
    } \
    static int kb_itr_get_##name(kbtree_##name##_t *b, const key_t * __restrict k, kbitr_t *itr) \
    { \
        int i, r = 0; \
        itr->p = itr->stack; \
        itr->p->x = b->root; itr->p->i = 0; \
        while (itr->p->x) { \
            i = __kb_getp_aux_##name(itr->p->x, k, &r); \
            if (i >= 0 && r == 0) { \
                itr->p->i = i; /* current key is key[i] of this node */ \
                return 0; \
            } \
            if (itr->p->x->is_internal == 0) return -1; \
            itr->p->i = i + 1; /* child taken; key[i + 1] follows it */ \
            itr->p[1].x = __KB_PTR(b, itr->p->x)[i + 1]; \
            itr->p[1].i = i; \
            ++itr->p; \
        } \
        return -1; \
    } \
    static inline int kb_itr_next_##name(kbtree_##name##_t *b, kbitr_t *itr) \
    { \
        if (itr->p < itr->stack) return 0; \
        for (;;) { \
            ++itr->p->i; \
            while (itr->p->x && itr->p->i <= itr->p->x->n) { /* descend to the left-most key of child i */ \
                itr->p[1].i = 0; \
                itr->p[1].x = itr->p->x->is_internal? __KB_PTR(b, itr->p->x)[itr->p->i] : 0; \
                ++itr->p; \
            } \
            --itr->p; \
            if (itr->p < itr->stack) return 0; \
            if (itr->p->x && itr->p->i < itr->p->x->n) return 1; \
        } \
    }
|
||||||
|
|
||||||
|
#define KBTREE_INIT(name, key_t, __cmp) \
|
||||||
|
__KB_TREE_T(name) \
|
||||||
|
__KB_INIT(name, key_t) \
|
||||||
|
__KB_GET_AUX1(name, key_t, __cmp) \
|
||||||
|
__KB_GET(name, key_t) \
|
||||||
|
__KB_INTERVAL(name, key_t) \
|
||||||
|
__KB_PUT(name, key_t, __cmp) \
|
||||||
|
__KB_DEL(name, key_t) \
|
||||||
|
__KB_ITR(name, key_t)
|
||||||
|
|
||||||
|
#define KB_DEFAULT_SIZE 512
|
||||||
|
|
||||||
|
#define kbtree_t(name) kbtree_##name##_t
|
||||||
|
#define kb_init(name, s) kb_init_##name(s)
|
||||||
|
#define kb_destroy(name, b) __kb_destroy(b)
|
||||||
|
#define kb_get(name, b, k) kb_get_##name(b, k)
|
||||||
|
#define kb_put(name, b, k) kb_put_##name(b, k)
|
||||||
|
#define kb_del(name, b, k) kb_del_##name(b, k)
|
||||||
|
#define kb_interval(name, b, k, l, u) kb_interval_##name(b, k, l, u)
|
||||||
|
#define kb_getp(name, b, k) kb_getp_##name(b, k)
|
||||||
|
#define kb_putp(name, b, k) kb_putp_##name(b, k)
|
||||||
|
#define kb_delp(name, b, k) kb_delp_##name(b, k)
|
||||||
|
#define kb_intervalp(name, b, k, l, u) kb_intervalp_##name(b, k, l, u)
|
||||||
|
|
||||||
|
#define kb_itr_first(name, b, i) kb_itr_first_##name(b, i)
|
||||||
|
#define kb_itr_get(name, b, k, i) kb_itr_get_##name(b, k, i)
|
||||||
|
#define kb_itr_next(name, b, i) kb_itr_next_##name(b, i)
|
||||||
|
#define kb_itr_key(type, itr) __KB_KEY(type, (itr)->p->x)[(itr)->p->i]
|
||||||
|
#define kb_itr_valid(itr) ((itr)->p >= (itr)->stack)
|
||||||
|
|
||||||
|
#define kb_size(b) ((b)->n_keys)
|
||||||
|
|
||||||
|
#define kb_generic_cmp(a, b) (((b) < (a)) - ((a) < (b)))
|
||||||
|
#define kb_str_cmp(a, b) strcmp(a, b)
|
||||||
|
|
||||||
|
/* The following is *DEPRECATED*!!! Use the iterator interface instead! */
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
kbnode_t *x;
|
||||||
|
int i;
|
||||||
|
} __kbstack_t;
|
||||||
|
|
||||||
|
#define __kb_traverse(key_t, b, __func) do { \
|
||||||
|
int __kmax = 8; \
|
||||||
|
__kbstack_t *__kstack, *__kp; \
|
||||||
|
__kp = __kstack = (__kbstack_t*)calloc(__kmax, sizeof(__kbstack_t)); \
|
||||||
|
__kp->x = (b)->root; __kp->i = 0; \
|
||||||
|
for (;;) { \
|
||||||
|
while (__kp->x && __kp->i <= __kp->x->n) { \
|
||||||
|
if (__kp - __kstack == __kmax - 1) { \
|
||||||
|
__kmax <<= 1; \
|
||||||
|
__kstack = (__kbstack_t*)realloc(__kstack, __kmax * sizeof(__kbstack_t)); \
|
||||||
|
__kp = __kstack + (__kmax>>1) - 1; \
|
||||||
|
} \
|
||||||
|
(__kp+1)->i = 0; (__kp+1)->x = __kp->x->is_internal? __KB_PTR(b, __kp->x)[__kp->i] : 0; \
|
||||||
|
++__kp; \
|
||||||
|
} \
|
||||||
|
--__kp; \
|
||||||
|
if (__kp >= __kstack) { \
|
||||||
|
if (__kp->x && __kp->i < __kp->x->n) __func(&__KB_KEY(key_t, __kp->x)[__kp->i]); \
|
||||||
|
++__kp->i; \
|
||||||
|
} else break; \
|
||||||
|
} \
|
||||||
|
free(__kstack); \
|
||||||
|
} while (0)
|
||||||
|
|
||||||
|
/*
 * __kb_get_first(key_t, b, ret): store the first key of tree b in ret by
 * following child 0 from the root down to a leaf. Part of the deprecated
 * interface; the tree must hold at least one key.
 *
 * The child-pointer slot is only read on internal nodes, the same guard
 * kb_itr_first_<name>() uses. NOTE(review): leaf nodes are presumably
 * allocated without a pointer array (internal nodes use b->ilen), in which
 * case the unguarded read was out of bounds; confirm against the node layout.
 */
#define __kb_get_first(key_t, b, ret) do { \
        kbnode_t *__x = (b)->root; \
        while (__x->is_internal && __KB_PTR(b, __x)[0] != 0) \
            __x = __KB_PTR(b, __x)[0]; \
        (ret) = __KB_KEY(key_t, __x)[0]; \
    } while (0)
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
@ -0,0 +1,128 @@
|
||||||
|
#ifndef __AC_KDQ_H
|
||||||
|
#define __AC_KDQ_H
|
||||||
|
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
|
#define __KDQ_TYPE(type) \
|
||||||
|
typedef struct { \
|
||||||
|
size_t front:58, bits:6, count, mask; \
|
||||||
|
type *a; \
|
||||||
|
} kdq_##type##_t;
|
||||||
|
|
||||||
|
#define kdq_t(type) kdq_##type##_t
|
||||||
|
#define kdq_size(q) ((q)->count)
|
||||||
|
#define kdq_first(q) ((q)->a[(q)->front])
|
||||||
|
#define kdq_last(q) ((q)->a[((q)->front + (q)->count - 1) & (q)->mask])
|
||||||
|
#define kdq_at(q, i) ((q)->a[((q)->front + (i)) & (q)->mask])
|
||||||
|
|
||||||
|
#define __KDQ_IMPL(type, SCOPE) \
|
||||||
|
SCOPE kdq_##type##_t *kdq_init_##type() \
|
||||||
|
{ \
|
||||||
|
kdq_##type##_t *q; \
|
||||||
|
q = (kdq_##type##_t*)calloc(1, sizeof(kdq_##type##_t)); \
|
||||||
|
q->bits = 2, q->mask = (1ULL<<q->bits) - 1; \
|
||||||
|
q->a = (type*)malloc((1<<q->bits) * sizeof(type)); \
|
||||||
|
return q; \
|
||||||
|
} \
|
||||||
|
SCOPE void kdq_destroy_##type(kdq_##type##_t *q) \
|
||||||
|
{ \
|
||||||
|
if (q == 0) return; \
|
||||||
|
free(q->a); free(q); \
|
||||||
|
} \
|
||||||
|
SCOPE int kdq_resize_##type(kdq_##type##_t *q, int new_bits) \
|
||||||
|
{ \
|
||||||
|
size_t new_size = 1ULL<<new_bits, old_size = 1ULL<<q->bits; \
|
||||||
|
if (new_size < q->count) { /* not big enough */ \
|
||||||
|
int i; \
|
||||||
|
for (i = 0; i < 64; ++i) \
|
||||||
|
if (1ULL<<i > q->count) break; \
|
||||||
|
new_bits = i, new_size = 1ULL<<new_bits; \
|
||||||
|
} \
|
||||||
|
if (new_bits == q->bits) return q->bits; /* unchanged */ \
|
||||||
|
if (new_bits > q->bits) q->a = (type*)realloc(q->a, (1ULL<<new_bits) * sizeof(type)); \
|
||||||
|
if (q->front + q->count <= old_size) { /* unwrapped */ \
|
||||||
|
if (q->front + q->count > new_size) /* only happens for shrinking */ \
|
||||||
|
memmove(q->a, q->a + new_size, (q->front + q->count - new_size) * sizeof(type)); \
|
||||||
|
} else { /* wrapped */ \
|
||||||
|
memmove(q->a + (new_size - (old_size - q->front)), q->a + q->front, (old_size - q->front) * sizeof(type)); \
|
||||||
|
q->front = new_size - (old_size - q->front); \
|
||||||
|
} \
|
||||||
|
q->bits = new_bits, q->mask = (1ULL<<q->bits) - 1; \
|
||||||
|
if (new_bits < q->bits) q->a = (type*)realloc(q->a, (1ULL<<new_bits) * sizeof(type)); \
|
||||||
|
return q->bits; \
|
||||||
|
} \
|
||||||
|
SCOPE type *kdq_pushp_##type(kdq_##type##_t *q) \
|
||||||
|
{ \
|
||||||
|
if (q->count == 1ULL<<q->bits) kdq_resize_##type(q, q->bits + 1); \
|
||||||
|
return &q->a[((q->count++) + q->front) & (q)->mask]; \
|
||||||
|
} \
|
||||||
|
SCOPE void kdq_push_##type(kdq_##type##_t *q, type v) \
|
||||||
|
{ \
|
||||||
|
if (q->count == 1ULL<<q->bits) kdq_resize_##type(q, q->bits + 1); \
|
||||||
|
q->a[((q->count++) + q->front) & (q)->mask] = v; \
|
||||||
|
} \
|
||||||
|
SCOPE type *kdq_unshiftp_##type(kdq_##type##_t *q) \
|
||||||
|
{ \
|
||||||
|
if (q->count == 1ULL<<q->bits) kdq_resize_##type(q, q->bits + 1); \
|
||||||
|
++q->count; \
|
||||||
|
q->front = q->front? q->front - 1 : (1ULL<<q->bits) - 1; \
|
||||||
|
return &q->a[q->front]; \
|
||||||
|
} \
|
||||||
|
SCOPE void kdq_unshift_##type(kdq_##type##_t *q, type v) \
|
||||||
|
{ \
|
||||||
|
type *p; \
|
||||||
|
p = kdq_unshiftp_##type(q); \
|
||||||
|
*p = v; \
|
||||||
|
} \
|
||||||
|
SCOPE type *kdq_pop_##type(kdq_##type##_t *q) \
|
||||||
|
{ \
|
||||||
|
return q->count? &q->a[((--q->count) + q->front) & q->mask] : 0; \
|
||||||
|
} \
|
||||||
|
SCOPE type *kdq_shift_##type(kdq_##type##_t *q) \
|
||||||
|
{ \
|
||||||
|
type *d = 0; \
|
||||||
|
if (q->count == 0) return 0; \
|
||||||
|
d = &q->a[q->front++]; \
|
||||||
|
q->front &= q->mask; \
|
||||||
|
--q->count; \
|
||||||
|
return d; \
|
||||||
|
}
|
||||||
|
|
||||||
|
#define KDQ_INIT2(type, SCOPE) \
|
||||||
|
__KDQ_TYPE(type) \
|
||||||
|
__KDQ_IMPL(type, SCOPE)
|
||||||
|
|
||||||
|
#ifndef klib_unused
|
||||||
|
#if (defined __clang__ && __clang_major__ >= 3) || (defined __GNUC__ && __GNUC__ >= 3)
|
||||||
|
#define klib_unused __attribute__ ((__unused__))
|
||||||
|
#else
|
||||||
|
#define klib_unused
|
||||||
|
#endif
|
||||||
|
#endif /* klib_unused */
|
||||||
|
|
||||||
|
#define KDQ_INIT(type) KDQ_INIT2(type, static inline klib_unused)
|
||||||
|
|
||||||
|
#define KDQ_DECLARE(type) \
|
||||||
|
__KDQ_TYPE(type) \
|
||||||
|
kdq_##type##_t *kdq_init_##type(); \
|
||||||
|
void kdq_destroy_##type(kdq_##type##_t *q); \
|
||||||
|
int kdq_resize_##type(kdq_##type##_t *q, int new_bits); \
|
||||||
|
type *kdq_pushp_##type(kdq_##type##_t *q); \
|
||||||
|
void kdq_push_##type(kdq_##type##_t *q, type v); \
|
||||||
|
type *kdq_unshiftp_##type(kdq_##type##_t *q); \
|
||||||
|
void kdq_unshift_##type(kdq_##type##_t *q, type v); \
|
||||||
|
type *kdq_pop_##type(kdq_##type##_t *q); \
|
||||||
|
type *kdq_shift_##type(kdq_##type##_t *q);
|
||||||
|
|
||||||
|
#define kdq_init(type) kdq_init_##type()
|
||||||
|
#define kdq_destroy(type, q) kdq_destroy_##type(q)
|
||||||
|
#define kdq_resize(type, q, new_bits) kdq_resize_##type(q, new_bits)
|
||||||
|
#define kdq_pushp(type, q) kdq_pushp_##type(q)
|
||||||
|
#define kdq_push(type, q, v) kdq_push_##type(q, v)
|
||||||
|
#define kdq_pop(type, q) kdq_pop_##type(q)
|
||||||
|
#define kdq_unshiftp(type, q) kdq_unshiftp_##type(q)
|
||||||
|
#define kdq_unshift(type, q, v) kdq_unshift_##type(q, v)
|
||||||
|
#define kdq_shift(type, q) kdq_shift_##type(q)
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
@ -0,0 +1,186 @@
|
||||||
|
#include <math.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include "keigen.h"
|
||||||
|
|
||||||
|
void ke_core_strq(int n, double *q, double *b, double *c)
|
||||||
|
{
|
||||||
|
int i, j, k, u, v;
|
||||||
|
double h, f, g, h2;
|
||||||
|
for (i = n - 1; i >= 1; i--) {
|
||||||
|
h = 0.0;
|
||||||
|
if (i > 1)
|
||||||
|
for (k = 0; k < i; k++) {
|
||||||
|
u = i * n + k;
|
||||||
|
h = h + q[u] * q[u];
|
||||||
|
}
|
||||||
|
if (h + 1.0 == 1.0) {
|
||||||
|
c[i] = 0.0;
|
||||||
|
if (i == 1)
|
||||||
|
c[i] = q[i * n + i - 1];
|
||||||
|
b[i] = 0.0;
|
||||||
|
} else {
|
||||||
|
c[i] = sqrt(h);
|
||||||
|
u = i * n + i - 1;
|
||||||
|
if (q[u] > 0.0)
|
||||||
|
c[i] = -c[i];
|
||||||
|
h = h - q[u] * c[i];
|
||||||
|
q[u] = q[u] - c[i];
|
||||||
|
f = 0.0;
|
||||||
|
for (j = 0; j < i; j++) {
|
||||||
|
q[j * n + i] = q[i * n + j] / h;
|
||||||
|
g = 0.0;
|
||||||
|
for (k = 0; k <= j; k++)
|
||||||
|
g = g + q[j * n + k] * q[i * n + k];
|
||||||
|
if (j + 1 < i)
|
||||||
|
for (k = j + 1; k <= i - 1; k++)
|
||||||
|
g = g + q[k * n + j] * q[i * n + k];
|
||||||
|
c[j] = g / h;
|
||||||
|
f = f + g * q[j * n + i];
|
||||||
|
}
|
||||||
|
h2 = f / (h + h);
|
||||||
|
for (j = 0; j < i; j++) {
|
||||||
|
f = q[i * n + j];
|
||||||
|
g = c[j] - h2 * f;
|
||||||
|
c[j] = g;
|
||||||
|
for (k = 0; k <= j; k++) {
|
||||||
|
u = j * n + k;
|
||||||
|
q[u] = q[u] - f * c[k] - g * q[i * n + k];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
b[i] = h;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for (i = 0; i < n - 1; i++)
|
||||||
|
c[i] = c[i + 1];
|
||||||
|
c[n - 1] = 0.0;
|
||||||
|
b[0] = 0.0;
|
||||||
|
for (i = 0; i < n; i++) {
|
||||||
|
if (b[i] != 0.0 && i - 1 >= 0)
|
||||||
|
for (j = 0; j < i; j++) {
|
||||||
|
g = 0.0;
|
||||||
|
for (k = 0; k < i; k++)
|
||||||
|
g = g + q[i * n + k] * q[k * n + j];
|
||||||
|
for (k = 0; k < i; k++) {
|
||||||
|
u = k * n + j;
|
||||||
|
q[u] = q[u] - g * q[k * n + i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
u = i * n + i;
|
||||||
|
b[i] = q[u];
|
||||||
|
q[u] = 1.0;
|
||||||
|
if (i - 1 >= 0)
|
||||||
|
for (j = 0; j < i; j++) {
|
||||||
|
q[i * n + j] = 0.0;
|
||||||
|
q[j * n + i] = 0.0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
int ke_core_sstq(int n, double *b, double *c, double *q, int cal_ev, double eps, int l)
|
||||||
|
{
|
||||||
|
int i, j, k, m, it, u, v;
|
||||||
|
double d, f, h, g, p, r, e, s;
|
||||||
|
c[n - 1] = 0.0;
|
||||||
|
d = 0.0;
|
||||||
|
f = 0.0;
|
||||||
|
for (j = 0; j < n; j++) {
|
||||||
|
it = 0;
|
||||||
|
h = eps * (fabs(b[j]) + fabs(c[j]));
|
||||||
|
if (h > d)
|
||||||
|
d = h;
|
||||||
|
m = j;
|
||||||
|
while (m < n && fabs(c[m]) > d)
|
||||||
|
m = m + 1;
|
||||||
|
if (m != j) {
|
||||||
|
do {
|
||||||
|
if (it == l) return KE_EXCESS_ITER;
|
||||||
|
it = it + 1;
|
||||||
|
g = b[j];
|
||||||
|
p = (b[j + 1] - g) / (2.0 * c[j]);
|
||||||
|
r = sqrt(p * p + 1.0);
|
||||||
|
if (p >= 0.0)
|
||||||
|
b[j] = c[j] / (p + r);
|
||||||
|
else
|
||||||
|
b[j] = c[j] / (p - r);
|
||||||
|
h = g - b[j];
|
||||||
|
for (i = j + 1; i < n; i++)
|
||||||
|
b[i] = b[i] - h;
|
||||||
|
f = f + h;
|
||||||
|
p = b[m];
|
||||||
|
e = 1.0;
|
||||||
|
s = 0.0;
|
||||||
|
for (i = m - 1; i >= j; i--) {
|
||||||
|
g = e * c[i];
|
||||||
|
h = e * p;
|
||||||
|
if (fabs(p) >= fabs(c[i])) {
|
||||||
|
e = c[i] / p;
|
||||||
|
r = sqrt(e * e + 1.0);
|
||||||
|
c[i + 1] = s * p * r;
|
||||||
|
s = e / r;
|
||||||
|
e = 1.0 / r;
|
||||||
|
} else {
|
||||||
|
e = p / c[i];
|
||||||
|
r = sqrt(e * e + 1.0);
|
||||||
|
c[i + 1] = s * c[i] * r;
|
||||||
|
s = 1.0 / r;
|
||||||
|
e = e / r;
|
||||||
|
}
|
||||||
|
p = e * b[i] - s * g;
|
||||||
|
b[i + 1] = h + s * (e * g + s * b[i]);
|
||||||
|
if (cal_ev) {
|
||||||
|
for (k = 0; k < n; k++) {
|
||||||
|
u = k * n + i + 1;
|
||||||
|
v = u - 1;
|
||||||
|
h = q[u];
|
||||||
|
q[u] = s * q[v] + e * h;
|
||||||
|
q[v] = e * q[v] - s * h;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
c[j] = s * p;
|
||||||
|
b[j] = e * p;
|
||||||
|
}
|
||||||
|
while (fabs(c[j]) > d);
|
||||||
|
}
|
||||||
|
b[j] = b[j] + f;
|
||||||
|
}
|
||||||
|
for (i = 0; i < n; i++) {
|
||||||
|
k = i;
|
||||||
|
p = b[i];
|
||||||
|
if (i + 1 < n) {
|
||||||
|
j = i + 1;
|
||||||
|
while (j < n && b[j] <= p) {
|
||||||
|
k = j;
|
||||||
|
p = b[j];
|
||||||
|
j = j + 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (k != i) {
|
||||||
|
b[k] = b[i];
|
||||||
|
b[i] = p;
|
||||||
|
for (j = 0; j < n; j++) {
|
||||||
|
u = j * n + i;
|
||||||
|
v = j * n + k;
|
||||||
|
p = q[u];
|
||||||
|
q[u] = q[v];
|
||||||
|
q[v] = p;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Allocate an uninitialized array of `size` elements of `type`. */
#define MALLOC(type, size) ((type*)malloc((size) * sizeof(type)))

/*
 * Compute eigenvalues/vectors of a dense symmetric matrix by first reducing it
 * to tridiagonal form (ke_core_strq) and then solving that (ke_core_sstq).
 *
 * n         dimension; returns 0 without touching a or v when n <= 0
 * a         row-major n*n input matrix; overwritten by the two core routines
 * v         receives the n eigenvalues
 * cal_ev    non-zero to compute eigenvectors
 * eps       precision; 1e-7 is used when 1.0 + eps <= 1.0
 * max_iter  iteration limit; 50 is used when max_iter <= 0
 *
 * Returns 0 on success, KE_EXCESS_ITER if the iteration limit is hit, and -2
 * if the work array cannot be allocated (previously a NULL pointer was passed
 * on to the core routines).
 */
int ke_eigen_sd(int n, double *a, double *v, int cal_ev, double eps, int max_iter)
{
	double *c;
	int r;
	if (n <= 0) return 0;
	if (1.0 + eps <= 1.0) eps = 1e-7;
	if (max_iter <= 0) max_iter = 50;
	c = MALLOC(double, n);
	if (c == 0) return -2; /* allocation failure; distinct from KE_EXCESS_ITER (-1) */
	ke_core_strq(n, a, v, c);
	r = ke_core_sstq(n, v, c, a, cal_ev, eps, max_iter);
	free(c);
	return r;
}
|
||||||
|
|
@ -0,0 +1,53 @@
|
||||||
|
#ifndef KEIGEN_H
|
||||||
|
#define KEIGEN_H
|
||||||
|
|
||||||
|
#define KE_EXCESS_ITER (-1)
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Compute eigenvalues/vectors for a dense symmetric matrix
|
||||||
|
*
|
||||||
|
* @param n dimension
|
||||||
|
* @param a input matrix and eigenvectors on return ([n*n]; in & out)
|
||||||
|
* @param v eigenvalues ([n]; out)
|
||||||
|
* @param cal_ev compute eigenvectors or not (faster without vectors)
|
||||||
|
* @param eps precision (<=0 for default)
|
||||||
|
* @param max_iter max iterations (<=0 for default)
|
||||||
|
*
|
||||||
|
* @return 0 on success; KE_EXCESS_ITER if too many iterations
|
||||||
|
*/
|
||||||
|
int ke_eigen_sd(int n, double *a, double *v, int cal_ev, double eps, int max_iter);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Transform a real symmetric matrix to a tridiagonal matrix
|
||||||
|
*
|
||||||
|
* @param n dimension
|
||||||
|
* @param q input matrix and transformation matrix ([n*n]; in & out)
|
||||||
|
* @param b diagonal ([n]; out)
|
||||||
|
* @param c subdiagonal ([n]; out)
|
||||||
|
*/
|
||||||
|
void ke_core_strq(int n, double *q, double *b, double *c);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Compute eigenvalues and eigenvectors for a tridiagonal matrix
|
||||||
|
*
|
||||||
|
* @param n dimension
|
||||||
|
* @param b diagonal and eigenvalues on return ([n]; in & out)
|
||||||
|
* @param c subdiagonal ([n]; in)
|
||||||
|
* @param q transformation matrix and eigenvectors on return ([n*n]; in & out)
|
||||||
|
* @param cal_ev compute eigenvectors or not (faster without vectors)
|
||||||
|
* @param eps precision
|
||||||
|
* @param l max iterations
|
||||||
|
*
|
||||||
|
* @return 0 on success; KE_EXCESS_ITER if too many iterations
|
||||||
|
*/
|
||||||
|
int ke_core_sstq(int n, double *b, double *c, double *q, int cal_ev, double eps, int l);
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
@ -0,0 +1,120 @@
|
||||||
|
#ifndef KETOPT_H
|
||||||
|
#define KETOPT_H
|
||||||
|
|
||||||
|
#include <string.h> /* for strchr() and strncmp() */
|
||||||
|
|
||||||
|
#define ko_no_argument 0
|
||||||
|
#define ko_required_argument 1
|
||||||
|
#define ko_optional_argument 2
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
int ind; /* equivalent to optind */
|
||||||
|
int opt; /* equivalent to optopt */
|
||||||
|
char *arg; /* equivalent to optarg */
|
||||||
|
int longidx; /* index of a long option; or -1 if short */
|
||||||
|
/* private variables not intended for external uses */
|
||||||
|
int i, pos, n_args;
|
||||||
|
} ketopt_t;
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
char *name;
|
||||||
|
int has_arg;
|
||||||
|
int val;
|
||||||
|
} ko_longopt_t;
|
||||||
|
|
||||||
|
static ketopt_t KETOPT_INIT = { 1, 0, 0, -1, 1, 0, 0 };
|
||||||
|
|
||||||
|
static void ketopt_permute(char *argv[], int j, int n) /* move argv[j] over n elements to the left */
|
||||||
|
{
|
||||||
|
int k;
|
||||||
|
char *p = argv[j];
|
||||||
|
for (k = 0; k < n; ++k)
|
||||||
|
argv[j - k] = argv[j - k - 1];
|
||||||
|
argv[j - k] = p;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Parse command-line options and arguments
|
||||||
|
*
|
||||||
|
* This fuction has a similar interface to GNU's getopt_long(). Each call
|
||||||
|
* parses one option and returns the option name. s->arg points to the option
|
||||||
|
* argument if present. The function returns -1 when all command-line arguments
|
||||||
|
* are parsed. In this case, s->ind is the index of the first non-option
|
||||||
|
* argument.
|
||||||
|
*
|
||||||
|
* @param s status; shall be initialized to KETOPT_INIT on the first call
|
||||||
|
* @param argc length of argv[]
|
||||||
|
* @param argv list of command-line arguments; argv[0] is ignored
|
||||||
|
* @param permute non-zero to move options ahead of non-option arguments
|
||||||
|
* @param ostr option string
|
||||||
|
* @param longopts long options
|
||||||
|
*
|
||||||
|
* @return ASCII for a short option; ko_longopt_t::val for a long option; -1 if
|
||||||
|
* argv[] is fully processed; '?' for an unknown option or an ambiguous
|
||||||
|
* long option; ':' if an option argument is missing
|
||||||
|
*/
|
||||||
|
static int ketopt(ketopt_t *s, int argc, char *argv[], int permute, const char *ostr, const ko_longopt_t *longopts)
|
||||||
|
{
|
||||||
|
int opt = -1, i0, j;
|
||||||
|
if (permute) {
|
||||||
|
while (s->i < argc && (argv[s->i][0] != '-' || argv[s->i][1] == '\0'))
|
||||||
|
++s->i, ++s->n_args;
|
||||||
|
}
|
||||||
|
s->arg = 0, s->longidx = -1, i0 = s->i;
|
||||||
|
if (s->i >= argc || argv[s->i][0] != '-' || argv[s->i][1] == '\0') {
|
||||||
|
s->ind = s->i - s->n_args;
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (argv[s->i][0] == '-' && argv[s->i][1] == '-') { /* "--" or a long option */
|
||||||
|
if (argv[s->i][2] == '\0') { /* a bare "--" */
|
||||||
|
ketopt_permute(argv, s->i, s->n_args);
|
||||||
|
++s->i, s->ind = s->i - s->n_args;
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
s->opt = 0, opt = '?', s->pos = -1;
|
||||||
|
if (longopts) { /* parse long options */
|
||||||
|
int k, n_exact = 0, n_partial = 0;
|
||||||
|
const ko_longopt_t *o = 0, *o_exact = 0, *o_partial = 0;
|
||||||
|
for (j = 2; argv[s->i][j] != '\0' && argv[s->i][j] != '='; ++j) {} /* find the end of the option name */
|
||||||
|
for (k = 0; longopts[k].name != 0; ++k)
|
||||||
|
if (strncmp(&argv[s->i][2], longopts[k].name, j - 2) == 0) {
|
||||||
|
if (longopts[k].name[j - 2] == 0) ++n_exact, o_exact = &longopts[k];
|
||||||
|
else ++n_partial, o_partial = &longopts[k];
|
||||||
|
}
|
||||||
|
if (n_exact > 1 || (n_exact == 0 && n_partial > 1)) return '?';
|
||||||
|
o = n_exact == 1? o_exact : n_partial == 1? o_partial : 0;
|
||||||
|
if (o) {
|
||||||
|
s->opt = opt = o->val, s->longidx = o - longopts;
|
||||||
|
if (argv[s->i][j] == '=') s->arg = &argv[s->i][j + 1];
|
||||||
|
if (o->has_arg == 1 && argv[s->i][j] == '\0') {
|
||||||
|
if (s->i < argc - 1) s->arg = argv[++s->i];
|
||||||
|
else opt = ':'; /* missing option argument */
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else { /* a short option */
|
||||||
|
const char *p;
|
||||||
|
if (s->pos == 0) s->pos = 1;
|
||||||
|
opt = s->opt = argv[s->i][s->pos++];
|
||||||
|
p = strchr((char*)ostr, opt);
|
||||||
|
if (p == 0) {
|
||||||
|
opt = '?'; /* unknown option */
|
||||||
|
} else if (p[1] == ':') {
|
||||||
|
if (argv[s->i][s->pos] == 0) {
|
||||||
|
if (s->i < argc - 1) s->arg = argv[++s->i];
|
||||||
|
else opt = ':'; /* missing option argument */
|
||||||
|
} else s->arg = &argv[s->i][s->pos];
|
||||||
|
s->pos = -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (s->pos < 0 || argv[s->i][s->pos] == 0) {
|
||||||
|
++s->i, s->pos = 0;
|
||||||
|
if (s->n_args > 0) /* permute */
|
||||||
|
for (j = i0; j < s->i; ++j)
|
||||||
|
ketopt_permute(argv, j, s->n_args);
|
||||||
|
}
|
||||||
|
s->ind = s->i - s->n_args;
|
||||||
|
return opt;
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
@ -0,0 +1,586 @@
|
||||||
|
#include <string.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <assert.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <ctype.h>
|
||||||
|
#include <math.h>
|
||||||
|
#include "kexpr.h"
|
||||||
|
|
||||||
|
/***************
|
||||||
|
* Definitions *
|
||||||
|
***************/
|
||||||
|
|
||||||
|
#define KEO_NULL 0
|
||||||
|
#define KEO_POS 1
|
||||||
|
#define KEO_NEG 2
|
||||||
|
#define KEO_BNOT 3
|
||||||
|
#define KEO_LNOT 4
|
||||||
|
#define KEO_POW 5
|
||||||
|
#define KEO_MUL 6
|
||||||
|
#define KEO_DIV 7
|
||||||
|
#define KEO_IDIV 8
|
||||||
|
#define KEO_MOD 9
|
||||||
|
#define KEO_ADD 10
|
||||||
|
#define KEO_SUB 11
|
||||||
|
#define KEO_LSH 12
|
||||||
|
#define KEO_RSH 13
|
||||||
|
#define KEO_LT 14
|
||||||
|
#define KEO_LE 15
|
||||||
|
#define KEO_GT 16
|
||||||
|
#define KEO_GE 17
|
||||||
|
#define KEO_EQ 18
|
||||||
|
#define KEO_NE 19
|
||||||
|
#define KEO_BAND 20
|
||||||
|
#define KEO_BXOR 21
|
||||||
|
#define KEO_BOR 22
|
||||||
|
#define KEO_LAND 23
|
||||||
|
#define KEO_LOR 24
|
||||||
|
|
||||||
|
#define KET_NULL 0
|
||||||
|
#define KET_VAL 1
|
||||||
|
#define KET_OP 2
|
||||||
|
#define KET_FUNC 3
|
||||||
|
|
||||||
|
#define KEF_NULL 0
|
||||||
|
#define KEF_REAL 1
|
||||||
|
|
||||||
|
struct ke1_s;
|
||||||
|
|
||||||
|
typedef struct ke1_s {
|
||||||
|
uint32_t ttype:16, vtype:10, assigned:1, user_func:5; // ttype: token type; vtype: value type
|
||||||
|
int32_t op:8, n_args:24; // op: operator, n_args: number of arguments
|
||||||
|
char *name; // variable name or function name
|
||||||
|
union {
|
||||||
|
void (*builtin)(struct ke1_s *a, struct ke1_s *b); // execution function
|
||||||
|
double (*real_func1)(double);
|
||||||
|
double (*real_func2)(double, double);
|
||||||
|
} f;
|
||||||
|
double r;
|
||||||
|
int64_t i;
|
||||||
|
char *s;
|
||||||
|
} ke1_t;
|
||||||
|
|
||||||
|
static int ke_op[25] = {
|
||||||
|
0,
|
||||||
|
1<<1|1, 1<<1|1, 1<<1|1, 1<<1|1, // unary operators
|
||||||
|
2<<1|1, // pow()
|
||||||
|
3<<1, 3<<1, 3<<1, 3<<1, // * / // %
|
||||||
|
4<<1, 4<<1, // + and -
|
||||||
|
5<<1, 5<<1, // << and >>
|
||||||
|
6<<1, 6<<1, 6<<1, 6<<1, // < > <= >=
|
||||||
|
7<<1, 7<<1, // == !=
|
||||||
|
8<<1, // &
|
||||||
|
9<<1, // ^
|
||||||
|
10<<1,// |
|
||||||
|
11<<1,// &&
|
||||||
|
12<<1 // ||
|
||||||
|
};
|
||||||
|
|
||||||
|
static const char *ke_opstr[] = {
|
||||||
|
"",
|
||||||
|
"+(1)", "-(1)", "~", "!",
|
||||||
|
"**",
|
||||||
|
"*", "/", "//", "%",
|
||||||
|
"+", "-",
|
||||||
|
"<<", ">>",
|
||||||
|
"<", "<=", ">", ">=",
|
||||||
|
"==", "!=",
|
||||||
|
"&",
|
||||||
|
"^",
|
||||||
|
"|",
|
||||||
|
"&&",
|
||||||
|
"||"
|
||||||
|
};
|
||||||
|
|
||||||
|
struct kexpr_s {
|
||||||
|
int n;
|
||||||
|
ke1_t *e;
|
||||||
|
};
|
||||||
|
|
||||||
|
/**********************
|
||||||
|
* Operator functions *
|
||||||
|
**********************/
|
||||||
|
|
||||||
|
#define KE_GEN_CMP(_type, _op) \
|
||||||
|
static void ke_op_##_type(ke1_t *p, ke1_t *q) { \
|
||||||
|
if (p->vtype == KEV_STR && q->vtype == KEV_STR) p->i = (strcmp(p->s, q->s) _op 0); \
|
||||||
|
else p->i = p->vtype == KEV_REAL || q->vtype == KEV_REAL? (p->r _op q->r) : (p->i _op q->i); \
|
||||||
|
p->r = (double)p->i; \
|
||||||
|
p->vtype = KEV_INT; \
|
||||||
|
}
|
||||||
|
|
||||||
|
KE_GEN_CMP(KEO_LT, <)
|
||||||
|
KE_GEN_CMP(KEO_LE, <=)
|
||||||
|
KE_GEN_CMP(KEO_GT, >)
|
||||||
|
KE_GEN_CMP(KEO_GE, >=)
|
||||||
|
KE_GEN_CMP(KEO_EQ, ==)
|
||||||
|
KE_GEN_CMP(KEO_NE, !=)
|
||||||
|
|
||||||
|
#define KE_GEN_BIN_INT(_type, _op) \
|
||||||
|
static void ke_op_##_type(ke1_t *p, ke1_t *q) { \
|
||||||
|
p->i _op q->i; p->r = (double)p->i; \
|
||||||
|
p->vtype = KEV_INT; \
|
||||||
|
}
|
||||||
|
|
||||||
|
KE_GEN_BIN_INT(KEO_BAND, &=)
|
||||||
|
KE_GEN_BIN_INT(KEO_BOR, |=)
|
||||||
|
KE_GEN_BIN_INT(KEO_BXOR, ^=)
|
||||||
|
KE_GEN_BIN_INT(KEO_LSH, <<=)
|
||||||
|
KE_GEN_BIN_INT(KEO_RSH, >>=)
|
||||||
|
KE_GEN_BIN_INT(KEO_MOD, %=)
|
||||||
|
KE_GEN_BIN_INT(KEO_IDIV, /=)
|
||||||
|
|
||||||
|
#define KE_GEN_BIN_BOTH(_type, _op) \
|
||||||
|
static void ke_op_##_type(ke1_t *p, ke1_t *q) { \
|
||||||
|
p->i _op q->i; p->r _op q->r; \
|
||||||
|
p->vtype = p->vtype == KEV_REAL || q->vtype == KEV_REAL? KEV_REAL : KEV_INT; \
|
||||||
|
}
|
||||||
|
|
||||||
|
KE_GEN_BIN_BOTH(KEO_ADD, +=)
|
||||||
|
KE_GEN_BIN_BOTH(KEO_SUB, -=)
|
||||||
|
KE_GEN_BIN_BOTH(KEO_MUL, *=)
|
||||||
|
|
||||||
|
/* Real division; the integer part is the quotient plus .5, truncated.
 * The result is always typed as real. */
static void ke_op_KEO_DIV(ke1_t *p, ke1_t *q)
{
	p->r = p->r / q->r;
	p->i = (int64_t)(p->r + .5);
	p->vtype = KEV_REAL;
}

/* Logical AND of the integer parts; yields integer 0 or 1. */
static void ke_op_KEO_LAND(ke1_t *p, ke1_t *q)
{
	p->i = (p->i && q->i);
	p->r = p->i;
	p->vtype = KEV_INT;
}

/* Logical OR of the integer parts; yields integer 0 or 1. */
static void ke_op_KEO_LOR(ke1_t *p, ke1_t *q)
{
	p->i = (p->i || q->i);
	p->r = p->i;
	p->vtype = KEV_INT;
}

/* Exponentiation through pow() on the real parts; the result is real when
 * either operand is real, integer otherwise. */
static void ke_op_KEO_POW(ke1_t *p, ke1_t *q)
{
	p->r = pow(p->r, q->r);
	p->i = (int64_t)(p->r + .5);
	if (p->vtype == KEV_REAL || q->vtype == KEV_REAL) p->vtype = KEV_REAL;
	else p->vtype = KEV_INT;
}

/* Bitwise complement of the integer part (q is unused). */
static void ke_op_KEO_BNOT(ke1_t *p, ke1_t *q)
{
	p->i = ~p->i;
	p->r = (double)p->i;
	p->vtype = KEV_INT;
}

/* Logical negation of the integer part (q is unused). */
static void ke_op_KEO_LNOT(ke1_t *p, ke1_t *q)
{
	p->i = !p->i;
	p->r = (double)p->i;
	p->vtype = KEV_INT;
}

/* Unary plus: intentionally leaves the operand untouched. */
static void ke_op_KEO_POS(ke1_t *p, ke1_t *q)
{
}

/* Unary minus: negates both representations, value type unchanged. */
static void ke_op_KEO_NEG(ke1_t *p, ke1_t *q)
{
	p->i = -p->i;
	p->r = -p->r;
}
|
||||||
|
|
||||||
|
/* Built-in abs(): absolute value of the single argument in p (q is unused).
 * Integer operands stay integers, anything else is handled through fabs().
 * The integer case is computed directly on the int64_t instead of calling
 * abs(), whose `int` parameter would truncate 64-bit operands in C. */
static void ke_func1_abs(ke1_t *p, ke1_t *q)
{
	(void)q;
	if (p->vtype == KEV_INT) {
		if (p->i < 0) p->i = -p->i;
		p->r = (double)p->i;
	} else {
		p->r = fabs(p->r);
		p->i = (int64_t)(p->r + .5);
	}
}
|
||||||
|
|
||||||
|
/**********
|
||||||
|
* Parser *
|
||||||
|
**********/
|
||||||
|
|
||||||
|
/* Return a freshly allocated, NUL-terminated copy of at most n characters of
 * src. The buffer is zero-filled, so a source shorter than n is padded with
 * NULs. A negative n is treated as 0; NULL is returned when the allocation
 * fails. The caller owns the result and releases it with free(). */
static inline char *mystrndup(const char *src, int n)
{
	char *dst;
	if (n < 0) n = 0; // a negative length would turn into a huge size_t below
	dst = (char*)calloc((size_t)n + 1, 1);
	if (dst == 0) return 0;
	strncpy(dst, src, (size_t)n);
	return dst;
}
|
||||||
|
|
||||||
|
// parse a token except "(", ")" and ","
|
||||||
|
static ke1_t ke_read_token(char *p, char **r, int *err, int last_is_val) // it doesn't parse parentheses
|
||||||
|
{
|
||||||
|
char *q = p;
|
||||||
|
ke1_t e;
|
||||||
|
memset(&e, 0, sizeof(ke1_t));
|
||||||
|
if (isalpha(*p) || *p == '_') { // a variable or a function
|
||||||
|
for (; *p && (*p == '_' || isalnum(*p)); ++p);
|
||||||
|
if (*p == '(') e.ttype = KET_FUNC, e.n_args = 1;
|
||||||
|
else e.ttype = KET_VAL, e.vtype = KEV_REAL;
|
||||||
|
e.name = mystrndup(q, p - q);
|
||||||
|
e.i = 0, e.r = 0.;
|
||||||
|
*r = p;
|
||||||
|
} else if (isdigit(*p) || *p == '.') { // a number
|
||||||
|
long x;
|
||||||
|
double y;
|
||||||
|
char *pp;
|
||||||
|
e.ttype = KET_VAL;
|
||||||
|
y = strtod(q, &p);
|
||||||
|
x = strtol(q, &pp, 0); // FIXME: check int/double parsing errors
|
||||||
|
if (q == p && q == pp) { // parse error
|
||||||
|
*err |= KEE_NUM;
|
||||||
|
} else if (p > pp) { // has "." or "[eE]"; then it is a real number
|
||||||
|
e.vtype = KEV_REAL;
|
||||||
|
e.i = (int64_t)(y + .5), e.r = y;
|
||||||
|
*r = p;
|
||||||
|
} else {
|
||||||
|
e.vtype = KEV_INT;
|
||||||
|
e.i = x, e.r = y;
|
||||||
|
*r = pp;
|
||||||
|
}
|
||||||
|
} else if (*p == '"' || *p == '\'') { // a string value
|
||||||
|
int c = *p;
|
||||||
|
for (++p; *p && *p != c; ++p)
|
||||||
|
if (*p == '\\') ++p; // escaping
|
||||||
|
if (*p == c) {
|
||||||
|
e.ttype = KET_VAL, e.vtype = KEV_STR;
|
||||||
|
e.s = mystrndup(q + 1, p - q - 1);
|
||||||
|
*r = p + 1;
|
||||||
|
} else *err |= KEE_UNQU, *r = p;
|
||||||
|
} else { // an operator
|
||||||
|
e.ttype = KET_OP;
|
||||||
|
if (*p == '*' && p[1] == '*') e.op = KEO_POW, e.f.builtin = ke_op_KEO_POW, e.n_args = 2, *r = q + 2;
|
||||||
|
else if (*p == '*') e.op = KEO_MUL, e.f.builtin = ke_op_KEO_MUL, e.n_args = 2, *r = q + 1; // FIXME: NOT working for unary operators
|
||||||
|
else if (*p == '/' && p[1] == '/') e.op = KEO_IDIV, e.f.builtin = ke_op_KEO_IDIV, e.n_args = 2, *r = q + 2;
|
||||||
|
else if (*p == '/') e.op = KEO_DIV, e.f.builtin = ke_op_KEO_DIV, e.n_args = 2, *r = q + 1;
|
||||||
|
else if (*p == '%') e.op = KEO_MOD, e.f.builtin = ke_op_KEO_MOD, e.n_args = 2, *r = q + 1;
|
||||||
|
else if (*p == '+') {
|
||||||
|
if (last_is_val) e.op = KEO_ADD, e.f.builtin = ke_op_KEO_ADD, e.n_args = 2;
|
||||||
|
else e.op = KEO_POS, e.f.builtin = ke_op_KEO_POS, e.n_args = 1;
|
||||||
|
*r = q + 1;
|
||||||
|
} else if (*p == '-') {
|
||||||
|
if (last_is_val) e.op = KEO_SUB, e.f.builtin = ke_op_KEO_SUB, e.n_args = 2;
|
||||||
|
else e.op = KEO_NEG, e.f.builtin = ke_op_KEO_NEG, e.n_args = 1;
|
||||||
|
*r = q + 1;
|
||||||
|
} else if (*p == '=' && p[1] == '=') e.op = KEO_EQ, e.f.builtin = ke_op_KEO_EQ, e.n_args = 2, *r = q + 2;
|
||||||
|
else if (*p == '!' && p[1] == '=') e.op = KEO_NE, e.f.builtin = ke_op_KEO_NE, e.n_args = 2, *r = q + 2;
|
||||||
|
else if (*p == '<' && p[1] == '>') e.op = KEO_NE, e.f.builtin = ke_op_KEO_NE, e.n_args = 2, *r = q + 2;
|
||||||
|
else if (*p == '>' && p[1] == '=') e.op = KEO_GE, e.f.builtin = ke_op_KEO_GE, e.n_args = 2, *r = q + 2;
|
||||||
|
else if (*p == '<' && p[1] == '=') e.op = KEO_LE, e.f.builtin = ke_op_KEO_LE, e.n_args = 2, *r = q + 2;
|
||||||
|
else if (*p == '>' && p[1] == '>') e.op = KEO_RSH, e.f.builtin = ke_op_KEO_RSH, e.n_args = 2, *r = q + 2;
|
||||||
|
else if (*p == '<' && p[1] == '<') e.op = KEO_LSH, e.f.builtin = ke_op_KEO_LSH, e.n_args = 2, *r = q + 2;
|
||||||
|
else if (*p == '>') e.op = KEO_GT, e.f.builtin = ke_op_KEO_GT, e.n_args = 2, *r = q + 1;
|
||||||
|
else if (*p == '<') e.op = KEO_LT, e.f.builtin = ke_op_KEO_LT, e.n_args = 2, *r = q + 1;
|
||||||
|
else if (*p == '|' && p[1] == '|') e.op = KEO_LOR, e.f.builtin = ke_op_KEO_LOR, e.n_args = 2, *r = q + 2;
|
||||||
|
else if (*p == '&' && p[1] == '&') e.op = KEO_LAND, e.f.builtin = ke_op_KEO_LAND, e.n_args = 2, *r = q + 2;
|
||||||
|
else if (*p == '|') e.op = KEO_BOR, e.f.builtin = ke_op_KEO_BOR, e.n_args = 2, *r = q + 1;
|
||||||
|
else if (*p == '&') e.op = KEO_BAND, e.f.builtin = ke_op_KEO_BAND, e.n_args = 2, *r = q + 1;
|
||||||
|
else if (*p == '^') e.op = KEO_BXOR, e.f.builtin = ke_op_KEO_BXOR, e.n_args = 2, *r = q + 1;
|
||||||
|
else if (*p == '~') e.op = KEO_BNOT, e.f.builtin = ke_op_KEO_BNOT, e.n_args = 1, *r = q + 1;
|
||||||
|
else if (*p == '!') e.op = KEO_LNOT, e.f.builtin = ke_op_KEO_LNOT, e.n_args = 1, *r = q + 1;
|
||||||
|
else e.ttype = KET_NULL, *err |= KEE_UNOP;
|
||||||
|
}
|
||||||
|
return e;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Append one slot to the growable token array *a and return its address.
 *   a  in/out: array base (may be moved by realloc)
 *   n  in/out: number of slots in use; incremented on return
 *   m  in/out: capacity; starts at 8 and doubles whenever the array is full
 * Newly obtained capacity is zero-filled; a slot that is being reused after
 * an earlier pop still holds its previous contents.
 * NOTE(review): the realloc() result is not checked. */
static inline ke1_t *push_back(ke1_t **a, int *n, int *m)
{
	ke1_t *slot;
	if (*n == *m) {
		const int prev_cap = *m;
		const int next_cap = prev_cap ? prev_cap << 1 : 8;
		*m = next_cap;
		*a = (ke1_t*)realloc(*a, next_cap * sizeof(ke1_t));
		memset(*a + prev_cap, 0, (next_cap - prev_cap) * sizeof(ke1_t));
	}
	slot = &(*a)[*n];
	++*n;
	return slot;
}
|
||||||
|
|
||||||
|
static ke1_t *ke_parse_core(const char *_s, int *_n, int *err)
|
||||||
|
{
|
||||||
|
char *s, *p, *q;
|
||||||
|
int n_out, m_out, n_op, m_op, last_is_val = 0;
|
||||||
|
ke1_t *out, *op, *t, *u;
|
||||||
|
|
||||||
|
*err = 0; *_n = 0;
|
||||||
|
s = strdup(_s); // make a copy
|
||||||
|
for (p = q = s; *p; ++p) // squeeze out spaces
|
||||||
|
if (!isspace(*p)) *q++ = *p;
|
||||||
|
*q++ = 0;
|
||||||
|
|
||||||
|
out = op = 0;
|
||||||
|
n_out = m_out = n_op = m_op = 0;
|
||||||
|
p = s;
|
||||||
|
while (*p) {
|
||||||
|
if (*p == '(') {
|
||||||
|
t = push_back(&op, &n_op, &m_op); // push to the operator stack
|
||||||
|
t->op = -1, t->ttype = KET_NULL; // ->op < 0 for a left parenthsis
|
||||||
|
++p;
|
||||||
|
} else if (*p == ')') {
|
||||||
|
while (n_op > 0 && op[n_op-1].op >= 0) { // move operators to the output until we see a left parenthesis
|
||||||
|
u = push_back(&out, &n_out, &m_out);
|
||||||
|
*u = op[--n_op];
|
||||||
|
}
|
||||||
|
if (n_op == 0) { // error: extra right parenthesis
|
||||||
|
*err |= KEE_UNRP;
|
||||||
|
break;
|
||||||
|
} else --n_op; // pop out '('
|
||||||
|
if (n_op > 0 && op[n_op-1].ttype == KET_FUNC) { // the top of the operator stack is a function
|
||||||
|
u = push_back(&out, &n_out, &m_out); // move it to the output
|
||||||
|
*u = op[--n_op];
|
||||||
|
if (u->n_args == 1 && strcmp(u->name, "abs") == 0) u->f.builtin = ke_func1_abs;
|
||||||
|
}
|
||||||
|
++p;
|
||||||
|
} else if (*p == ',') { // function arguments separator
|
||||||
|
while (n_op > 0 && op[n_op-1].op >= 0) {
|
||||||
|
u = push_back(&out, &n_out, &m_out);
|
||||||
|
*u = op[--n_op];
|
||||||
|
}
|
||||||
|
if (n_op < 2 || op[n_op-2].ttype != KET_FUNC) { // we should at least see a function and a left parenthesis
|
||||||
|
*err |= KEE_FUNC;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
++op[n_op-2].n_args;
|
||||||
|
++p;
|
||||||
|
} else { // output-able token
|
||||||
|
ke1_t v;
|
||||||
|
v = ke_read_token(p, &p, err, last_is_val);
|
||||||
|
if (*err) break;
|
||||||
|
if (v.ttype == KET_VAL) {
|
||||||
|
u = push_back(&out, &n_out, &m_out);
|
||||||
|
*u = v;
|
||||||
|
last_is_val = 1;
|
||||||
|
} else if (v.ttype == KET_FUNC) {
|
||||||
|
t = push_back(&op, &n_op, &m_op);
|
||||||
|
*t = v;
|
||||||
|
last_is_val = 0;
|
||||||
|
} else if (v.ttype == KET_OP) {
|
||||||
|
int oi = ke_op[v.op];
|
||||||
|
while (n_op > 0 && op[n_op-1].ttype == KET_OP) {
|
||||||
|
int pre = ke_op[op[n_op-1].op]>>1;
|
||||||
|
if (((oi&1) && oi>>1 <= pre) || (!(oi&1) && oi>>1 < pre)) break;
|
||||||
|
u = push_back(&out, &n_out, &m_out);
|
||||||
|
*u = op[--n_op];
|
||||||
|
}
|
||||||
|
t = push_back(&op, &n_op, &m_op);
|
||||||
|
*t = v;
|
||||||
|
last_is_val = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (*err == 0) {
|
||||||
|
while (n_op > 0 && op[n_op-1].op >= 0) {
|
||||||
|
u = push_back(&out, &n_out, &m_out);
|
||||||
|
*u = op[--n_op];
|
||||||
|
}
|
||||||
|
if (n_op > 0) *err |= KEE_UNLP;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (*err == 0) { // then check if the number of args is correct
|
||||||
|
int i, n;
|
||||||
|
for (i = n = 0; i < n_out; ++i) {
|
||||||
|
ke1_t *e = &out[i];
|
||||||
|
if (e->ttype == KET_VAL) ++n;
|
||||||
|
else n -= e->n_args - 1;
|
||||||
|
}
|
||||||
|
if (n != 1) *err |= KEE_ARG;
|
||||||
|
}
|
||||||
|
|
||||||
|
free(op); free(s);
|
||||||
|
if (*err) {
|
||||||
|
free(out);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
*_n = n_out;
|
||||||
|
return out;
|
||||||
|
}
|
||||||
|
|
||||||
|
kexpr_t *ke_parse(const char *_s, int *err)
|
||||||
|
{
|
||||||
|
int n;
|
||||||
|
ke1_t *e;
|
||||||
|
kexpr_t *ke;
|
||||||
|
e = ke_parse_core(_s, &n, err);
|
||||||
|
if (*err) return 0;
|
||||||
|
ke = (kexpr_t*)calloc(1, sizeof(kexpr_t));
|
||||||
|
ke->n = n, ke->e = e;
|
||||||
|
return ke;
|
||||||
|
}
|
||||||
|
|
||||||
|
int ke_eval(const kexpr_t *ke, int64_t *_i, double *_r, const char **_p, int *ret_type)
|
||||||
|
{
|
||||||
|
ke1_t *stack, *p, *q;
|
||||||
|
int i, top = 0, err = 0;
|
||||||
|
*_i = 0, *_r = 0., *ret_type = 0;
|
||||||
|
for (i = 0; i < ke->n; ++i) {
|
||||||
|
ke1_t *e = &ke->e[i];
|
||||||
|
if ((e->ttype == KET_OP || e->ttype == KET_FUNC) && e->f.builtin == 0) err |= KEE_UNFUNC;
|
||||||
|
else if (e->ttype == KET_VAL && e->name && e->assigned == 0) err |= KEE_UNVAR;
|
||||||
|
}
|
||||||
|
stack = (ke1_t*)malloc(ke->n * sizeof(ke1_t));
|
||||||
|
for (i = 0; i < ke->n; ++i) {
|
||||||
|
ke1_t *e = &ke->e[i];
|
||||||
|
if (e->ttype == KET_OP || e->ttype == KET_FUNC) {
|
||||||
|
if (e->n_args == 2 && e->f.builtin) {
|
||||||
|
q = &stack[--top], p = &stack[top-1];
|
||||||
|
if (e->user_func) {
|
||||||
|
if (e->user_func == KEF_REAL)
|
||||||
|
p->r = e->f.real_func2(p->r, q->r), p->i = (int64_t)(p->r + .5), p->vtype = KEV_REAL;
|
||||||
|
} else e->f.builtin(p, q);
|
||||||
|
} else if (e->n_args == 1 && e->f.builtin) {
|
||||||
|
p = &stack[top-1];
|
||||||
|
if (e->user_func) {
|
||||||
|
if (e->user_func == KEF_REAL)
|
||||||
|
p->r = e->f.real_func1(p->r), p->i = (int64_t)(p->r + .5), p->vtype = KEV_REAL;
|
||||||
|
} else e->f.builtin(&stack[top-1], 0);
|
||||||
|
} else top -= e->n_args - 1;
|
||||||
|
} else stack[top++] = *e;
|
||||||
|
}
|
||||||
|
*ret_type = stack->vtype;
|
||||||
|
*_i = stack->i, *_r = stack->r, *_p = stack->s;
|
||||||
|
free(stack);
|
||||||
|
return err;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Evaluate ke and return the integer view of the result; the code returned
 * by ke_eval() is stored in *err. */
int64_t ke_eval_int(const kexpr_t *ke, int *err)
{
	int64_t as_int;
	double as_real;
	const char *as_str;
	int vtype;
	*err = ke_eval(ke, &as_int, &as_real, &as_str, &vtype);
	return as_int;
}
|
||||||
|
|
||||||
|
/* Evaluate ke and return the real view of the result; the code returned by
 * ke_eval() is stored in *err. */
double ke_eval_real(const kexpr_t *ke, int *err)
{
	int64_t as_int;
	double as_real;
	const char *as_str;
	int vtype;
	*err = ke_eval(ke, &as_int, &as_real, &as_str, &vtype);
	return as_real;
}
|
||||||
|
|
||||||
|
/* Release an expression: the name and string of every token, the token
 * array and the object itself. A NULL argument is accepted and ignored. */
void ke_destroy(kexpr_t *ke)
{
	int k;
	if (!ke) return;
	k = ke->n;
	while (k-- > 0) {
		free(ke->e[k].name);
		free(ke->e[k].s);
	}
	free(ke->e);
	free(ke);
}
|
||||||
|
|
||||||
|
/* Assign integer y to every occurrence of variable `var` and mark it as
 * assigned. Returns the number of occurrences updated. */
int ke_set_int(kexpr_t *ke, const char *var, int64_t y)
{
	const double as_real = (double)y;
	int k, hits = 0;
	for (k = 0; k < ke->n; ++k) {
		ke1_t *tok = &ke->e[k];
		if (tok->ttype != KET_VAL || !tok->name) continue;
		if (strcmp(tok->name, var) != 0) continue;
		tok->i = y;
		tok->r = as_real;
		tok->vtype = KEV_INT;
		tok->assigned = 1;
		++hits;
	}
	return hits;
}
|
||||||
|
|
||||||
|
/* Assign real x to every occurrence of variable `var` and mark it as
 * assigned; the integer view is x + .5 truncated. Returns the number of
 * occurrences updated. */
int ke_set_real(kexpr_t *ke, const char *var, double x)
{
	const int64_t as_int = (int64_t)(x + .5);
	int k, hits = 0;
	for (k = 0; k < ke->n; ++k) {
		ke1_t *tok = &ke->e[k];
		if (tok->ttype != KET_VAL || !tok->name) continue;
		if (strcmp(tok->name, var) != 0) continue;
		tok->r = x;
		tok->i = as_int;
		tok->vtype = KEV_REAL;
		tok->assigned = 1;
		++hits;
	}
	return hits;
}
|
||||||
|
|
||||||
|
/* Assign a copy of string x to every occurrence of variable `var` and mark
 * it as assigned. Returns the number of occurrences updated.
 *
 * The previous string is released regardless of the current value type:
 * ke_set_int()/ke_set_real() change the type but leave `s` in place, so
 * freeing only when the type is still KEV_STR would leak it. `s` is either
 * NULL or heap-owned (ke_destroy() frees it unconditionally too). */
int ke_set_str(kexpr_t *ke, const char *var, const char *x)
{
	int i, n = 0;
	for (i = 0; i < ke->n; ++i) {
		ke1_t *e = &ke->e[i];
		if (e->ttype == KET_VAL && e->name && strcmp(e->name, var) == 0) {
			char *copy = strdup(x); // duplicate first: x may alias the old string
			free(e->s);
			e->s = copy;
			e->i = 0, e->r = 0., e->assigned = 1;
			e->vtype = KEV_STR;
			++n;
		}
	}
	return n;
}
|
||||||
|
|
||||||
|
/* Bind the one-argument real function `func` to every call of `name` that
 * was parsed with exactly one argument. Returns the number of calls bound. */
int ke_set_real_func1(kexpr_t *ke, const char *name, double (*func)(double))
{
	int k, hits = 0;
	for (k = 0; k < ke->n; ++k) {
		ke1_t *tok = &ke->e[k];
		if (tok->ttype != KET_FUNC || tok->n_args != 1) continue;
		if (strcmp(tok->name, name) != 0) continue;
		tok->f.real_func1 = func;
		tok->user_func = KEF_REAL;
		++hits;
	}
	return hits;
}
|
||||||
|
|
||||||
|
/* Bind the two-argument real function `func` to every call of `name` that
 * was parsed with exactly two arguments. Returns the number of calls bound. */
int ke_set_real_func2(kexpr_t *ke, const char *name, double (*func)(double, double))
{
	int k, hits = 0;
	for (k = 0; k < ke->n; ++k) {
		ke1_t *tok = &ke->e[k];
		if (tok->ttype != KET_FUNC || tok->n_args != 2) continue;
		if (strcmp(tok->name, name) != 0) continue;
		tok->f.real_func2 = func;
		tok->user_func = KEF_REAL;
		++hits;
	}
	return hits;
}
|
||||||
|
|
||||||
|
/* Bind the default <math.h> functions (exp, log, log10, sqrt, sin, cos, tan
 * and the two-argument pow) to matching calls in the expression. Returns the
 * total number of calls bound. */
int ke_set_default_func(kexpr_t *ke)
{
	static const struct {
		const char *name;
		double (*fn)(double);
	} unary[] = {
		{ "exp", exp }, { "log", log }, { "log10", log10 }, { "sqrt", sqrt },
		{ "sin", sin }, { "cos", cos }, { "tan", tan }
	};
	int k, bound = 0;
	for (k = 0; k < (int)(sizeof(unary) / sizeof(unary[0])); ++k)
		bound += ke_set_real_func1(ke, unary[k].name, unary[k].fn);
	bound += ke_set_real_func2(ke, "pow", pow);
	return bound;
}
|
||||||
|
|
||||||
|
/* Mark every variable of the expression as unassigned; the stored values are
 * left as they are. */
void ke_unset(kexpr_t *ke)
{
	int k;
	for (k = 0; k < ke->n; ++k) {
		ke1_t *tok = &ke->e[k];
		if (tok->ttype != KET_VAL || !tok->name) continue;
		tok->assigned = 0;
	}
}
|
||||||
|
|
||||||
|
void ke_print(const kexpr_t *ke)
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
if (ke == 0) return;
|
||||||
|
for (i = 0; i < ke->n; ++i) {
|
||||||
|
const ke1_t *u = &ke->e[i];
|
||||||
|
if (i) putchar(' ');
|
||||||
|
if (u->ttype == KET_VAL) {
|
||||||
|
if (u->name) printf("%s", u->name);
|
||||||
|
else if (u->vtype == KEV_REAL) printf("%g", u->r);
|
||||||
|
else if (u->vtype == KEV_INT) printf("%lld", (long long)u->i);
|
||||||
|
else if (u->vtype == KEV_STR) printf("\"%s\"", u->s);
|
||||||
|
} else if (u->ttype == KET_OP) {
|
||||||
|
printf("%s", ke_opstr[u->op]);
|
||||||
|
} else if (u->ttype == KET_FUNC) {
|
||||||
|
printf("%s(%d)", u->name, u->n_args);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
putchar('\n');
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
#ifdef KE_MAIN
|
||||||
|
#include <unistd.h>
|
||||||
|
|
||||||
|
int main(int argc, char *argv[])
|
||||||
|
{
|
||||||
|
int c, err, to_print = 0, is_int = 0;
|
||||||
|
kexpr_t *ke;
|
||||||
|
|
||||||
|
while ((c = getopt(argc, argv, "pi")) >= 0) {
|
||||||
|
if (c == 'p') to_print = 1;
|
||||||
|
else if (c == 'i') is_int = 1;
|
||||||
|
}
|
||||||
|
if (optind == argc) {
|
||||||
|
fprintf(stderr, "Usage: %s [-pi] <expr>\n", argv[0]);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
ke = ke_parse(argv[optind], &err);
|
||||||
|
ke_set_default_func(ke);
|
||||||
|
if (err) {
|
||||||
|
fprintf(stderr, "Parse error: 0x%x\n", err);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
if (!to_print) {
|
||||||
|
int64_t vi;
|
||||||
|
double vr;
|
||||||
|
const char *vs;
|
||||||
|
int i, ret_type;
|
||||||
|
if (argc - optind > 1) {
|
||||||
|
for (i = optind + 1; i < argc; ++i) {
|
||||||
|
char *p, *s = argv[i];
|
||||||
|
for (p = s; *p && *p != '='; ++p);
|
||||||
|
if (*p == 0) continue; // not an assignment
|
||||||
|
*p = 0;
|
||||||
|
ke_set_real(ke, s, strtod(p+1, &p));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
err |= ke_eval(ke, &vi, &vr, &vs, &ret_type);
|
||||||
|
if (err & KEE_UNFUNC)
|
||||||
|
fprintf(stderr, "Evaluation warning: an undefined function returns the first function argument.\n");
|
||||||
|
if (err & KEE_UNVAR) fprintf(stderr, "Evaluation warning: unassigned variables are set to 0.\n");
|
||||||
|
if (ret_type == KEV_INT) printf("%lld\n", (long long)vi);
|
||||||
|
else if (ret_type == KEV_REAL) printf("%g\n", vr);
|
||||||
|
else printf("%s\n", vs);
|
||||||
|
} else ke_print(ke);
|
||||||
|
ke_destroy(ke);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
@ -0,0 +1,68 @@
|
||||||
|
#ifndef KEXPR_H
#define KEXPR_H

#include <stdint.h>

// Opaque handle of a parsed expression; created by ke_parse() and released
// with ke_destroy().
struct kexpr_s;
typedef struct kexpr_s kexpr_t;

// Parse errors (bit flags reported through the `err` argument of ke_parse())
#define KEE_UNQU    0x01 // unmatched quotation marks
#define KEE_UNLP    0x02 // unmatched left parentheses
#define KEE_UNRP    0x04 // unmatched right parentheses
#define KEE_UNOP    0x08 // unknown operators
#define KEE_FUNC    0x10 // wrong function syntax
#define KEE_ARG     0x20 // wrong number of arguments
#define KEE_NUM     0x40 // fail to parse a number

// Evaluation errors (bit flags returned by ke_eval()).
// NOTE: KEE_UNFUNC has the same numeric value as the parse error KEE_NUM.
#define KEE_UNFUNC  0x40 // undefined function
#define KEE_UNVAR   0x80 // unassigned variable

// Return type
#define KEV_REAL  1
#define KEV_INT   2
#define KEV_STR   3

#ifdef __cplusplus
extern "C" {
#endif

	// parse an expression and return errors in $err; the result is NULL when
	// $err is non-zero
	kexpr_t *ke_parse(const char *_s, int *err);

	// free memory allocated during parsing; a NULL pointer is accepted
	void ke_destroy(kexpr_t *ke);

	// set a variable to integer value and return the occurrence of the variable
	int ke_set_int(kexpr_t *ke, const char *var, int64_t x);

	// set a variable to real value and return the occurrence of the variable
	int ke_set_real(kexpr_t *ke, const char *var, double x);

	// set a variable to string value (the string is copied) and return the
	// occurrence of the variable
	int ke_set_str(kexpr_t *ke, const char *var, const char *x);

	// set a user-defined function taking one or two real arguments; returns
	// the number of matching calls in the expression
	int ke_set_real_func1(kexpr_t *ke, const char *name, double (*func)(double));
	int ke_set_real_func2(kexpr_t *ke, const char *name, double (*func)(double, double));

	// set default math functions
	int ke_set_default_func(kexpr_t *ke);

	// mark all variables as unset
	void ke_unset(kexpr_t *e);

	// evaluate expression; return error code; final value is returned via pointers
	int ke_eval(const kexpr_t *ke, int64_t *_i, double *_r, const char **_s, int *ret_type);
	int64_t ke_eval_int(const kexpr_t *ke, int *err);
	double ke_eval_real(const kexpr_t *ke, int *err);

	// print the expression in Reverse Polish notation (RPN)
	void ke_print(const kexpr_t *ke);

#ifdef __cplusplus
}
#endif

#endif
|
||||||
|
|
@ -0,0 +1,79 @@
|
||||||
|
#ifndef AC_KGRAPH_H
|
||||||
|
#define AC_KGRAPH_H
|
||||||
|
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include "khash.h"
|
||||||
|
#include "kbtree.h"
|
||||||
|
|
||||||
|
typedef unsigned kgint_t;
|
||||||
|
|
||||||
|
#define kgraph_t(name) kh_##name##_t
|
||||||
|
|
||||||
|
/* Generates the basic graph API on top of two khash tables: the graph maps a
 * vertex id to a vertex_t, and every vertex_t carries an `_arc` hash (khash
 * instance `ehn`) keyed by `neighbor<<2 | dir`, where dir is a 2-bit value.
 *
 *   kg_init_<name>     create an empty graph
 *   kg_destroy_<name>  free every arc table, then the graph (NULL accepted)
 *   kg_get_v_<name>    vertex lookup; NULL (0) if absent
 *   kg_put_v_<name>    insert-or-get a vertex; a new vertex gets an empty
 *                      arc table; *absent receives the kh_put() status
 *   kg_put_a_<name>    add the arc vbeg->vend under `dir` plus the mirrored
 *                      entry (~dir&3) at vend; *pb / *pe receive both records
 *   kg_del_v_<name>    unlink v from all neighbours and delete it; returns 0
 *                      if v is not present
 *
 * NOTE(review): kg_del_v returns the address of the slot it has just deleted
 * (its arc table is already destroyed), and the lookups on the neighbour
 * side are unchecked because the asserts are commented out. */
#define __KG_BASIC(name, SCOPE, vertex_t, arc_t, ehn) \
	SCOPE kgraph_t(name) *kg_init_##name(void) { return kh_init(name); } \
	SCOPE void kg_destroy_##name(kgraph_t(name) *g) { \
		khint_t k; \
		if (g == 0) return; \
		for (k = kh_begin(g); k != kh_end(g); ++k) \
			if (kh_exist(g, k)) kh_destroy(ehn, kh_val(g, k)._arc); \
		kh_destroy(name, g); \
	} \
	SCOPE vertex_t *kg_get_v_##name(kgraph_t(name) *g, kgint_t v) { \
		khint_t k = kh_get(name, g, v); \
		return k == kh_end(g)? 0 : &kh_val(g, k); \
	} \
	SCOPE vertex_t *kg_put_v_##name(kgraph_t(name) *g, kgint_t v, int *absent) { \
		khint_t k; \
		k = kh_put(name, g, v, absent); \
		if (*absent) kh_val(g, k)._arc = kh_init(ehn); \
		return &kh_val(g, k); \
	} \
	SCOPE void kg_put_a_##name(kgraph_t(name) *g, kgint_t vbeg, kgint_t vend, int dir, arc_t **pb, arc_t **pe) { \
		vertex_t *p; \
		khint_t k; \
		int absent; \
		p = kg_put_v_##name(g, vbeg, &absent); \
		k = kh_put(ehn, p->_arc, vend<<2|dir, &absent); \
		*pb = &kh_val(p->_arc, k); \
		p = kg_put_v_##name(g, vend, &absent); \
		k = kh_put(ehn, p->_arc, vbeg<<2|(~dir&3), &absent); \
		*pe = &kh_val(p->_arc, k); \
	} \
	SCOPE vertex_t *kg_del_v_##name(kgraph_t(name) *g, kgint_t v) { \
		khint_t k, k0, k2, k3; \
		khash_t(ehn) *h; \
		k0 = k = kh_get(name, g, v); \
		if (k == kh_end(g)) return 0; /* not present in the graph */ \
		h = kh_val(g, k)._arc; \
		for (k = kh_begin(h); k != kh_end(h); ++k) /* remove v from its neighbors */ \
			if (kh_exist(h, k)) { \
				k2 = kh_get(name, g, kh_key(h, k)>>2); \
				/* assert(k2 != kh_end(g)); */ \
				k3 = kh_get(ehn, kh_val(g, k2)._arc, v<<2|(~kh_key(h, k)&3)); \
				/* assert(k3 != kh_end(kh_val(g, k2)._arc)); */ \
				kh_del(ehn, kh_val(g, k2)._arc, k3); \
			} \
		kh_destroy(ehn, h); \
		kh_del(name, g, k0); \
		return &kh_val(g, k0); \
	}
|
||||||
|
|
||||||
|
/* Generates kg_print_<name>(g): writes one "v <id>" line per vertex and one
 * "a <id><c><c><id>" line per arc to stdout, where each <c> is '>' or '<'
 * selected by one of the two direction bits of the arc key. An arc is only
 * printed from its endpoint with the smaller id.
 * The expansion calls printf(), so <stdio.h> must be included where the
 * macro is used. */
#define KGRAPH_PRINT(name, SCOPE) \
	SCOPE void kg_print_##name(kgraph_t(name) *g) { \
		khint_t k, k2; \
		for (k = kh_begin(g); k != kh_end(g); ++k) \
			if (kh_exist(g, k)) { \
				printf("v %u\n", kh_key(g, k)); \
				for (k2 = kh_begin(kh_val(g, k)._arc); k2 != kh_end(kh_val(g, k)._arc); ++k2) \
					if (kh_exist(kh_val(g, k)._arc, k2) && kh_key(g, k) < kh_key(kh_val(g, k)._arc, k2)>>2) \
						printf("a %u%c%c%u\n", kh_key(g, k), "><"[kh_key(kh_val(g, k)._arc, k2)>>1&1], \
							   "><"[kh_key(kh_val(g, k)._arc, k2)&1], kh_key(kh_val(g, k)._arc, k2)>>2); \
			} \
	}
|
||||||
|
|
||||||
|
/* Instantiates a graph type `name`: a khash map from kgint_t vertex ids to
 * vertex_t, followed by the kg_*_<name> functions from __KG_BASIC. `ehn` is
 * the name of the khash instance used for the per-vertex `_arc` tables. */
#define KGRAPH_INIT(name, SCOPE, vertex_t, arc_t, ehn) \
	KHASH_INIT2(name, SCOPE, kgint_t, vertex_t, 1, kh_int_hash_func, kh_int_hash_equal) \
	__KG_BASIC(name, SCOPE, vertex_t, arc_t, ehn)
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
@ -0,0 +1,627 @@
|
||||||
|
/* The MIT License
|
||||||
|
|
||||||
|
Copyright (c) 2008, 2009, 2011 by Attractive Chaos <attractor@live.co.uk>
|
||||||
|
|
||||||
|
Permission is hereby granted, free of charge, to any person obtaining
|
||||||
|
a copy of this software and associated documentation files (the
|
||||||
|
"Software"), to deal in the Software without restriction, including
|
||||||
|
without limitation the rights to use, copy, modify, merge, publish,
|
||||||
|
distribute, sublicense, and/or sell copies of the Software, and to
|
||||||
|
permit persons to whom the Software is furnished to do so, subject to
|
||||||
|
the following conditions:
|
||||||
|
|
||||||
|
The above copyright notice and this permission notice shall be
|
||||||
|
included in all copies or substantial portions of the Software.
|
||||||
|
|
||||||
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||||
|
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||||
|
BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||||
|
ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||||
|
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
|
SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
An example:
|
||||||
|
|
||||||
|
#include "khash.h"
|
||||||
|
KHASH_MAP_INIT_INT(32, char)
|
||||||
|
int main() {
|
||||||
|
int ret, is_missing;
|
||||||
|
khiter_t k;
|
||||||
|
khash_t(32) *h = kh_init(32);
|
||||||
|
k = kh_put(32, h, 5, &ret);
|
||||||
|
kh_value(h, k) = 10;
|
||||||
|
k = kh_get(32, h, 10);
|
||||||
|
is_missing = (k == kh_end(h));
|
||||||
|
k = kh_get(32, h, 5);
|
||||||
|
kh_del(32, h, k);
|
||||||
|
for (k = kh_begin(h); k != kh_end(h); ++k)
|
||||||
|
if (kh_exist(h, k)) kh_value(h, k) = 1;
|
||||||
|
kh_destroy(32, h);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
2013-05-02 (0.2.8):
|
||||||
|
|
||||||
|
* Use quadratic probing. When the capacity is power of 2, stepping function
|
||||||
|
i*(i+1)/2 guarantees to traverse each bucket. It is better than double
|
||||||
|
hashing on cache performance and is more robust than linear probing.
|
||||||
|
|
||||||
|
In theory, double hashing should be more robust than quadratic probing.
|
||||||
|
However, my implementation is probably not for large hash tables, because
|
||||||
|
the second hash function is closely tied to the first hash function,
|
||||||
|
which reduces the effectiveness of double hashing.
|
||||||
|
|
||||||
|
Reference: http://research.cs.vt.edu/AVresearch/hashing/quadratic.php
|
||||||
|
|
||||||
|
2011-12-29 (0.2.7):
|
||||||
|
|
||||||
|
* Minor code clean up; no actual effect.
|
||||||
|
|
||||||
|
2011-09-16 (0.2.6):
|
||||||
|
|
||||||
|
* The capacity is a power of 2. This seems to dramatically improve the
|
||||||
|
speed for simple keys. Thank Zilong Tan for the suggestion. Reference:
|
||||||
|
|
||||||
|
- http://code.google.com/p/ulib/
|
||||||
|
- http://nothings.org/computer/judy/
|
||||||
|
|
||||||
|
* Allow to optionally use linear probing which usually has better
|
||||||
|
performance for random input. Double hashing is still the default as it
|
||||||
|
is more robust to certain non-random input.
|
||||||
|
|
||||||
|
* Added Wang's integer hash function (not used by default). This hash
|
||||||
|
function is more robust to certain non-random input.
|
||||||
|
|
||||||
|
2011-02-14 (0.2.5):
|
||||||
|
|
||||||
|
* Allow to declare global functions.
|
||||||
|
|
||||||
|
2009-09-26 (0.2.4):
|
||||||
|
|
||||||
|
* Improve portability
|
||||||
|
|
||||||
|
2008-09-19 (0.2.3):
|
||||||
|
|
||||||
|
* Corrected the example
|
||||||
|
* Improved interfaces
|
||||||
|
|
||||||
|
2008-09-11 (0.2.2):
|
||||||
|
|
||||||
|
* Improved speed a little in kh_put()
|
||||||
|
|
||||||
|
2008-09-10 (0.2.1):
|
||||||
|
|
||||||
|
* Added kh_clear()
|
||||||
|
* Fixed a compiling error
|
||||||
|
|
||||||
|
2008-09-02 (0.2.0):
|
||||||
|
|
||||||
|
* Changed to token concatenation which increases flexibility.
|
||||||
|
|
||||||
|
2008-08-31 (0.1.2):
|
||||||
|
|
||||||
|
* Fixed a bug in kh_get(), which has not been tested previously.
|
||||||
|
|
||||||
|
2008-08-31 (0.1.1):
|
||||||
|
|
||||||
|
* Added destructor
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
#ifndef __AC_KHASH_H
|
||||||
|
#define __AC_KHASH_H
|
||||||
|
|
||||||
|
/*!
|
||||||
|
@header
|
||||||
|
|
||||||
|
Generic hash table library.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#define AC_VERSION_KHASH_H "0.2.8"
|
||||||
|
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <limits.h>
|
||||||
|
|
||||||
|
/* compiler specific configuration */
|
||||||
|
|
||||||
|
/* 32-bit unsigned integer: whichever of unsigned int / unsigned long is
 * exactly 32 bits wide according to <limits.h>. */
#if UINT_MAX == 0xffffffffu
typedef unsigned int khint32_t;
#elif ULONG_MAX == 0xffffffffu
typedef unsigned long khint32_t;
#endif

/* 64-bit unsigned integer: unsigned long when it is as wide as unsigned long
 * long, unsigned long long otherwise. */
#if ULONG_MAX == ULLONG_MAX
typedef unsigned long khint64_t;
#else
typedef unsigned long long khint64_t;
#endif

/* Inline keyword; may be predefined by the includer. */
#if !defined(kh_inline)
#if defined(_MSC_VER)
#define kh_inline __inline
#else
#define kh_inline inline
#endif
#endif /* kh_inline */

/* Unused-entity attribute for clang/GCC >= 3; expands to nothing elsewhere. */
#if !defined(klib_unused)
#if (defined __clang__ && __clang_major__ >= 3) || (defined __GNUC__ && __GNUC__ >= 3)
#define klib_unused __attribute__ ((__unused__))
#else
#define klib_unused
#endif
#endif /* klib_unused */

/* Bucket index type and iterator type of the hash table. */
typedef khint32_t khint_t;
typedef khint_t khiter_t;
|
||||||
|
|
||||||
|
/* Bucket state flags: two bits per bucket, sixteen buckets per 32-bit word
 * of the flag array. Bit 1 (value 2) = "empty", bit 0 (value 1) = "deleted".
 * The macro arguments are fully parenthesised so that expression arguments
 * (e.g. `base | 1`) expand with the intended precedence. */
#define __ac_isempty(flag, i) (((flag)[(i)>>4]>>(((i)&0xfU)<<1))&2)
#define __ac_isdel(flag, i) (((flag)[(i)>>4]>>(((i)&0xfU)<<1))&1)
#define __ac_iseither(flag, i) (((flag)[(i)>>4]>>(((i)&0xfU)<<1))&3)
#define __ac_set_isdel_false(flag, i) ((flag)[(i)>>4]&=~(1ul<<(((i)&0xfU)<<1)))
#define __ac_set_isempty_false(flag, i) ((flag)[(i)>>4]&=~(2ul<<(((i)&0xfU)<<1)))
#define __ac_set_isboth_false(flag, i) ((flag)[(i)>>4]&=~(3ul<<(((i)&0xfU)<<1)))
#define __ac_set_isdel_true(flag, i) ((flag)[(i)>>4]|=1ul<<(((i)&0xfU)<<1))

/* Number of 32-bit flag words needed for m buckets (at least one). */
#define __ac_fsize(m) ((m) < 16? 1 : (m)>>4)
|
||||||
|
|
||||||
|
/* Round the 32-bit lvalue x up to the next power of two, in place (a value
 * that already is a power of two is left unchanged). */
#if !defined(kroundup32)
#define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x))
#endif

/* Allocator hooks: define any of these before this point to route the hash
 * table's memory management to a custom allocator. */
#if !defined(kcalloc)
#define kcalloc(N,Z) calloc(N,Z)
#endif
#if !defined(kmalloc)
#define kmalloc(Z) malloc(Z)
#endif
#if !defined(krealloc)
#define krealloc(P,Z) realloc(P,Z)
#endif
#if !defined(kfree)
#define kfree(P) free(P)
#endif

/* NOTE(review): presumably the maximum load factor before the table grows;
 * confirm against kh_resize_<name>(). */
static const double __ac_HASH_UPPER = 0.77;
|
||||||
|
|
||||||
|
/* Declares the table type kh_<name>_t: bucket count plus three bookkeeping
 * counters, the packed per-bucket state flags, and parallel key and value
 * arrays. */
#define __KHASH_TYPE(name, khkey_t, khval_t) \
	typedef struct kh_##name##_s { \
		khint_t n_buckets, size, n_occupied, upper_bound; \
		khint32_t *flags; \
		khkey_t *keys; \
		khval_t *vals; \
	} kh_##name##_t;
|
||||||
|
|
||||||
|
/*
 * __KHASH_PROTOTYPES(name, khkey_t, khval_t): extern declarations for the
 * seven functions that __KHASH_IMPL defines for table `name`. khval_t is
 * accepted for symmetry with the other macros but is not used here.
 */
#define __KHASH_PROTOTYPES(name, khkey_t, khval_t) \
	extern kh_##name##_t *kh_init_##name(void); \
	extern void kh_destroy_##name(kh_##name##_t *h); \
	extern void kh_clear_##name(kh_##name##_t *h); \
	extern khint_t kh_get_##name(const kh_##name##_t *h, khkey_t key); \
	extern int kh_resize_##name(kh_##name##_t *h, khint_t new_n_buckets); \
	extern khint_t kh_put_##name(kh_##name##_t *h, khkey_t key, int *ret); \
	extern void kh_del_##name(kh_##name##_t *h, khint_t x);
|
||||||
|
|
||||||
|
/*
 * __KHASH_IMPL(name, SCOPE, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal)
 *
 * Defines the functions of hash table `name`, each prefixed with SCOPE:
 *   kh_init_<name>()          allocate a zeroed, bucket-less table
 *   kh_destroy_<name>(h)      free the arrays and the table (NULL is accepted)
 *   kh_clear_<name>(h)        mark every bucket empty, keep the allocation
 *   kh_get_<name>(h, key)     bucket index of key, or h->n_buckets if absent
 *   kh_resize_<name>(h, n)    rehash into >= n buckets; 0 on success, -1 on
 *                             allocation failure
 *   kh_put_<name>(h, key, r)  insert key; *r is -1 on failure, 0 if the key
 *                             was present, 1 if an empty bucket was used,
 *                             2 if a deleted bucket was reused
 *   kh_del_<name>(h, x)       mark bucket x deleted if it holds a live entry
 *
 * kh_is_map selects whether the vals array is maintained. Collisions are
 * resolved by probing with an increasing step (i + 1, i + 1 + 2, ...).
 *
 * When shrinking, a failed krealloc keeps the existing (larger) arrays
 * instead of overwriting the pointers with NULL.
 */
#define __KHASH_IMPL(name, SCOPE, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \
	SCOPE kh_##name##_t *kh_init_##name(void) { \
		return (kh_##name##_t*)kcalloc(1, sizeof(kh_##name##_t)); \
	} \
	SCOPE void kh_destroy_##name(kh_##name##_t *h) \
	{ \
		if (h) { \
			kfree((void *)h->keys); kfree(h->flags); \
			kfree((void *)h->vals); \
			kfree(h); \
		} \
	} \
	SCOPE void kh_clear_##name(kh_##name##_t *h) \
	{ \
		if (h && h->flags) { \
			/* 0xaa sets the "empty" bit of every two-bit pair */ \
			memset(h->flags, 0xaa, __ac_fsize(h->n_buckets) * sizeof(khint32_t)); \
			h->size = h->n_occupied = 0; \
		} \
	} \
	SCOPE khint_t kh_get_##name(const kh_##name##_t *h, khkey_t key) \
	{ \
		if (h->n_buckets) { \
			khint_t k, i, last, mask, step = 0; \
			mask = h->n_buckets - 1; \
			k = __hash_func(key); i = k & mask; \
			last = i; \
			while (!__ac_isempty(h->flags, i) && (__ac_isdel(h->flags, i) || !__hash_equal(h->keys[i], key))) { \
				i = (i + (++step)) & mask; \
				if (i == last) return h->n_buckets; \
			} \
			return __ac_iseither(h->flags, i)? h->n_buckets : i; \
		} else return 0; \
	} \
	SCOPE int kh_resize_##name(kh_##name##_t *h, khint_t new_n_buckets) \
	{ /* This function uses 0.25*n_buckets bytes of working space instead of [sizeof(key_t+val_t)+.25]*n_buckets. */ \
		khint32_t *new_flags = 0; \
		khint_t j = 1; \
		{ \
			kroundup32(new_n_buckets); \
			if (new_n_buckets < 4) new_n_buckets = 4; \
			if (h->size >= (khint_t)(new_n_buckets * __ac_HASH_UPPER + 0.5)) j = 0;	/* requested size is too small */ \
			else { /* hash table size to be changed (shrink or expand); rehash */ \
				new_flags = (khint32_t*)kmalloc(__ac_fsize(new_n_buckets) * sizeof(khint32_t)); \
				if (!new_flags) return -1; \
				memset(new_flags, 0xaa, __ac_fsize(new_n_buckets) * sizeof(khint32_t)); \
				if (h->n_buckets < new_n_buckets) {	/* expand */ \
					khkey_t *new_keys = (khkey_t*)krealloc((void *)h->keys, new_n_buckets * sizeof(khkey_t)); \
					if (!new_keys) { kfree(new_flags); return -1; } \
					h->keys = new_keys; \
					if (kh_is_map) { \
						khval_t *new_vals = (khval_t*)krealloc((void *)h->vals, new_n_buckets * sizeof(khval_t)); \
						if (!new_vals) { kfree(new_flags); return -1; } \
						h->vals = new_vals; \
					} \
				} /* otherwise shrink */ \
			} \
		} \
		if (j) { /* rehashing is needed */ \
			for (j = 0; j != h->n_buckets; ++j) { \
				if (__ac_iseither(h->flags, j) == 0) { \
					khkey_t key = h->keys[j]; \
					khval_t val; \
					khint_t new_mask; \
					new_mask = new_n_buckets - 1; \
					if (kh_is_map) val = h->vals[j]; \
					__ac_set_isdel_true(h->flags, j); \
					while (1) { /* kick-out process; sort of like in Cuckoo hashing */ \
						khint_t k, i, step = 0; \
						k = __hash_func(key); \
						i = k & new_mask; \
						while (!__ac_isempty(new_flags, i)) i = (i + (++step)) & new_mask; \
						__ac_set_isempty_false(new_flags, i); \
						if (i < h->n_buckets && __ac_iseither(h->flags, i) == 0) { /* kick out the existing element */ \
							{ khkey_t tmp = h->keys[i]; h->keys[i] = key; key = tmp; } \
							if (kh_is_map) { khval_t tmp = h->vals[i]; h->vals[i] = val; val = tmp; } \
							__ac_set_isdel_true(h->flags, i); /* mark it as deleted in the old hash table */ \
						} else { /* write the element and jump out of the loop */ \
							h->keys[i] = key; \
							if (kh_is_map) h->vals[i] = val; \
							break; \
						} \
					} \
				} \
			} \
			if (h->n_buckets > new_n_buckets) { /* shrink the hash table */ \
				khkey_t *shrunk_keys = (khkey_t*)krealloc((void *)h->keys, new_n_buckets * sizeof(khkey_t)); \
				if (shrunk_keys) h->keys = shrunk_keys; /* on failure the old, larger block is still valid */ \
				if (kh_is_map) { \
					khval_t *shrunk_vals = (khval_t*)krealloc((void *)h->vals, new_n_buckets * sizeof(khval_t)); \
					if (shrunk_vals) h->vals = shrunk_vals; \
				} \
			} \
			kfree(h->flags); /* free the working space */ \
			h->flags = new_flags; \
			h->n_buckets = new_n_buckets; \
			h->n_occupied = h->size; \
			h->upper_bound = (khint_t)(h->n_buckets * __ac_HASH_UPPER + 0.5); \
		} \
		return 0; \
	} \
	SCOPE khint_t kh_put_##name(kh_##name##_t *h, khkey_t key, int *ret) \
	{ \
		khint_t x; \
		if (h->n_occupied >= h->upper_bound) { /* update the hash table */ \
			if (h->n_buckets > (h->size<<1)) { \
				if (kh_resize_##name(h, h->n_buckets - 1) < 0) { /* clear "deleted" elements */ \
					*ret = -1; return h->n_buckets; \
				} \
			} else if (kh_resize_##name(h, h->n_buckets + 1) < 0) { /* expand the hash table */ \
				*ret = -1; return h->n_buckets; \
			} \
		} /* TODO: to implement automatically shrinking; resize() already support shrinking */ \
		{ \
			khint_t k, i, site, last, mask = h->n_buckets - 1, step = 0; \
			x = site = h->n_buckets; k = __hash_func(key); i = k & mask; \
			if (__ac_isempty(h->flags, i)) x = i; /* for speed up */ \
			else { \
				last = i; \
				while (!__ac_isempty(h->flags, i) && (__ac_isdel(h->flags, i) || !__hash_equal(h->keys[i], key))) { \
					if (__ac_isdel(h->flags, i)) site = i; /* remember a reusable deleted bucket */ \
					i = (i + (++step)) & mask; \
					if (i == last) { x = site; break; } \
				} \
				if (x == h->n_buckets) { \
					if (__ac_isempty(h->flags, i) && site != h->n_buckets) x = site; \
					else x = i; \
				} \
			} \
		} \
		if (__ac_isempty(h->flags, x)) { /* not present at all */ \
			h->keys[x] = key; \
			__ac_set_isboth_false(h->flags, x); \
			++h->size; ++h->n_occupied; \
			*ret = 1; \
		} else if (__ac_isdel(h->flags, x)) { /* deleted */ \
			h->keys[x] = key; \
			__ac_set_isboth_false(h->flags, x); \
			++h->size; \
			*ret = 2; \
		} else *ret = 0; /* Don't touch h->keys[x] if present and not deleted */ \
		return x; \
	} \
	SCOPE void kh_del_##name(kh_##name##_t *h, khint_t x) \
	{ \
		if (x != h->n_buckets && !__ac_iseither(h->flags, x)) { \
			__ac_set_isdel_true(h->flags, x); \
			--h->size; \
		} \
	}
|
||||||
|
|
||||||
|
/*
 * KHASH_DECLARE(name, khkey_t, khval_t): declares the table type and the
 * extern prototypes of its functions, without defining them.
 */
#define KHASH_DECLARE(name, khkey_t, khval_t) \
	__KHASH_TYPE(name, khkey_t, khval_t) \
	__KHASH_PROTOTYPES(name, khkey_t, khval_t)
|
||||||
|
|
||||||
|
/*
 * KHASH_INIT2(name, SCOPE, ...): declares the table type and defines all of
 * its functions with the caller-chosen storage class / qualifiers SCOPE.
 */
#define KHASH_INIT2(name, SCOPE, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \
	__KHASH_TYPE(name, khkey_t, khval_t) \
	__KHASH_IMPL(name, SCOPE, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal)
|
||||||
|
|
||||||
|
/*
 * KHASH_INIT(name, ...): KHASH_INIT2 with SCOPE fixed to
 * `static kh_inline klib_unused`, i.e. functions private to the including
 * translation unit.
 */
#define KHASH_INIT(name, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \
	KHASH_INIT2(name, static kh_inline klib_unused, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal)
|
||||||
|
|
||||||
|
/* --- BEGIN OF HASH FUNCTIONS --- */
|
||||||
|
|
||||||
|
/*! @function
|
||||||
|
@abstract Integer hash function
|
||||||
|
@param key The integer [khint32_t]
|
||||||
|
@return The hash value [khint_t]
|
||||||
|
*/
|
||||||
|
#define kh_int_hash_func(key) (khint32_t)(key)
|
||||||
|
/*! @function
|
||||||
|
@abstract Integer comparison function
|
||||||
|
*/
|
||||||
|
#define kh_int_hash_equal(a, b) ((a) == (b))
|
||||||
|
/*! @function
|
||||||
|
@abstract 64-bit integer hash function
|
||||||
|
@param key The integer [khint64_t]
|
||||||
|
@return The hash value [khint_t]
|
||||||
|
*/
|
||||||
|
#define kh_int64_hash_func(key) (khint32_t)((key)>>33^(key)^(key)<<11)
|
||||||
|
/*! @function
|
||||||
|
@abstract 64-bit integer comparison function
|
||||||
|
*/
|
||||||
|
#define kh_int64_hash_equal(a, b) ((a) == (b))
|
||||||
|
/*! @function
|
||||||
|
@abstract const char* hash function
|
||||||
|
@param s Pointer to a null terminated string
|
||||||
|
@return The hash value
|
||||||
|
*/
|
||||||
|
/*
 * Hashes a NUL-terminated string: starts from the first character and, for
 * each following character c, computes h = h * 31 + c (written as
 * (h << 5) - h + c). An empty string hashes to 0. Characters are converted
 * from plain `char`, so the contribution of bytes >= 0x80 depends on whether
 * `char` is signed on the target.
 */
static kh_inline khint_t __ac_X31_hash_string(const char *s)
{
	khint_t h = (khint_t)*s;
	if (h == 0) return h; /* empty string */
	for (++s; *s; ++s)
		h = (h << 5) - h + (khint_t)*s;
	return h;
}
|
||||||
|
/*! @function
|
||||||
|
@abstract Another interface to const char* hash function
|
||||||
|
@param key Pointer to a null terminated string [const char*]
|
||||||
|
@return The hash value [khint_t]
|
||||||
|
*/
|
||||||
|
#define kh_str_hash_func(key) __ac_X31_hash_string(key)
|
||||||
|
/*! @function
|
||||||
|
@abstract Const char* comparison function
|
||||||
|
*/
|
||||||
|
#define kh_str_hash_equal(a, b) (strcmp(a, b) == 0)
|
||||||
|
|
||||||
|
/*
 * Integer mixing function: scrambles `key` through a fixed sequence of
 * shift/add/xor steps and returns the result. Arithmetic is unsigned, so
 * overflow wraps.
 */
static kh_inline khint_t __ac_Wang_hash(khint_t key)
{
	key += ~(key << 15);
	key ^=  (key >> 10);
	key +=  (key << 3);
	key ^=  (key >> 6);
	key += ~(key << 11);
	key ^=  (key >> 16);
	return key;
}
|
||||||
|
/* Alternative integer hash: converts the whole argument to khint_t (the
 * argument is parenthesized so the cast applies to the full expression) and
 * mixes it with __ac_Wang_hash. */
#define kh_int_hash_func2(key) __ac_Wang_hash((khint_t)(key))
|
||||||
|
|
||||||
|
/* --- END OF HASH FUNCTIONS --- */
|
||||||
|
|
||||||
|
/* Other convenient macros... */
|
||||||
|
|
||||||
|
/*!
|
||||||
|
@abstract Type of the hash table.
|
||||||
|
@param name Name of the hash table [symbol]
|
||||||
|
*/
|
||||||
|
#define khash_t(name) kh_##name##_t
|
||||||
|
|
||||||
|
/*! @function
|
||||||
|
@abstract Initiate a hash table.
|
||||||
|
@param name Name of the hash table [symbol]
|
||||||
|
@return Pointer to the hash table [khash_t(name)*]
|
||||||
|
*/
|
||||||
|
#define kh_init(name) kh_init_##name()
|
||||||
|
|
||||||
|
/*! @function
|
||||||
|
@abstract Destroy a hash table.
|
||||||
|
@param name Name of the hash table [symbol]
|
||||||
|
@param h Pointer to the hash table [khash_t(name)*]
|
||||||
|
*/
|
||||||
|
#define kh_destroy(name, h) kh_destroy_##name(h)
|
||||||
|
|
||||||
|
/*! @function
|
||||||
|
@abstract Reset a hash table without deallocating memory.
|
||||||
|
@param name Name of the hash table [symbol]
|
||||||
|
@param h Pointer to the hash table [khash_t(name)*]
|
||||||
|
*/
|
||||||
|
#define kh_clear(name, h) kh_clear_##name(h)
|
||||||
|
|
||||||
|
/*! @function
|
||||||
|
@abstract Resize a hash table.
|
||||||
|
@param name Name of the hash table [symbol]
|
||||||
|
@param h Pointer to the hash table [khash_t(name)*]
|
||||||
|
@param s New size [khint_t]
|
||||||
|
*/
|
||||||
|
#define kh_resize(name, h, s) kh_resize_##name(h, s)
|
||||||
|
|
||||||
|
/*! @function
|
||||||
|
@abstract Insert a key to the hash table.
|
||||||
|
@param name Name of the hash table [symbol]
|
||||||
|
@param h Pointer to the hash table [khash_t(name)*]
|
||||||
|
@param k Key [type of keys]
|
||||||
|
@param r Extra return code: -1 if the operation failed;
|
||||||
|
0 if the key is present in the hash table;
|
||||||
|
1 if the bucket is empty (never used); 2 if the element in
|
||||||
|
the bucket has been deleted [int*]
|
||||||
|
@return Iterator to the inserted element [khint_t]
|
||||||
|
*/
|
||||||
|
#define kh_put(name, h, k, r) kh_put_##name(h, k, r)
|
||||||
|
|
||||||
|
/*! @function
|
||||||
|
@abstract Retrieve a key from the hash table.
|
||||||
|
@param name Name of the hash table [symbol]
|
||||||
|
@param h Pointer to the hash table [khash_t(name)*]
|
||||||
|
@param k Key [type of keys]
|
||||||
|
@return Iterator to the found element, or kh_end(h) if the element is absent [khint_t]
|
||||||
|
*/
|
||||||
|
#define kh_get(name, h, k) kh_get_##name(h, k)
|
||||||
|
|
||||||
|
/*! @function
|
||||||
|
@abstract Remove a key from the hash table.
|
||||||
|
@param name Name of the hash table [symbol]
|
||||||
|
@param h Pointer to the hash table [khash_t(name)*]
|
||||||
|
@param k Iterator to the element to be deleted [khint_t]
|
||||||
|
*/
|
||||||
|
#define kh_del(name, h, k) kh_del_##name(h, k)
|
||||||
|
|
||||||
|
/*! @function
|
||||||
|
@abstract Test whether a bucket contains data.
|
||||||
|
@param h Pointer to the hash table [khash_t(name)*]
|
||||||
|
@param x Iterator to the bucket [khint_t]
|
||||||
|
@return 1 if containing data; 0 otherwise [int]
|
||||||
|
*/
|
||||||
|
#define kh_exist(h, x) (!__ac_iseither((h)->flags, (x)))
|
||||||
|
|
||||||
|
/*! @function
|
||||||
|
@abstract Get key given an iterator
|
||||||
|
@param h Pointer to the hash table [khash_t(name)*]
|
||||||
|
@param x Iterator to the bucket [khint_t]
|
||||||
|
@return Key [type of keys]
|
||||||
|
*/
|
||||||
|
#define kh_key(h, x) ((h)->keys[x])
|
||||||
|
|
||||||
|
/*! @function
|
||||||
|
@abstract Get value given an iterator
|
||||||
|
@param h Pointer to the hash table [khash_t(name)*]
|
||||||
|
@param x Iterator to the bucket [khint_t]
|
||||||
|
@return Value [type of values]
|
||||||
|
@discussion For hash sets, calling this results in segfault.
|
||||||
|
*/
|
||||||
|
#define kh_val(h, x) ((h)->vals[x])
|
||||||
|
|
||||||
|
/*! @function
|
||||||
|
@abstract Alias of kh_val()
|
||||||
|
*/
|
||||||
|
#define kh_value(h, x) ((h)->vals[x])
|
||||||
|
|
||||||
|
/*! @function
|
||||||
|
@abstract Get the start iterator
|
||||||
|
@param h Pointer to the hash table [khash_t(name)*]
|
||||||
|
@return The start iterator [khint_t]
|
||||||
|
*/
|
||||||
|
#define kh_begin(h) (khint_t)(0)
|
||||||
|
|
||||||
|
/*! @function
|
||||||
|
@abstract Get the end iterator
|
||||||
|
@param h Pointer to the hash table [khash_t(name)*]
|
||||||
|
@return The end iterator [khint_t]
|
||||||
|
*/
|
||||||
|
#define kh_end(h) ((h)->n_buckets)
|
||||||
|
|
||||||
|
/*! @function
|
||||||
|
@abstract Get the number of elements in the hash table
|
||||||
|
@param h Pointer to the hash table [khash_t(name)*]
|
||||||
|
@return Number of elements in the hash table [khint_t]
|
||||||
|
*/
|
||||||
|
#define kh_size(h) ((h)->size)
|
||||||
|
|
||||||
|
/*! @function
|
||||||
|
@abstract Get the number of buckets in the hash table
|
||||||
|
@param h Pointer to the hash table [khash_t(name)*]
|
||||||
|
@return Number of buckets in the hash table [khint_t]
|
||||||
|
*/
|
||||||
|
#define kh_n_buckets(h) ((h)->n_buckets)
|
||||||
|
|
||||||
|
/*! @function
|
||||||
|
@abstract Iterate over the entries in the hash table
|
||||||
|
@param h Pointer to the hash table [khash_t(name)*]
|
||||||
|
@param kvar Variable to which key will be assigned
|
||||||
|
@param vvar Variable to which value will be assigned
|
||||||
|
@param code Block of code to execute
|
||||||
|
*/
|
||||||
|
/*
 * kh_foreach(h, kvar, vvar, code): visits every bucket of h from kh_begin to
 * kh_end; for each bucket that holds data it assigns the key to kvar and the
 * value to vvar, then runs `code`. Because the body sits inside the macro's
 * own for-loop, `continue`/`break` in `code` act on that loop.
 */
#define kh_foreach(h, kvar, vvar, code) { khint_t __i; \
	for (__i = kh_begin(h); __i != kh_end(h); ++__i) { \
		if (!kh_exist(h,__i)) continue; \
		(kvar) = kh_key(h,__i); \
		(vvar) = kh_val(h,__i); \
		code; \
	} }
|
||||||
|
|
||||||
|
/*! @function
|
||||||
|
@abstract Iterate over the values in the hash table
|
||||||
|
@param h Pointer to the hash table [khash_t(name)*]
|
||||||
|
@param vvar Variable to which value will be assigned
|
||||||
|
@param code Block of code to execute
|
||||||
|
*/
|
||||||
|
/*
 * kh_foreach_value(h, vvar, code): like kh_foreach but only the value of each
 * occupied bucket is assigned (to vvar) before `code` runs.
 */
#define kh_foreach_value(h, vvar, code) { khint_t __i; \
	for (__i = kh_begin(h); __i != kh_end(h); ++__i) { \
		if (!kh_exist(h,__i)) continue; \
		(vvar) = kh_val(h,__i); \
		code; \
	} }
|
||||||
|
|
||||||
|
/* More convenient interfaces */
|
||||||
|
|
||||||
|
/*! @function
|
||||||
|
@abstract Instantiate a hash set containing integer keys
|
||||||
|
@param name Name of the hash table [symbol]
|
||||||
|
*/
|
||||||
|
/* Hash set keyed by khint32_t; the value type is a placeholder `char` and
 * kh_is_map is 0, so no value array is maintained. */
#define KHASH_SET_INIT_INT(name) \
	KHASH_INIT(name, khint32_t, char, 0, kh_int_hash_func, kh_int_hash_equal)
|
||||||
|
|
||||||
|
/*! @function
|
||||||
|
@abstract Instantiate a hash map containing integer keys
|
||||||
|
@param name Name of the hash table [symbol]
|
||||||
|
@param khval_t Type of values [type]
|
||||||
|
*/
|
||||||
|
/* Hash map from khint32_t keys to khval_t values. */
#define KHASH_MAP_INIT_INT(name, khval_t) \
	KHASH_INIT(name, khint32_t, khval_t, 1, kh_int_hash_func, kh_int_hash_equal)
|
||||||
|
|
||||||
|
/*! @function
|
||||||
|
@abstract Instantiate a hash set containing 64-bit integer keys
|
||||||
|
@param name Name of the hash table [symbol]
|
||||||
|
*/
|
||||||
|
/* Hash set keyed by khint64_t; the value type is a placeholder `char` and
 * kh_is_map is 0, so no value array is maintained. */
#define KHASH_SET_INIT_INT64(name) \
	KHASH_INIT(name, khint64_t, char, 0, kh_int64_hash_func, kh_int64_hash_equal)
|
||||||
|
|
||||||
|
/*! @function
|
||||||
|
@abstract Instantiate a hash map containing 64-bit integer keys
|
||||||
|
@param name Name of the hash table [symbol]
|
||||||
|
@param khval_t Type of values [type]
|
||||||
|
*/
|
||||||
|
/* Hash map from khint64_t keys to khval_t values. */
#define KHASH_MAP_INIT_INT64(name, khval_t) \
	KHASH_INIT(name, khint64_t, khval_t, 1, kh_int64_hash_func, kh_int64_hash_equal)
|
||||||
|
|
||||||
|
typedef const char *kh_cstr_t;
|
||||||
|
/*! @function
|
||||||
|
@abstract Instantiate a hash set containing const char* keys
|
||||||
|
@param name Name of the hash table [symbol]
|
||||||
|
*/
|
||||||
|
/* Hash set keyed by kh_cstr_t (const char*). Keys are hashed and compared by
 * string content; only the pointer is stored, the string is not copied. */
#define KHASH_SET_INIT_STR(name) \
	KHASH_INIT(name, kh_cstr_t, char, 0, kh_str_hash_func, kh_str_hash_equal)
|
||||||
|
|
||||||
|
/*! @function
|
||||||
|
@abstract Instantiate a hash map containing const char* keys
|
||||||
|
@param name Name of the hash table [symbol]
|
||||||
|
@param khval_t Type of values [type]
|
||||||
|
*/
|
||||||
|
/* Hash map from kh_cstr_t (const char*) keys to khval_t values. Keys are
 * hashed and compared by string content; only the pointer is stored. */
#define KHASH_MAP_INIT_STR(name, khval_t) \
	KHASH_INIT(name, kh_cstr_t, khval_t, 1, kh_str_hash_func, kh_str_hash_equal)
|
||||||
|
|
||||||
|
#endif /* __AC_KHASH_H */
|
||||||
|
|
@ -0,0 +1,446 @@
|
||||||
|
/* The MIT License
|
||||||
|
|
||||||
|
Copyright (c) 2019-2024 by Attractive Chaos <attractor@live.co.uk>
|
||||||
|
|
||||||
|
Permission is hereby granted, free of charge, to any person obtaining
|
||||||
|
a copy of this software and associated documentation files (the
|
||||||
|
"Software"), to deal in the Software without restriction, including
|
||||||
|
without limitation the rights to use, copy, modify, merge, publish,
|
||||||
|
distribute, sublicense, and/or sell copies of the Software, and to
|
||||||
|
permit persons to whom the Software is furnished to do so, subject to
|
||||||
|
the following conditions:
|
||||||
|
|
||||||
|
The above copyright notice and this permission notice shall be
|
||||||
|
included in all copies or substantial portions of the Software.
|
||||||
|
|
||||||
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||||
|
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||||
|
BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||||
|
ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||||
|
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
|
SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef __AC_KHASHL_H
|
||||||
|
#define __AC_KHASHL_H
|
||||||
|
|
||||||
|
#define AC_VERSION_KHASHL_H "r20"
|
||||||
|
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <limits.h>
|
||||||
|
|
||||||
|
/************************************
|
||||||
|
* Compiler specific configurations *
|
||||||
|
************************************/
|
||||||
|
|
||||||
|
/*
 * Fixed-width unsigned types chosen from <limits.h>:
 *   khint32_t - `unsigned int` if it is 32 bits wide, else `unsigned long`
 *   khint64_t - `unsigned long` if it is as wide as `unsigned long long`,
 *               else `unsigned long long`
 * Compilation stops with a clear message when neither candidate for
 * khint32_t is 32 bits wide, instead of leaving the type undefined.
 */
#if UINT_MAX == 0xffffffffu
typedef unsigned int khint32_t;
#elif ULONG_MAX == 0xffffffffu
typedef unsigned long khint32_t;
#else
#error "khashl: no 32-bit unsigned integer type found for khint32_t"
#endif

#if ULONG_MAX == ULLONG_MAX
typedef unsigned long khint64_t;
#else
typedef unsigned long long khint64_t;
#endif
|
||||||
|
|
||||||
|
#ifndef kh_inline
|
||||||
|
#ifdef _MSC_VER
|
||||||
|
#define kh_inline __inline
|
||||||
|
#else
|
||||||
|
#define kh_inline inline
|
||||||
|
#endif
|
||||||
|
#endif /* kh_inline */
|
||||||
|
|
||||||
|
/*
 * klib_unused: marks a definition as intentionally unused. Expands to the
 * GNU attribute on clang/GCC >= 3 and to nothing elsewhere. May be
 * predefined by the user.
 */
#ifndef klib_unused
#if (defined __clang__ && __clang_major__ >= 3) || (defined __GNUC__ && __GNUC__ >= 3)
#define klib_unused __attribute__ ((__unused__))
#else
#define klib_unused
#endif
#endif /* klib_unused */
|
||||||
|
|
||||||
|
#define KH_LOCAL static kh_inline klib_unused
|
||||||
|
|
||||||
|
typedef khint32_t khint_t;
|
||||||
|
typedef const char *kh_cstr_t;
|
||||||
|
|
||||||
|
/***********************
|
||||||
|
* Configurable macros *
|
||||||
|
***********************/
|
||||||
|
|
||||||
|
/*
 * kh_max_count(cap): largest number of entries allowed in a table of `cap`
 * buckets before it is grown. The default is cap/2 + cap/4, i.e. a 75% load
 * factor. Define kh_max_count before including this header to change it.
 */
#ifndef kh_max_count
#define kh_max_count(cap) (((cap)>>1) + ((cap)>>2)) /* default load factor: 75% */
#endif
|
||||||
|
|
||||||
|
#ifndef kh_packed
|
||||||
|
#define kh_packed __attribute__ ((__packed__))
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef kcalloc
|
||||||
|
#define kcalloc(N,Z) calloc(N,Z)
|
||||||
|
#endif
|
||||||
|
#ifndef kmalloc
|
||||||
|
#define kmalloc(Z) malloc(Z)
|
||||||
|
#endif
|
||||||
|
#ifndef krealloc
|
||||||
|
#define krealloc(P,Z) realloc(P,Z)
|
||||||
|
#endif
|
||||||
|
#ifndef kfree
|
||||||
|
#define kfree(P) free(P)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/****************************
|
||||||
|
* Simple private functions *
|
||||||
|
****************************/
|
||||||
|
|
||||||
|
/*
 * Occupancy bitmap helpers: one bit per bucket, 32 buckets per word (bucket
 * i is bit i&31 of word i>>5). A set bit means the bucket holds a key.
 * Arguments are parenthesized so expression arguments work; `i` is evaluated
 * twice and must not have side effects.
 */
#define __kh_used(flag, i)       ((flag)[(i)>>5] >> ((i)&0x1fU) & 1U)
#define __kh_set_used(flag, i)   ((flag)[(i)>>5] |= 1U<<((i)&0x1fU))
#define __kh_set_unused(flag, i) ((flag)[(i)>>5] &= ~(1U<<((i)&0x1fU)))
|
||||||
|
|
||||||
|
#define __kh_fsize(m) ((m) < 32? 1 : (m)>>5)
|
||||||
|
|
||||||
|
/* Maps a hash value to a bucket index of `bits` bits: multiplies by
 * 2654435769 (close to 2^32 divided by the golden ratio) with unsigned
 * wrap-around and keeps the top `bits` bits of the product. `bits` must be
 * at least 1: with bits == 0 the shift count would be 32, the full width of
 * khint_t, which is undefined in C. */
static kh_inline khint_t __kh_h2b(khint_t hash, khint_t bits) { return hash * 2654435769U >> (32 - bits); }
|
||||||
|
|
||||||
|
/*******************
|
||||||
|
* Hash table base *
|
||||||
|
*******************/
|
||||||
|
|
||||||
|
#define __KHASHL_TYPE(HType, khkey_t) \
|
||||||
|
typedef struct HType { \
|
||||||
|
khint_t bits, count; \
|
||||||
|
khint32_t *used; \
|
||||||
|
khkey_t *keys; \
|
||||||
|
} HType;
|
||||||
|
|
||||||
|
/*
 * __KHASHL_PROTOTYPES(HType, prefix, khkey_t): extern declarations matching
 * the functions generated by the __KHASHL_IMPL_* macros. prefix##_del is
 * declared as returning int, in agreement with __KHASHL_IMPL_DEL, which
 * returns 1 when an entry was removed and 0 otherwise.
 */
#define __KHASHL_PROTOTYPES(HType, prefix, khkey_t) \
	extern HType *prefix##_init(void); \
	extern void prefix##_destroy(HType *h); \
	extern void prefix##_clear(HType *h); \
	extern khint_t prefix##_getp(const HType *h, const khkey_t *key); \
	extern int prefix##_resize(HType *h, khint_t new_n_buckets); \
	extern khint_t prefix##_putp(HType *h, const khkey_t *key, int *absent); \
	extern int prefix##_del(HType *h, khint_t k);
|
||||||
|
|
||||||
|
/*
 * __KHASHL_IMPL_BASIC(SCOPE, HType, prefix): lifetime helpers.
 *   prefix##_init()     - returns a zero-initialised table (NULL if the
 *                         allocation fails); no buckets are allocated yet
 *   prefix##_destroy(h) - frees the arrays and the table; NULL is accepted
 *   prefix##_clear(h)   - marks every bucket unused and resets count, keeping
 *                         the allocated arrays
 */
#define __KHASHL_IMPL_BASIC(SCOPE, HType, prefix) \
	SCOPE HType *prefix##_init(void) { \
		return (HType*)kcalloc(1, sizeof(HType)); \
	} \
	SCOPE void prefix##_destroy(HType *h) { \
		if (!h) return; \
		kfree((void *)h->keys); kfree(h->used); \
		kfree(h); \
	} \
	SCOPE void prefix##_clear(HType *h) { \
		if (h && h->used) { \
			khint_t n_buckets = (khint_t)1U << h->bits; \
			memset(h->used, 0, __kh_fsize(n_buckets) * sizeof(khint32_t)); \
			h->count = 0; \
		} \
	}
|
||||||
|
|
||||||
|
/*
 * __KHASHL_IMPL_GET: lookup functions.
 *   prefix##_getp_core(h, key, hash) - linear probe starting at the bucket
 *       derived from `hash`; returns the bucket index holding *key, or the
 *       bucket count when the key is absent (0 for a table with no buckets)
 *   prefix##_getp(h, key) - same, hashing *key with __hash_fn
 *   prefix##_get(h, key)  - same, taking the key by value
 */
#define __KHASHL_IMPL_GET(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \
	SCOPE khint_t prefix##_getp_core(const HType *h, const khkey_t *key, khint_t hash) { \
		khint_t i, last, n_buckets, mask; \
		if (h->keys == 0) return 0; \
		n_buckets = (khint_t)1U << h->bits; \
		mask = n_buckets - 1U; \
		i = last = __kh_h2b(hash, h->bits); \
		while (__kh_used(h->used, i) && !__hash_eq(h->keys[i], *key)) { \
			i = (i + 1U) & mask; \
			if (i == last) return n_buckets; /* wrapped around: not found */ \
		} \
		return !__kh_used(h->used, i)? n_buckets : i; \
	} \
	SCOPE khint_t prefix##_getp(const HType *h, const khkey_t *key) { return prefix##_getp_core(h, key, __hash_fn(*key)); } \
	SCOPE khint_t prefix##_get(const HType *h, khkey_t key) { return prefix##_getp_core(h, &key, __hash_fn(key)); }
|
||||||
|
|
||||||
|
/*
 * __KHASHL_IMPL_RESIZE: defines prefix##_resize(h, new_n_buckets).
 *
 * The requested size is rounded up to a power of two (minimum 4 buckets).
 * If the current entries would exceed kh_max_count() of that size, nothing
 * is done and 0 is returned. Otherwise every key is re-placed in the same
 * key array using a fresh occupancy bitmap; a key whose target slot is still
 * occupied under the old bitmap swaps with the occupant, which is then
 * re-placed in turn.
 *
 * Returns 0 on success and -1 if memory cannot be allocated, in which case
 * the table is left usable. The bitmap allocation is checked before it is
 * cleared, and a failed shrinking krealloc keeps the old, larger key array
 * rather than replacing the pointer with NULL.
 */
#define __KHASHL_IMPL_RESIZE(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \
	SCOPE int prefix##_resize(HType *h, khint_t new_n_buckets) { \
		khint32_t *new_used = 0; \
		khint_t j = 0, x = new_n_buckets, n_buckets, new_bits, new_mask; \
		while ((x >>= 1) != 0) ++j; /* j = floor(log2(new_n_buckets)) */ \
		if (new_n_buckets & (new_n_buckets - 1)) ++j; /* round up if not a power of two */ \
		new_bits = j > 2? j : 2; \
		new_n_buckets = (khint_t)1U << new_bits; \
		if (h->count > kh_max_count(new_n_buckets)) return 0; /* requested size is too small */ \
		new_used = (khint32_t*)kmalloc(__kh_fsize(new_n_buckets) * sizeof(khint32_t)); \
		if (!new_used) return -1; /* not enough memory */ \
		memset(new_used, 0, __kh_fsize(new_n_buckets) * sizeof(khint32_t)); \
		n_buckets = h->keys? (khint_t)1U<<h->bits : 0U; \
		if (n_buckets < new_n_buckets) { /* expand */ \
			khkey_t *new_keys = (khkey_t*)krealloc((void*)h->keys, new_n_buckets * sizeof(khkey_t)); \
			if (!new_keys) { kfree(new_used); return -1; } \
			h->keys = new_keys; \
		} /* otherwise shrink */ \
		new_mask = new_n_buckets - 1; \
		for (j = 0; j != n_buckets; ++j) { \
			khkey_t key; \
			if (!__kh_used(h->used, j)) continue; \
			key = h->keys[j]; \
			__kh_set_unused(h->used, j); \
			while (1) { /* kick-out process; sort of like in Cuckoo hashing */ \
				khint_t i; \
				i = __kh_h2b(__hash_fn(key), new_bits); \
				while (__kh_used(new_used, i)) i = (i + 1) & new_mask; \
				__kh_set_used(new_used, i); \
				if (i < n_buckets && __kh_used(h->used, i)) { /* kick out the existing element */ \
					{ khkey_t tmp = h->keys[i]; h->keys[i] = key; key = tmp; } \
					__kh_set_unused(h->used, i); /* mark it as deleted in the old hash table */ \
				} else { /* write the element and jump out of the loop */ \
					h->keys[i] = key; \
					break; \
				} \
			} \
		} \
		if (n_buckets > new_n_buckets) { /* shrink the hash table */ \
			khkey_t *shrunk_keys = (khkey_t*)krealloc((void *)h->keys, new_n_buckets * sizeof(khkey_t)); \
			if (shrunk_keys) h->keys = shrunk_keys; /* on failure the old, larger block is still valid */ \
		} \
		kfree(h->used); /* free the working space */ \
		h->used = new_used, h->bits = new_bits; \
		return 0; \
	}
|
||||||
|
|
||||||
|
/*
 * __KHASHL_IMPL_PUT: insertion functions.
 *   prefix##_putp_core(h, key, hash, absent) - grows the table first when
 *       count has reached kh_max_count(); then probes linearly from the
 *       bucket derived from `hash`. On return *absent is
 *          1  the key was not present and has been stored,
 *          0  the key was already present (stored key left untouched),
 *         -1  the table could not be grown (the previous bucket count is
 *             returned and nothing is inserted).
 *       The return value is otherwise the bucket index of the key.
 *   prefix##_putp(h, key, absent) - same, hashing *key with __hash_fn
 *   prefix##_put(h, key, absent)  - same, taking the key by value
 */
#define __KHASHL_IMPL_PUT(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \
	SCOPE khint_t prefix##_putp_core(HType *h, const khkey_t *key, khint_t hash, int *absent) { \
		khint_t n_buckets, i, last, mask; \
		n_buckets = h->keys? (khint_t)1U<<h->bits : 0U; \
		*absent = -1; \
		if (h->count >= kh_max_count(n_buckets)) { /* rehashing */ \
			if (prefix##_resize(h, n_buckets + 1U) < 0) \
				return n_buckets; \
			n_buckets = (khint_t)1U<<h->bits; \
		} /* TODO: to implement automatically shrinking; resize() already support shrinking */ \
		mask = n_buckets - 1; \
		i = last = __kh_h2b(hash, h->bits); \
		while (__kh_used(h->used, i) && !__hash_eq(h->keys[i], *key)) { \
			i = (i + 1U) & mask; \
			if (i == last) break; \
		} \
		if (!__kh_used(h->used, i)) { /* not present at all */ \
			h->keys[i] = *key; \
			__kh_set_used(h->used, i); \
			++h->count; \
			*absent = 1; \
		} else *absent = 0; /* Don't touch h->keys[i] if present */ \
		return i; \
	} \
	SCOPE khint_t prefix##_putp(HType *h, const khkey_t *key, int *absent) { return prefix##_putp_core(h, key, __hash_fn(*key), absent); } \
	SCOPE khint_t prefix##_put(HType *h, khkey_t key, int *absent) { return prefix##_putp_core(h, &key, __hash_fn(key), absent); }
|
||||||
|
|
||||||
|
/*
 * __KHASHL_IMPL_DEL: defines prefix##_del(h, i), which removes the entry in
 * bucket i. No "deleted" marker is kept: entries that follow i in the probe
 * sequence are shifted back into the hole when their home bucket permits it,
 * so lookups by linear probing stay correct.
 *
 * Returns 1 if an entry was removed. Returns 0 without touching the table
 * when the table has no buckets, when i is outside the bucket range (for
 * example an end / "not found" iterator), or when bucket i is unused.
 */
#define __KHASHL_IMPL_DEL(SCOPE, HType, prefix, khkey_t, __hash_fn) \
	SCOPE int prefix##_del(HType *h, khint_t i) { \
		khint_t j = i, k, mask, n_buckets; \
		if (h->keys == 0) return 0; \
		n_buckets = (khint_t)1U<<h->bits; \
		if (i >= n_buckets || !__kh_used(h->used, i)) return 0; /* nothing stored at i */ \
		mask = n_buckets - 1U; \
		while (1) { \
			j = (j + 1U) & mask; \
			if (j == i || !__kh_used(h->used, j)) break; /* j==i only when the table is completely full */ \
			k = __kh_h2b(__hash_fn(h->keys[j]), h->bits); \
			if ((j > i && (k <= i || k > j)) || (j < i && (k <= i && k > j))) \
				h->keys[i] = h->keys[j], i = j; \
		} \
		__kh_set_unused(h->used, i); \
		--h->count; \
		return 1; \
	}
|
||||||
|
|
||||||
|
/*
 * KHASHL_DECLARE(HType, prefix, khkey_t): declares the table type and the
 * extern prototypes of its functions, without defining them.
 */
#define KHASHL_DECLARE(HType, prefix, khkey_t) \
	__KHASHL_TYPE(HType, khkey_t) \
	__KHASHL_PROTOTYPES(HType, prefix, khkey_t)
|
||||||
|
|
||||||
|
/*
 * KHASHL_INIT(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq): declares
 * the table type HType and defines its init/destroy/clear, get, resize, put
 * and del functions, each named prefix##_... and qualified with SCOPE.
 */
#define KHASHL_INIT(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \
	__KHASHL_TYPE(HType, khkey_t) \
	__KHASHL_IMPL_BASIC(SCOPE, HType, prefix) \
	__KHASHL_IMPL_GET(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \
	__KHASHL_IMPL_RESIZE(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \
	__KHASHL_IMPL_PUT(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \
	__KHASHL_IMPL_DEL(SCOPE, HType, prefix, khkey_t, __hash_fn)
|
||||||
|
|
||||||
|
/***************************
|
||||||
|
* Ensemble of hash tables *
|
||||||
|
***************************/
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
khint_t sub, pos;
|
||||||
|
} kh_ensitr_t;
|
||||||
|
|
||||||
|
/*
 * KHASHE_INIT(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq)
 *
 * Defines an ensemble of 2^bits independent sub-tables (type HType##_sub,
 * generated with KHASHL_INIT and file-local scope). The low `bits` bits of a
 * key's hash select the sub-table; the full hash is passed on to it.
 *
 *   prefix##_init(bits)          allocate the ensemble; returns NULL if
 *                                either allocation fails
 *   prefix##_destroy(g)          free every sub-table and the ensemble
 *                                (NULL is accepted)
 *   prefix##_getp / prefix##_get look a key up; on a miss the returned
 *                                iterator has pos == (khint_t)-1
 *   prefix##_putp / prefix##_put insert a key; *absent follows the sub-table
 *                                convention (1 inserted, 0 present, -1
 *                                failure) and the ensemble count only grows
 *                                when a key was really inserted
 *   prefix##_del(g, itr)         remove the entry at itr; returns the
 *                                sub-table's result and decrements the count
 *                                when it is non-zero
 */
#define KHASHE_INIT(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \
	KHASHL_INIT(KH_LOCAL, HType##_sub, prefix##_sub, khkey_t, __hash_fn, __hash_eq) \
	typedef struct HType { \
		khint64_t count:54, bits:8; \
		HType##_sub *sub; \
	} HType; \
	SCOPE HType *prefix##_init(int bits) { \
		HType *g; \
		g = (HType*)kcalloc(1, sizeof(*g)); \
		if (!g) return 0; /* out of memory */ \
		g->bits = bits; \
		g->sub = (HType##_sub*)kcalloc(1U<<bits, sizeof(*g->sub)); \
		if (!g->sub) { kfree(g); return 0; } /* do not hand out a half-built ensemble */ \
		return g; \
	} \
	SCOPE void prefix##_destroy(HType *g) { \
		int t; \
		if (!g) return; \
		for (t = 0; t < 1<<g->bits; ++t) { kfree((void*)g->sub[t].keys); kfree(g->sub[t].used); } \
		kfree(g->sub); kfree(g); \
	} \
	SCOPE kh_ensitr_t prefix##_getp(const HType *g, const khkey_t *key) { \
		khint_t hash, low, ret; \
		kh_ensitr_t r; \
		HType##_sub *h; \
		hash = __hash_fn(*key); \
		low = hash & ((1U<<g->bits) - 1); \
		h = &g->sub[low]; \
		ret = prefix##_sub_getp_core(h, key, hash); \
		if (ret == kh_end(h)) r.sub = low, r.pos = (khint_t)-1; \
		else r.sub = low, r.pos = ret; \
		return r; \
	} \
	SCOPE kh_ensitr_t prefix##_get(const HType *g, const khkey_t key) { return prefix##_getp(g, &key); } \
	SCOPE kh_ensitr_t prefix##_putp(HType *g, const khkey_t *key, int *absent) { \
		khint_t hash, low, ret; \
		kh_ensitr_t r; \
		HType##_sub *h; \
		hash = __hash_fn(*key); \
		low = hash & ((1U<<g->bits) - 1); \
		h = &g->sub[low]; \
		ret = prefix##_sub_putp_core(h, key, hash, absent); \
		if (*absent > 0) ++g->count; /* -1 signals failure and must not be counted */ \
		r.sub = low, r.pos = ret; \
		return r; \
	} \
	SCOPE kh_ensitr_t prefix##_put(HType *g, const khkey_t key, int *absent) { return prefix##_putp(g, &key, absent); } \
	SCOPE int prefix##_del(HType *g, kh_ensitr_t itr) { \
		HType##_sub *h = &g->sub[itr.sub]; \
		int ret; \
		ret = prefix##_sub_del(h, itr.pos); \
		if (ret) --g->count; \
		return ret; \
	}
|
||||||
|
|
||||||
|
/*****************************
|
||||||
|
* More convenient interface *
|
||||||
|
*****************************/
|
||||||
|
|
||||||
|
#define __kh_cached_hash(x) ((x).hash)
|
||||||
|
|
||||||
|
/* Hash set: a plain KHASHL_INIT instantiation with the arguments passed through unchanged. */
#define KHASHL_SET_INIT(SCOPE, TableT, pfx, KeyT, hash_fn_, hash_eq_) \
	KHASHL_INIT(SCOPE, TableT, pfx, KeyT, hash_fn_, hash_eq_)
|
||||||
|
|
||||||
|
/*
 * Hash map built on KHASHL_INIT.  Buckets are packed {key, val} pairs; hashing
 * and equality look at the key only.  The underlying table is generated at
 * KH_LOCAL scope under prefix##_m_* and wrapped by prefix##_init/destroy/get/
 * del/put at SCOPE.  get and put build a temporary bucket whose `val` is left
 * unset.
 */
#define KHASHL_MAP_INIT(SCOPE, HType, prefix, khkey_t, kh_val_t, __hash_fn, __hash_eq) \
	typedef struct { \
		khkey_t key; \
		kh_val_t val; \
	} kh_packed HType##_m_bucket_t; \
	static kh_inline khint_t prefix##_m_hash(HType##_m_bucket_t bkt) { \
		return __hash_fn(bkt.key); \
	} \
	static kh_inline int prefix##_m_eq(HType##_m_bucket_t lhs, HType##_m_bucket_t rhs) { \
		return __hash_eq(lhs.key, rhs.key); \
	} \
	KHASHL_INIT(KH_LOCAL, HType, prefix##_m, HType##_m_bucket_t, prefix##_m_hash, prefix##_m_eq) \
	SCOPE HType *prefix##_init(void) { \
		return prefix##_m_init(); \
	} \
	SCOPE void prefix##_destroy(HType *h) { \
		prefix##_m_destroy(h); \
	} \
	SCOPE khint_t prefix##_get(const HType *h, khkey_t key) { \
		HType##_m_bucket_t probe; \
		probe.key = key; \
		return prefix##_m_getp(h, &probe); \
	} \
	SCOPE int prefix##_del(HType *h, khint_t k) { \
		return prefix##_m_del(h, k); \
	} \
	SCOPE khint_t prefix##_put(HType *h, khkey_t key, int *absent) { \
		HType##_m_bucket_t probe; \
		probe.key = key; \
		return prefix##_m_putp(h, &probe, absent); \
	}
|
||||||
|
|
||||||
|
/*
 * Hash set that caches each key's hash inside the bucket.  The underlying
 * table hashes a bucket by reading the stored value (__kh_cached_hash);
 * equality compares the cached hashes first and only then calls __hash_eq on
 * the keys.  __hash_fn itself is evaluated once per get/put to fill the probe.
 */
#define KHASHL_CSET_INIT(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \
	typedef struct { \
		khkey_t key; \
		khint_t hash; \
	} kh_packed HType##_cs_bucket_t; \
	static kh_inline int prefix##_cs_eq(HType##_cs_bucket_t lhs, HType##_cs_bucket_t rhs) { \
		return lhs.hash == rhs.hash && __hash_eq(lhs.key, rhs.key); \
	} \
	KHASHL_INIT(KH_LOCAL, HType, prefix##_cs, HType##_cs_bucket_t, __kh_cached_hash, prefix##_cs_eq) \
	SCOPE HType *prefix##_init(void) { \
		return prefix##_cs_init(); \
	} \
	SCOPE void prefix##_destroy(HType *h) { \
		prefix##_cs_destroy(h); \
	} \
	SCOPE khint_t prefix##_get(const HType *h, khkey_t key) { \
		HType##_cs_bucket_t probe; \
		probe.key = key; \
		probe.hash = __hash_fn(key); \
		return prefix##_cs_getp(h, &probe); \
	} \
	SCOPE int prefix##_del(HType *h, khint_t k) { \
		return prefix##_cs_del(h, k); \
	} \
	SCOPE khint_t prefix##_put(HType *h, khkey_t key, int *absent) { \
		HType##_cs_bucket_t probe; \
		probe.key = key; \
		probe.hash = __hash_fn(key); \
		return prefix##_cs_putp(h, &probe, absent); \
	}
|
||||||
|
|
||||||
|
/*
 * Hash map that caches each key's hash inside the bucket ({key, val, hash}).
 * Same scheme as KHASHL_CSET_INIT: the table hashes a bucket by reading the
 * stored hash, and equality compares cached hashes before the keys.  get and
 * put build a temporary bucket whose `val` is left unset.
 */
#define KHASHL_CMAP_INIT(SCOPE, HType, prefix, khkey_t, kh_val_t, __hash_fn, __hash_eq) \
	typedef struct { \
		khkey_t key; \
		kh_val_t val; \
		khint_t hash; \
	} kh_packed HType##_cm_bucket_t; \
	static kh_inline int prefix##_cm_eq(HType##_cm_bucket_t lhs, HType##_cm_bucket_t rhs) { \
		return lhs.hash == rhs.hash && __hash_eq(lhs.key, rhs.key); \
	} \
	KHASHL_INIT(KH_LOCAL, HType, prefix##_cm, HType##_cm_bucket_t, __kh_cached_hash, prefix##_cm_eq) \
	SCOPE HType *prefix##_init(void) { \
		return prefix##_cm_init(); \
	} \
	SCOPE void prefix##_destroy(HType *h) { \
		prefix##_cm_destroy(h); \
	} \
	SCOPE khint_t prefix##_get(const HType *h, khkey_t key) { \
		HType##_cm_bucket_t probe; \
		probe.key = key; \
		probe.hash = __hash_fn(key); \
		return prefix##_cm_getp(h, &probe); \
	} \
	SCOPE int prefix##_del(HType *h, khint_t k) { \
		return prefix##_cm_del(h, k); \
	} \
	SCOPE khint_t prefix##_put(HType *h, khkey_t key, int *absent) { \
		HType##_cm_bucket_t probe; \
		probe.key = key; \
		probe.hash = __hash_fn(key); \
		return prefix##_cm_putp(h, &probe, absent); \
	}
|
||||||
|
|
||||||
|
/* Ensemble hash set: a plain KHASHE_INIT instantiation with the arguments passed through unchanged. */
#define KHASHE_SET_INIT(SCOPE, TableT, pfx, KeyT, hash_fn_, hash_eq_) \
	KHASHE_INIT(SCOPE, TableT, pfx, KeyT, hash_fn_, hash_eq_)
|
||||||
|
|
||||||
|
/*
 * Ensemble hash map built on KHASHE_INIT.  Buckets are packed {key, val}
 * pairs; hashing and equality look at the key only.  The underlying ensemble
 * lives under prefix##_m_* at KH_LOCAL scope; the SCOPE wrappers below take a
 * bare key and return/accept kh_ensitr_t iterators.  get and put build a
 * temporary bucket whose `val` is left unset.
 */
#define KHASHE_MAP_INIT(SCOPE, HType, prefix, khkey_t, kh_val_t, __hash_fn, __hash_eq) \
	typedef struct { \
		khkey_t key; \
		kh_val_t val; \
	} kh_packed HType##_m_bucket_t; \
	static kh_inline khint_t prefix##_m_hash(HType##_m_bucket_t bkt) { \
		return __hash_fn(bkt.key); \
	} \
	static kh_inline int prefix##_m_eq(HType##_m_bucket_t lhs, HType##_m_bucket_t rhs) { \
		return __hash_eq(lhs.key, rhs.key); \
	} \
	KHASHE_INIT(KH_LOCAL, HType, prefix##_m, HType##_m_bucket_t, prefix##_m_hash, prefix##_m_eq) \
	SCOPE HType *prefix##_init(int bits) { \
		return prefix##_m_init(bits); \
	} \
	SCOPE void prefix##_destroy(HType *h) { \
		prefix##_m_destroy(h); \
	} \
	SCOPE kh_ensitr_t prefix##_get(const HType *h, khkey_t key) { \
		HType##_m_bucket_t probe; \
		probe.key = key; \
		return prefix##_m_getp(h, &probe); \
	} \
	SCOPE int prefix##_del(HType *h, kh_ensitr_t k) { \
		return prefix##_m_del(h, k); \
	} \
	SCOPE kh_ensitr_t prefix##_put(HType *h, khkey_t key, int *absent) { \
		HType##_m_bucket_t probe; \
		probe.key = key; \
		return prefix##_m_putp(h, &probe, absent); \
	}
|
||||||
|
|
||||||
|
/**************************
|
||||||
|
* Public macro functions *
|
||||||
|
**************************/
|
||||||
|
|
||||||
|
/* --- single table: h is a table pointer, x a bucket index --- */
#define kh_bucket(h, x)  ((h)->keys[x])                           /* whole bucket */
#define kh_size(h)       ((h)->count)                             /* number of stored keys */
#define kh_capacity(h)   ((h)->keys? 1U<<(h)->bits : 0U)          /* 0 while no bucket array exists */
#define kh_end(h)        kh_capacity(h)                           /* one-past-the-last bucket index */

/* these two require buckets with .key / .val members (the *_MAP_INIT and cached variants) */
#define kh_key(h, x)     ((h)->keys[x].key)
#define kh_val(h, x)     ((h)->keys[x].val)
#define kh_exist(h, x)   __kh_used((h)->used, (x))                /* non-zero if bucket x is occupied */

/* visit every occupied bucket; the statement following the macro is the loop body */
#define kh_foreach(h, x) \
	for ((x) = 0; (x) != kh_end(h); ++(x)) \
		if (kh_exist((h), (x)))

/* --- ensemble: g is an ensemble pointer, x a kh_ensitr_t --- */
#define kh_ens_key(g, x)    kh_key(&(g)->sub[(x).sub], (x).pos)
#define kh_ens_val(g, x)    kh_val(&(g)->sub[(x).sub], (x).pos)
#define kh_ens_exist(g, x)  kh_exist(&(g)->sub[(x).sub], (x).pos)
#define kh_ens_is_end(x)    ((x).pos == (khint_t)-1)              /* lookup miss marker */
#define kh_ens_size(g)      ((g)->count)

/* visit every occupied bucket of every sub-table */
#define kh_ens_foreach(g, x) \
	for ((x).sub = 0; (x).sub != 1<<(g)->bits; ++(x).sub) \
		for ((x).pos = 0; (x).pos != kh_end(&(g)->sub[(x).sub]); ++(x).pos) \
			if (kh_ens_exist((g), (x)))
|
||||||
|
|
||||||
|
/**************************************
|
||||||
|
* Common hash and equality functions *
|
||||||
|
**************************************/
|
||||||
|
|
||||||
|
#define kh_eq_generic(a, b)  ((a) == (b))               /* equality via operator == */
#define kh_eq_str(a, b)      (strcmp((a), (b)) == 0)    /* equality of NUL-terminated strings */
#define kh_hash_dummy(x)     ((khint_t)(x))             /* identity "hash": the value cast to khint_t */
|
||||||
|
|
||||||
|
/* Integer mixer (murmur finishing step): alternating xor-shift and multiply rounds. */
static kh_inline khint_t kh_hash_uint32(khint_t x) {
	x = (x ^ (x >> 16)) * 0x85ebca6bU;
	x = (x ^ (x >> 13)) * 0xc2b2ae35U;
	return x ^ (x >> 16);
}
|
||||||
|
|
||||||
|
/* 64-bit mixer (splitmix64; see https://nullprogram.com/blog/2018/07/31/ for
 * inversion).  The mixed value is truncated to khint_t on return. */
static kh_inline khint_t kh_hash_uint64(khint64_t x) {
	x = (x ^ (x >> 30)) * 0xbf58476d1ce4e5b9ULL;
	x = (x ^ (x >> 27)) * 0x94d049bb133111ebULL;
	x = x ^ (x >> 31);
	return (khint_t)x;
}
|
||||||
|
|
||||||
|
/* XORed into the FNV offset basis by kh_hash_str() and kh_hash_bytes(). */
#define KH_FNV_SEED 11
|
||||||
|
|
||||||
|
/* FNV1a over a NUL-terminated string; bytes are consumed as unsigned char and
 * the terminator is not hashed.  The start value is the seeded offset basis. */
static kh_inline khint_t kh_hash_str(kh_cstr_t s) {
	const unsigned char *cur = (const unsigned char*)s;
	khint_t acc = KH_FNV_SEED ^ 2166136261U;
	while (*cur) {
		acc ^= *cur;
		acc *= 16777619;
		++cur;
	}
	return acc;
}
|
||||||
|
|
||||||
|
/* FNV1a over the first `len` bytes of `s`, using the same seeded start value
 * as kh_hash_str().  A non-positive `len` hashes nothing and returns that
 * start value. */
static kh_inline khint_t kh_hash_bytes(int len, const unsigned char *s) {
	khint_t acc = KH_FNV_SEED ^ 2166136261U;
	int idx = 0;
	while (idx < len) {
		acc ^= s[idx];
		acc *= 16777619;
		++idx;
	}
	return acc;
}
|
||||||
|
|
||||||
|
#endif /* __AC_KHASHL_H */
|
||||||
|
|
@ -0,0 +1,423 @@
|
||||||
|
#include <math.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <assert.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include "khmm.h"
|
||||||
|
|
||||||
|
// new/delete hmm_par_t
|
||||||
|
|
||||||
|
hmm_par_t *hmm_new_par(int m, int n)
|
||||||
|
{
|
||||||
|
hmm_par_t *hp;
|
||||||
|
int i;
|
||||||
|
assert(m > 0 && n > 0);
|
||||||
|
hp = (hmm_par_t*)calloc(1, sizeof(hmm_par_t));
|
||||||
|
hp->m = m; hp->n = n;
|
||||||
|
hp->a0 = (FLOAT*)calloc(n, sizeof(FLOAT));
|
||||||
|
hp->a = (FLOAT**)calloc2(n, n, sizeof(FLOAT));
|
||||||
|
hp->e = (FLOAT**)calloc2(m + 1, n, sizeof(FLOAT));
|
||||||
|
hp->ae = (FLOAT**)calloc2((m + 1) * n, n, sizeof(FLOAT));
|
||||||
|
for (i = 0; i != n; ++i) hp->e[m][i] = 1.0;
|
||||||
|
return hp;
|
||||||
|
}
|
||||||
|
// Free a parameter set created by hmm_new_par(); a null pointer is ignored.
void hmm_delete_par(hmm_par_t *hp)
{
	int row, n_ae_rows;

	if (hp == 0) return;
	row = 0;
	while (row != hp->n) free(hp->a[row++]);       // n transition rows
	row = 0;
	while (row <= hp->m) free(hp->e[row++]);       // m + 1 emission rows
	n_ae_rows = (hp->m + 1) * hp->n;
	for (row = 0; row < n_ae_rows; ++row)
		free(hp->ae[row]);
	free(hp->a);
	free(hp->e);
	free(hp->a0);
	free(hp->ae);
	free(hp);
}
|
||||||
|
|
||||||
|
// new/delete hmm_data_t
|
||||||
|
|
||||||
|
hmm_data_t *hmm_new_data(int L, const char *seq, const hmm_par_t *hp)
|
||||||
|
{
|
||||||
|
hmm_data_t *hd;
|
||||||
|
hd = (hmm_data_t*)calloc(1, sizeof(hmm_data_t));
|
||||||
|
hd->L = L;
|
||||||
|
hd->seq = (char*)malloc(L + 1);
|
||||||
|
memcpy(hd->seq + 1, seq, L);
|
||||||
|
return hd;
|
||||||
|
}
|
||||||
|
// Free an hmm_data_t and everything hanging off it; a null pointer is ignored.
// The forward and backward matrices have L + 1 rows each and may be absent.
void hmm_delete_data(hmm_data_t *hd)
{
	int row;

	if (hd == 0) return;
	if (hd->f)
		for (row = 0; row <= hd->L; ++row) free(hd->f[row]);
	if (hd->b)
		for (row = 0; row <= hd->L; ++row) free(hd->b[row]);
	free(hd->f);
	free(hd->b);
	free(hd->s);
	free(hd->v);
	free(hd->p);
	free(hd->seq);
	free(hd);
}
|
||||||
|
|
||||||
|
// new/delete hmm_exp_t
|
||||||
|
|
||||||
|
hmm_exp_t *hmm_new_exp(const hmm_par_t *hp)
|
||||||
|
{
|
||||||
|
hmm_exp_t *he;
|
||||||
|
assert(hp);
|
||||||
|
he = (hmm_exp_t*)calloc(1, sizeof(hmm_exp_t));
|
||||||
|
he->m = hp->m; he->n = hp->n;
|
||||||
|
he->A0 = (FLOAT*)calloc(hp->n, sizeof(FLOAT));
|
||||||
|
he->A = (FLOAT**)calloc2(hp->n, hp->n, sizeof(FLOAT));
|
||||||
|
he->E = (FLOAT**)calloc2(hp->m + 1, hp->n, sizeof(FLOAT));
|
||||||
|
return he;
|
||||||
|
}
|
||||||
|
// Free expected-count tables created by hmm_new_exp(); a null pointer is ignored.
void hmm_delete_exp(hmm_exp_t *he)
{
	int row;

	if (he == 0) return;
	row = 0;
	while (row != he->n) free(he->A[row++]);   // n transition-count rows
	row = 0;
	while (row <= he->m) free(he->E[row++]);   // m + 1 emission-count rows
	free(he->A);
	free(he->E);
	free(he->A0);
	free(he);
}
|
||||||
|
|
||||||
|
// Viterbi algorithm
|
||||||
|
|
||||||
|
// Viterbi decoding in log space.
//
// Fills hd->v[1..L] with the most probable state path for hd->seq[1..L]
// (hd->v[0] is set from the position-1 backtrace entry, which is always 0),
// sets HMM_VITERBI in hd->status and returns the log probability of that path.
// Any previous hd->v is released first.  Allocation results are not checked.
FLOAT hmm_Viterbi(const hmm_par_t *hp, hmm_data_t *hd)
{
	FLOAT **la, **le, *preV, *curV, max;
	int **Vmax, max_l; // backtrace matrix
	int k, l, b, u;

	free(hd->v); // free(NULL) is a no-op
	hd->v = (int*)calloc(hd->L+1, sizeof(int));
	la = (FLOAT**)calloc2(hp->n, hp->n, sizeof(FLOAT));
	le = (FLOAT**)calloc2(hp->m + 1, hp->n, sizeof(FLOAT));
	Vmax = (int**)calloc2(hd->L+1, hp->n, sizeof(int));
	preV = (FLOAT*)malloc(sizeof(FLOAT) * hp->n);
	curV = (FLOAT*)malloc(sizeof(FLOAT) * hp->n);
	// log transition matrix, stored transposed (la[k][l] = log a[l][k]) so the
	// inner maximisation below walks one contiguous row; this is not a bug
	for (k = 0; k != hp->n; ++k)
		for (l = 0; l != hp->n; ++l)
			la[k][l] = log(hp->a[l][k]);
	// log emission matrix; the extra row m has log-probability 0
	for (b = 0; b != hp->m; ++b)
		for (k = 0; k != hp->n; ++k)
			le[b][k] = log(hp->e[b][k]);
	for (k = 0; k != hp->n; ++k) le[hp->m][k] = 0.0;
	// V_k(1)
	for (k = 0; k != hp->n; ++k) {
		preV[k] = le[(int)hd->seq[1]][k] + log(hp->a0[k]);
		Vmax[1][k] = 0;
	}
	// all the rest
	for (u = 2; u <= hd->L; ++u) {
		FLOAT *tmp, *leu = le[(int)hd->seq[u]];
		for (k = 0; k != hp->n; ++k) {
			FLOAT *laa = la[k];
			for (l = 0, max = -HMM_INF, max_l = -1; l != hp->n; ++l) {
				if (max < preV[l] + laa[l]) {
					max = preV[l] + laa[l];
					max_l = l;
				}
			}
			assert(max_l >= 0); // some predecessor must beat -HMM_INF
			curV[k] = leu[k] + max;
			Vmax[u][k] = max_l;
		}
		tmp = curV; curV = preV; preV = tmp; // swap
	}
	// backtrace: start from the best final state
	for (k = 0, max_l = -1, max = -HMM_INF; k != hp->n; ++k) {
		if (max < preV[k]) {
			max = preV[k]; max_l = k;
		}
	}
	assert(max_l >= 0); // some final state must beat -HMM_INF
	hd->v[hd->L] = max_l;
	for (u = hd->L; u >= 1; --u)
		hd->v[u-1] = Vmax[u][hd->v[u]];
	for (k = 0; k != hp->n; ++k) free(la[k]);
	for (b = 0; b <= hp->m; ++b) free(le[b]); // le has m + 1 rows; row m used to leak
	for (u = 0; u <= hd->L; ++u) free(Vmax[u]);
	free(la); free(le); free(Vmax); free(preV); free(curV);
	hd->status |= HMM_VITERBI;
	return max;
}
|
||||||
|
|
||||||
|
// forward algorithm
|
||||||
|
|
||||||
|
// Scaled forward algorithm.
//
// (Re)allocates hd->f ((L+1) x n) and hd->s (L+1), freeing earlier results.
// At every position the forward values are divided by their sum, which is
// stored in hd->s[u].  Row 0 is filled with zeros and s[0] with 1; neither is
// meant to be used.  HMM_FORWARD is cleared at the start and set on completion.
void hmm_forward(const hmm_par_t *hp, hmm_data_t *hd)
{
	FLOAT **a_t, *f_first, *e_first, scale, dot;
	int pos, cur, prev, row;
	int n_state, len;

	assert(hp && hd);
	n_state = hp->n;
	len = hd->L;
	// drop the results of any previous run, then allocate fresh storage
	if (hd->s) free(hd->s);
	if (hd->f) {
		for (row = 0; row <= hd->L; ++row) free(hd->f[row]);
		free(hd->f);
	}
	hd->f = (FLOAT**)calloc2(hd->L+1, hp->n, sizeof(FLOAT));
	hd->s = (FLOAT*)calloc(hd->L+1, sizeof(FLOAT));
	hd->status &= ~(unsigned)HMM_FORWARD;
	// a_t[k][l] = a[l][k]: the transposed copy keeps the core loop on contiguous rows
	a_t = (FLOAT**)calloc2(n_state, n_state, sizeof(FLOAT));
	for (cur = 0; cur != n_state; ++cur) {
		FLOAT *dst = a_t[cur];
		for (prev = 0; prev != n_state; ++prev)
			dst[prev] = hp->a[prev][cur];
	}
	// position 0, which should never be used
	hd->s[0] = 1.0;
	for (cur = 0; cur != n_state; ++cur)
		hd->f[0][cur] = 0.0;
	// position 1: start probability times emission
	f_first = hd->f[1];
	e_first = hp->e[(int)hd->seq[1]];
	scale = 0.0;
	for (cur = 0; cur != n_state; ++cur) {
		f_first[cur] = hp->a0[cur] * e_first[cur];
		scale += f_first[cur];
	}
	for (cur = 0; cur != n_state; ++cur)
		f_first[cur] /= scale;
	hd->s[1] = scale;
	// positions 2..L, the core loop
	for (pos = 2; pos <= len; ++pos) {
		FLOAT *f_cur = hd->f[pos], *f_prev = hd->f[pos-1], *emit = hp->e[(int)hd->seq[pos]];
		scale = 0.0;
		for (cur = 0; cur != n_state; ++cur) {
			FLOAT *into = a_t[cur];
			dot = 0.0;
			for (prev = 0; prev != n_state; ++prev)
				dot += f_prev[prev] * into[prev];
			f_cur[cur] = emit[cur] * dot;
			scale += f_cur[cur];
		}
		for (cur = 0; cur != n_state; ++cur)
			f_cur[cur] /= scale;
		hd->s[pos] = scale;
	}
	// release the transposed copy
	for (row = 0; row != hp->n; ++row) free(a_t[row]);
	free(a_t);
	hd->status |= HMM_FORWARD;
}
|
||||||
|
|
||||||
|
// precalculate hp->ae
|
||||||
|
|
||||||
|
// Fill the auxiliary table hp->ae: row (b * n + k), column l holds
// e[b][l] * a[k][l], for every symbol b in 0..m and every state pair (k, l).
// hmm_backward() and hmm_expect() read this table.
void hmm_pre_backward(hmm_par_t *hp)
{
	int n_sym, n_state, sym, from, to;

	assert(hp);
	n_sym = hp->m;
	n_state = hp->n;
	for (sym = 0; sym <= n_sym; ++sym) {
		const FLOAT *emit = hp->e[sym];
		for (from = 0; from != n_state; ++from) {
			const FLOAT *trans = hp->a[from];
			FLOAT *out = hp->ae[sym * hp->n + from];
			for (to = 0; to != n_state; ++to)
				out[to] = emit[to] * trans[to];
		}
	}
}
|
||||||
|
|
||||||
|
// backward algorithm
|
||||||
|
|
||||||
|
// Scaled backward algorithm.
//
// Requires a completed forward pass (HMM_FORWARD set) because it reuses the
// scaling factors hd->s[], and reads hp->ae as filled by hmm_pre_backward().
// (Re)allocates hd->b ((L+1) x n), sets HMM_BACKWARD and finally checks that
// sum_l a0[l] * b[1][l] * e[seq[1]][l] is 1 within 1e-6, warning on stderr
// otherwise.
void hmm_backward(const hmm_par_t *hp, hmm_data_t *hd)
{
	FLOAT acc;
	int from, to, pos, row;
	int n_state, len;

	assert(hp && hd);
	assert(hd->status & HMM_FORWARD);
	n_state = hp->n;
	len = hd->L;
	// drop the result of any previous run
	if (hd->b) {
		for (row = 0; row <= hd->L; ++row) free(hd->b[row]);
		free(hd->b);
	}
	hd->status &= ~(unsigned)HMM_BACKWARD;
	hd->b = (FLOAT**)calloc2(len+1, hp->n, sizeof(FLOAT));
	// position L
	for (from = 0; from != hp->n; ++from)
		hd->b[len][from] = 1.0 / hd->s[len];
	// positions L-1 down to 1, the core loop
	for (pos = len - 1; pos >= 1; --pos) {
		FLOAT *b_next = hd->b[pos+1];
		FLOAT **ae_rows = hp->ae + (int)hd->seq[pos+1] * n_state;
		for (from = 0; from != n_state; ++from) {
			FLOAT *ae = ae_rows[from];
			acc = 0.0;
			for (to = 0; to != n_state; ++to)
				acc += ae[to] * b_next[to];
			hd->b[pos][from] = acc / hd->s[pos];
		}
	}
	hd->status |= HMM_BACKWARD;
	// sanity check; in theory the sum below always equals 1
	acc = 0.0;
	for (to = 0; to != n_state; ++to)
		acc += hp->a0[to] * hd->b[1][to] * hp->e[(int)hd->seq[1]][to];
	if (acc > 1.0 + 1e-6 || acc < 1.0 - 1e-6)
		fprintf(stderr, "++ Underflow may have happened (%lg).\n", acc);
}
|
||||||
|
|
||||||
|
// log-likelihood of the observation
|
||||||
|
|
||||||
|
FLOAT hmm_lk(const hmm_data_t *hd)
|
||||||
|
{
|
||||||
|
FLOAT sum = 0.0, prod = 1.0;
|
||||||
|
int u, L;
|
||||||
|
L = hd->L;
|
||||||
|
assert(hd->status & HMM_FORWARD);
|
||||||
|
for (u = 1; u <= L; ++u) {
|
||||||
|
prod *= hd->s[u];
|
||||||
|
if (prod < HMM_TINY || prod >= 1.0/HMM_TINY) { // reset
|
||||||
|
sum += log(prod);
|
||||||
|
prod = 1.0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
sum += log(prod);
|
||||||
|
return sum;
|
||||||
|
}
|
||||||
|
|
||||||
|
// posterior decoding
|
||||||
|
|
||||||
|
// Posterior decoding.
//
// For every position u in 1..L, stores in hd->p[u] the state k maximising
// f[u][k] * b[u][k] * s[u]; hd->p[0] stays 0.  Requires a completed backward
// pass (HMM_BACKWARD).  Any previous hd->p is released first.  Sets
// HMM_POSTDEC on completion.
void hmm_post_decode(const hmm_par_t *hp, hmm_data_t *hd)
{
	int u, k;

	// bitwise test of the flag; a logical && would accept any non-zero status
	assert(hd->status & HMM_BACKWARD);
	free(hd->p); // free(NULL) is a no-op
	hd->p = (int*)calloc(hd->L + 1, sizeof(int));
	for (u = 1; u <= hd->L; ++u) {
		FLOAT prob, max, *fu = hd->f[u], *bu = hd->b[u], su = hd->s[u];
		int max_k;
		for (k = 0, max = -1.0, max_k = -1; k != hp->n; ++k) {
			if (max < (prob = fu[k] * bu[k] * su)) {
				max = prob; max_k = k;
			}
		}
		assert(max_k >= 0); // fails only if no product compares greater than -1 (e.g. NaN)
		hd->p[u] = max_k;
	}
	hd->status |= HMM_POSTDEC;
}
|
||||||
|
|
||||||
|
// posterior probability of states
|
||||||
|
|
||||||
|
// Posterior state probabilities at position u: prob[k] = f[u][k] * b[u][k] * s[u]
// for each of the n states.  Returns their sum, which in theory is always 1.0.
// `prob` must have room for n values.
FLOAT hmm_post_state(const hmm_par_t *hp, const hmm_data_t *hd, int u, FLOAT *prob)
{
	FLOAT total = 0.0;
	FLOAT scale = hd->s[u];
	FLOAT *fwd = hd->f[u], *bwd = hd->b[u];
	int state = 0;

	while (state != hp->n) {
		prob[state] = fwd[state] * bwd[state] * scale;
		total += prob[state];
		++state;
	}
	return total;
}
|
||||||
|
|
||||||
|
// expected counts
|
||||||
|
|
||||||
|
hmm_exp_t *hmm_expect(const hmm_par_t *hp, const hmm_data_t *hd)
|
||||||
|
{
|
||||||
|
int k, l, u, b, m, n;
|
||||||
|
hmm_exp_t *he;
|
||||||
|
assert(hd->status & HMM_BACKWARD);
|
||||||
|
he = hmm_new_exp(hp);
|
||||||
|
// initialization
|
||||||
|
m = hp->m; n = hp->n;
|
||||||
|
for (k = 0; k != n; ++k)
|
||||||
|
for (l = 0; l != n; ++l) he->A[k][l] = HMM_TINY;
|
||||||
|
for (b = 0; b <= m; ++b)
|
||||||
|
for (l = 0; l != n; ++l) he->E[b][l] = HMM_TINY;
|
||||||
|
// calculate A_{kl} and E_k(b), k,l\in[0,n)
|
||||||
|
for (u = 1; u < hd->L; ++u) {
|
||||||
|
FLOAT *fu = hd->f[u], *bu = hd->b[u], *bu1 = hd->b[u+1], ss = hd->s[u];
|
||||||
|
FLOAT *Ec = he->E[(int)hd->seq[u]], **p = hp->ae + (int)hd->seq[u+1] * n;
|
||||||
|
for (k = 0; k != n; ++k) {
|
||||||
|
FLOAT *q = p[k], *AA = he->A[k], fuk = fu[k];
|
||||||
|
for (l = 0; l != n; ++l) // this is cache-efficient
|
||||||
|
AA[l] += fuk * q[l] * bu1[l];
|
||||||
|
Ec[k] += fuk * bu[k] * ss;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// calculate A0_l
|
||||||
|
for (l = 0; l != n; ++l)
|
||||||
|
he->A0[l] += hp->a0[l] * hp->e[(int)hd->seq[1]][l] * hd->b[1][l];
|
||||||
|
return he;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Compute sum over cells of count * log(count / row_total), separately for the
// emission counts (per state, over symbols 0..m-1) and the transition counts
// (per source state).  The result is stored in he->Q0 and returned.
FLOAT hmm_Q0(const hmm_par_t *hp, hmm_exp_t *he)
{
	FLOAT q0 = 0.0;
	int state, to, sym;

	// emission part
	for (state = 0; state != hp->n; ++state) {
		FLOAT total = 0.0;
		for (sym = 0; sym != hp->m; ++sym)
			total += he->E[sym][state];
		for (sym = 0; sym != hp->m; ++sym)
			q0 += he->E[sym][state] * log(he->E[sym][state] / total);
	}
	// transition part
	for (state = 0; state != hp->n; ++state) {
		FLOAT *row = he->A[state];
		FLOAT total = 0.0;
		for (to = 0; to != hp->n; ++to)
			total += row[to];
		for (to = 0; to != hp->n; ++to)
			q0 += row[to] * log(row[to] / total);
	}
	he->Q0 = q0;
	return q0;
}
|
||||||
|
|
||||||
|
// add he0 to he1
|
||||||
|
|
||||||
|
// Accumulate the counts of he0 into he1 (he1 += he0).  Both must have the same
// dimensions.  A0, A and emission rows 0..m-1 are added; emission row m is not.
void hmm_add_expect(const hmm_exp_t *he0, hmm_exp_t *he1)
{
	int sym, from, to;

	assert(he0->m == he1->m && he0->n == he1->n);
	for (from = 0; from != he1->n; ++from) {
		FLOAT *dst = he1->A[from];
		const FLOAT *src = he0->A[from];
		he1->A0[from] += he0->A0[from];
		for (to = 0; to != he1->n; ++to)
			dst[to] += src[to];
	}
	for (sym = 0; sym != he1->m; ++sym) {
		FLOAT *dst = he1->E[sym];
		const FLOAT *src = he0->E[sym];
		for (to = 0; to != he1->n; ++to)
			dst[to] += src[to];
	}
}
|
||||||
|
|
||||||
|
// the EM-Q function
|
||||||
|
|
||||||
|
FLOAT hmm_Q(const hmm_par_t *hp, const hmm_exp_t *he)
|
||||||
|
{
|
||||||
|
FLOAT sum = 0.0;
|
||||||
|
int bb, k, l;
|
||||||
|
for (bb = 0; bb != he->m; ++bb) {
|
||||||
|
FLOAT *eb = hp->e[bb], *Eb = he->E[bb];
|
||||||
|
for (k = 0; k != hp->n; ++k) {
|
||||||
|
if (eb[k] <= 0.0) return -HMM_INF;
|
||||||
|
sum += Eb[k] * log(eb[k]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for (k = 0; k != he->n; ++k) {
|
||||||
|
FLOAT *Ak = he->A[k], *ak = hp->a[k];
|
||||||
|
for (l = 0; l != he->n; ++l) {
|
||||||
|
if (ak[l] <= 0.0) return -HMM_INF;
|
||||||
|
sum += Ak[l] * log(ak[l]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return (sum -= he->Q0);
|
||||||
|
}
|
||||||
|
|
||||||
|
// simulate sequence
|
||||||
|
|
||||||
|
char *hmm_simulate(const hmm_par_t *hp, int L)
|
||||||
|
{
|
||||||
|
int i, k, l, b;
|
||||||
|
FLOAT x, y, **et;
|
||||||
|
char *seq;
|
||||||
|
seq = (char*)calloc(L+1, 1);
|
||||||
|
// calculate the transpose of hp->e[][]
|
||||||
|
et = (FLOAT**)calloc2(hp->n, hp->m, sizeof(FLOAT));
|
||||||
|
for (k = 0; k != hp->n; ++k)
|
||||||
|
for (b = 0; b != hp->m; ++b)
|
||||||
|
et[k][b] = hp->e[b][k];
|
||||||
|
// the initial state, drawn from a0[]
|
||||||
|
x = drand48();
|
||||||
|
for (k = 0, y = 0.0; k != hp->n; ++k) {
|
||||||
|
y += hp->a0[k];
|
||||||
|
if (y >= x) break;
|
||||||
|
}
|
||||||
|
// main loop
|
||||||
|
for (i = 0; i != L; ++i) {
|
||||||
|
FLOAT *el, *ak = hp->a[k];
|
||||||
|
x = drand48();
|
||||||
|
for (l = 0, y = 0.0; l != hp->n; ++l) {
|
||||||
|
y += ak[l];
|
||||||
|
if (y >= x) break;
|
||||||
|
}
|
||||||
|
el = et[l];
|
||||||
|
x = drand48();
|
||||||
|
for (b = 0, y = 0.0; b != hp->m; ++b) {
|
||||||
|
y += el[b];
|
||||||
|
if (y >= x) break;
|
||||||
|
}
|
||||||
|
seq[i] = b;
|
||||||
|
k = l;
|
||||||
|
}
|
||||||
|
for (k = 0; k != hp->n; ++k) free(et[k]);
|
||||||
|
free(et);
|
||||||
|
return seq;
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,107 @@
|
||||||
|
#ifndef AC_SCHMM_H_
|
||||||
|
#define AC_SCHMM_H_
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Last Modified: 2008-03-10
|
||||||
|
* Version: 0.1.0-8
|
||||||
|
*
|
||||||
|
* 2008-03-10, 0.1.0-8: make icc report two more "VECTORIZED"
|
||||||
|
* 2008-03-10, 0.1.0-7: accelerate for some CPU
|
||||||
|
* 2008-02-07, 0.1.0-6: simulate sequences
|
||||||
|
* 2008-01-15, 0.1.0-5: goodness of fit
|
||||||
|
* 2007-11-20, 0.1.0-4: add function declaration of hmm_post_decode()
|
||||||
|
* 2007-11-09: fix a memory leak
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <stdlib.h>
|
||||||
|
|
||||||
|
/* NOTE(review): the changelog above lists 0.1.0-8 as the latest version while
 * this string says 0.1.0-7 -- confirm which one is intended. */
#define HMM_VERSION "0.1.0-7"

/* bits of hmm_data_t::status, set once the corresponding routine has completed */
#define HMM_FORWARD  0x02
#define HMM_BACKWARD 0x04
#define HMM_VITERBI  0x40
#define HMM_POSTDEC  0x80

/* floating-point type used throughout; may be predefined before this header is included */
#ifndef FLOAT
#define FLOAT double
#endif

#define HMM_TINY 1e-25 /* initial value of expected counts; rescaling threshold in hmm_lk() */
#define HMM_INF  1e300 /* stands in for infinity (used negated as "minus infinity") */
|
||||||
|
|
||||||
|
typedef struct
|
||||||
|
{
|
||||||
|
int m, n; // number of symbols, number of states
|
||||||
|
FLOAT **a, **e; // transition matrix and emitting probilities
|
||||||
|
FLOAT **ae; // auxiliary array for acceleration, should be calculated by hmm_pre_backward()
|
||||||
|
FLOAT *a0; // trasition matrix from the start state
|
||||||
|
} hmm_par_t;
|
||||||
|
|
||||||
|
typedef struct
|
||||||
|
{
|
||||||
|
int L;
|
||||||
|
unsigned status;
|
||||||
|
char *seq;
|
||||||
|
FLOAT **f, **b, *s;
|
||||||
|
int *v; // Viterbi path
|
||||||
|
int *p; // posterior decoding
|
||||||
|
} hmm_data_t;
|
||||||
|
|
||||||
|
typedef struct
|
||||||
|
{
|
||||||
|
int m, n;
|
||||||
|
FLOAT Q0, **A, **E, *A0;
|
||||||
|
} hmm_exp_t;
|
||||||
|
|
||||||
|
typedef struct
|
||||||
|
{
|
||||||
|
int l, *obs;
|
||||||
|
FLOAT *thr;
|
||||||
|
} hmm_gof_t;
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
/* initialize and destroy hmm_par_t */
|
||||||
|
hmm_par_t *hmm_new_par(int m, int n);
|
||||||
|
void hmm_delete_par(hmm_par_t *hp);
|
||||||
|
/* initialize and destroy hmm_data_t */
|
||||||
|
hmm_data_t *hmm_new_data(int L, const char *seq, const hmm_par_t *hp);
|
||||||
|
void hmm_delete_data(hmm_data_t *hd);
|
||||||
|
/* initialize and destroy hmm_exp_t */
|
||||||
|
hmm_exp_t *hmm_new_exp(const hmm_par_t *hp);
|
||||||
|
void hmm_delete_exp(hmm_exp_t *he);
|
||||||
|
/* Viterbi, forward and backward algorithms */
|
||||||
|
FLOAT hmm_Viterbi(const hmm_par_t *hp, hmm_data_t *hd);
|
||||||
|
void hmm_pre_backward(hmm_par_t *hp);
|
||||||
|
void hmm_forward(const hmm_par_t *hp, hmm_data_t *hd);
|
||||||
|
void hmm_backward(const hmm_par_t *hp, hmm_data_t *hd);
|
||||||
|
/* log-likelihood of the observations (natural based) */
|
||||||
|
FLOAT hmm_lk(const hmm_data_t *hd);
|
||||||
|
/* posterior probability at the position on the sequence */
|
||||||
|
FLOAT hmm_post_state(const hmm_par_t *hp, const hmm_data_t *hd, int u, FLOAT *prob);
|
||||||
|
/* posterior decoding */
|
||||||
|
void hmm_post_decode(const hmm_par_t *hp, hmm_data_t *hd);
|
||||||
|
/* expected counts of transitions and emissions */
|
||||||
|
hmm_exp_t *hmm_expect(const hmm_par_t *hp, const hmm_data_t *hd);
|
||||||
|
/* add he0 counts to he1 counts*/
|
||||||
|
void hmm_add_expect(const hmm_exp_t *he0, hmm_exp_t *he1);
|
||||||
|
/* the Q function that should be maximized in EM */
|
||||||
|
FLOAT hmm_Q(const hmm_par_t *hp, const hmm_exp_t *he);
|
||||||
|
FLOAT hmm_Q0(const hmm_par_t *hp, hmm_exp_t *he);
|
||||||
|
/* simulate sequences */
|
||||||
|
char *hmm_simulate(const hmm_par_t *hp, int L);
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/*
 * Allocate a zero-filled n_row x n_col matrix of `size`-byte elements as an
 * array of independently allocated rows.
 *
 * Returns the row-pointer array, or NULL when any dimension is negative or an
 * allocation fails; on failure every row allocated so far is released, so
 * nothing leaks.  The caller frees each row and then the array itself.
 */
static inline void **calloc2(int n_row, int n_col, int size)
{
	char **rows;
	int k;

	if (n_row < 0 || n_col < 0 || size < 0) return 0;
	rows = (char**)malloc(sizeof(char*) * n_row);
	if (rows == 0) return 0;
	for (k = 0; k != n_row; ++k) {
		rows[k] = (char*)calloc(n_col, size);
		/* calloc may legitimately return NULL for a zero-sized row; only a
		 * NULL for a non-empty row is a failure */
		if (rows[k] == 0 && n_col > 0 && size > 0) {
			while (k > 0) free(rows[--k]);
			free(rows);
			return 0;
		}
	}
	return (void**)rows;
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
@ -0,0 +1,135 @@
|
||||||
|
/* The MIT License
|
||||||
|
|
||||||
|
Copyright (c) 2008-2009, by Attractive Chaos <attractor@live.co.uk>
|
||||||
|
|
||||||
|
Permission is hereby granted, free of charge, to any person obtaining
|
||||||
|
a copy of this software and associated documentation files (the
|
||||||
|
"Software"), to deal in the Software without restriction, including
|
||||||
|
without limitation the rights to use, copy, modify, merge, publish,
|
||||||
|
distribute, sublicense, and/or sell copies of the Software, and to
|
||||||
|
permit persons to whom the Software is furnished to do so, subject to
|
||||||
|
the following conditions:
|
||||||
|
|
||||||
|
The above copyright notice and this permission notice shall be
|
||||||
|
included in all copies or substantial portions of the Software.
|
||||||
|
|
||||||
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||||
|
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||||
|
BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||||
|
ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||||
|
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
|
SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef _AC_KLIST_H
|
||||||
|
#define _AC_KLIST_H
|
||||||
|
|
||||||
|
#include <stdlib.h>
|
||||||
|
|
||||||
|
/* klib_unused: attribute that marks a definition as possibly unused on
 * clang >= 3 and GCC >= 3; expands to nothing on other compilers. */
#ifndef klib_unused
#  if (defined(__clang__) && __clang_major__ >= 3) || (defined(__GNUC__) && __GNUC__ >= 3)
#    define klib_unused __attribute__ ((__unused__))
#  else
#    define klib_unused
#  endif
#endif /* klib_unused */
|
||||||
|
|
||||||
|
/*
 * Instantiate a free-list memory pool for objects of type kmptype_t.
 *
 *   kmp_init_<name>()        new, empty pool
 *   kmp_alloc_<name>(mp)     hand out an object: a recycled one if the free
 *                            list is non-empty, otherwise a fresh zeroed one
 *   kmp_free_<name>(mp, p)   return an object to the free list (it is kept,
 *                            not released)
 *   kmp_destroy_<name>(mp)   call kmpfree_f on, then free, every object on the
 *                            free list, then free the pool itself
 *
 * Pool fields: cnt = objects currently handed out, n = objects on the free
 * list, max = capacity of the free-list array buf.
 *
 * The results of calloc/realloc are cast explicitly so that instantiations
 * also compile as C++, where void* does not convert implicitly.  Allocation
 * results are not checked.
 */
#define KMEMPOOL_INIT2(SCOPE, name, kmptype_t, kmpfree_f) \
	typedef struct { \
		size_t cnt, n, max; \
		kmptype_t **buf; \
	} kmp_##name##_t; \
	SCOPE kmp_##name##_t *kmp_init_##name(void) { \
		return (kmp_##name##_t*)calloc(1, sizeof(kmp_##name##_t)); \
	} \
	SCOPE void kmp_destroy_##name(kmp_##name##_t *mp) { \
		size_t k; \
		for (k = 0; k < mp->n; ++k) { \
			kmpfree_f(mp->buf[k]); free(mp->buf[k]); \
		} \
		free(mp->buf); free(mp); \
	} \
	SCOPE kmptype_t *kmp_alloc_##name(kmp_##name##_t *mp) { \
		++mp->cnt; \
		if (mp->n == 0) return (kmptype_t*)calloc(1, sizeof(kmptype_t)); \
		return mp->buf[--mp->n]; \
	} \
	SCOPE void kmp_free_##name(kmp_##name##_t *mp, kmptype_t *p) { \
		--mp->cnt; \
		if (mp->n == mp->max) { /* free list full: start at 16 slots, then double */ \
			mp->max = mp->max? mp->max<<1 : 16; \
			mp->buf = (kmptype_t**)realloc(mp->buf, sizeof(kmptype_t *) * mp->max); \
		} \
		mp->buf[mp->n++] = p; \
	}
|
||||||
|
|
||||||
|
/* KMEMPOOL_INIT2 with every generated function declared `static inline klib_unused`. */
#define KMEMPOOL_INIT(name, kmptype_t, kmpfree_f) \
	KMEMPOOL_INIT2(static inline klib_unused, name, kmptype_t, kmpfree_f)
|
||||||
|
|
||||||
|
/* Name-based front end to the pool generated by KMEMPOOL_INIT(name, ...). */
#define kmempool_t(name)       kmp_##name##_t            /* pool type */
#define kmp_init(name)         kmp_init_##name()         /* create a pool */
#define kmp_destroy(name, mp)  kmp_destroy_##name(mp)    /* destroy a pool */
#define kmp_alloc(name, mp)    kmp_alloc_##name(mp)      /* take an object */
#define kmp_free(name, mp, p)  kmp_free_##name(mp, p)    /* give an object back */
|
||||||
|
|
||||||
|
/*
 * Instantiate a singly linked list of kltype_t whose nodes come from a
 * KMEMPOOL_INIT2 pool.
 *
 * The list always ends in an empty sentinel node (`tail`); an empty list is
 * just the sentinel, with head == tail.
 *
 *   kl_init_<name>()        new empty list with its own pool
 *   kl_destroy_<name>(kl)   return every node to the pool, destroy the pool
 *                           (which applies kmpfree_t to each node), free the list
 *   kl_pushp_<name>(kl)     append: the current sentinel becomes the new
 *                           element and a new sentinel is linked after it;
 *                           returns a pointer to the new element's data for
 *                           the caller to fill in
 *   kl_shift_<name>(kl, d)  remove the first element, copying its data to *d
 *                           when d is non-NULL; returns 0, or -1 if the list
 *                           is empty
 *
 * The calloc result is cast explicitly so that instantiations also compile as
 * C++, where void* does not convert implicitly.  Allocation results are not
 * checked.
 */
#define KLIST_INIT2(SCOPE, name, kltype_t, kmpfree_t) \
	struct __kl1_##name { \
		kltype_t data; \
		struct __kl1_##name *next; \
	}; \
	typedef struct __kl1_##name kl1_##name; \
	KMEMPOOL_INIT2(SCOPE, name, kl1_##name, kmpfree_t) \
	typedef struct { \
		kl1_##name *head, *tail; \
		kmp_##name##_t *mp; \
		size_t size; \
	} kl_##name##_t; \
	SCOPE kl_##name##_t *kl_init_##name(void) { \
		kl_##name##_t *kl = (kl_##name##_t*)calloc(1, sizeof(kl_##name##_t)); \
		kl->mp = kmp_init(name); \
		kl->head = kl->tail = kmp_alloc(name, kl->mp); \
		kl->head->next = 0; \
		return kl; \
	} \
	SCOPE void kl_destroy_##name(kl_##name##_t *kl) { \
		kl1_##name *p; \
		/* kmp_free only parks the node in the pool, so p->next stays readable */ \
		for (p = kl->head; p != kl->tail; p = p->next) \
			kmp_free(name, kl->mp, p); \
		kmp_free(name, kl->mp, p); /* the sentinel */ \
		kmp_destroy(name, kl->mp); \
		free(kl); \
	} \
	SCOPE kltype_t *kl_pushp_##name(kl_##name##_t *kl) { \
		kl1_##name *q, *p = kmp_alloc(name, kl->mp); \
		q = kl->tail; p->next = 0; kl->tail->next = p; kl->tail = p; \
		++kl->size; \
		return &q->data; \
	} \
	SCOPE int kl_shift_##name(kl_##name##_t *kl, kltype_t *d) { \
		kl1_##name *p; \
		if (kl->head->next == 0) return -1; /* only the sentinel is left */ \
		--kl->size; \
		p = kl->head; kl->head = kl->head->next; \
		if (d) *d = p->data; \
		kmp_free(name, kl->mp, p); \
		return 0; \
	}
|
||||||
|
|
||||||
|
/* KLIST_INIT: same as KLIST_INIT2 with every generated function declared `static inline klib_unused`. */
#define KLIST_INIT(name, kltype_t, kmpfree_t) \
|
||||||
|
KLIST_INIT2(static inline klib_unused, name, kltype_t, kmpfree_t)
|
||||||
|
|
||||||
|
/* Type names and node accessors; kl_end() yields the tail node, kl_begin() the head node. */
#define kliter_t(name) kl1_##name
|
||||||
|
#define klist_t(name) kl_##name##_t
|
||||||
|
#define kl_val(iter) ((iter)->data)
|
||||||
|
#define kl_next(iter) ((iter)->next)
|
||||||
|
#define kl_begin(kl) ((kl)->head)
|
||||||
|
#define kl_end(kl) ((kl)->tail)
|
||||||
|
|
||||||
|
/* Shorthands that paste `name` into the function names generated by KLIST_INIT2. */
#define kl_init(name) kl_init_##name()
|
||||||
|
#define kl_destroy(name, kl) kl_destroy_##name(kl)
|
||||||
|
#define kl_pushp(name, kl) kl_pushp_##name(kl)
|
||||||
|
#define kl_shift(name, kl, d) kl_shift_##name(kl, d)
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
@ -0,0 +1,447 @@
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <math.h>
|
||||||
|
#include "kmath.h"
|
||||||
|
|
||||||
|
/******************************
|
||||||
|
*** Non-linear programming ***
|
||||||
|
******************************/
|
||||||
|
|
||||||
|
/* Hooke-Jeeves algorithm for nonlinear minimization
|
||||||
|
|
||||||
|
Based on the pseudocodes by Bell and Pike (CACM 9(9):684-685), and
|
||||||
|
the revision by Tomlin and Smith (CACM 12(11):637-638). Both of the
|
||||||
|
papers are comments on Kaupe's Algorithm 178 "Direct Search" (ACM
|
||||||
|
6(6):313-314). The original algorithm was designed by Hooke and
|
||||||
|
Jeeves (ACM 8:212-229). This program is further revised according to
|
||||||
|
Johnson's implementation at Netlib (opt/hooke.c).
|
||||||
|
|
||||||
|
Hooke-Jeeves algorithm is very simple and it works quite well on a
|
||||||
|
few examples. However, it might fail to converge due to its heuristic
|
||||||
|
nature. A possible improvement, as is suggested by Johnson, may be to
|
||||||
|
choose a small r at the beginning to quickly approach to the minimum
|
||||||
|
and a large r at later step to hit the minimum.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
 * One exploratory sweep of the Hooke-Jeeves search.
 *
 * For each coordinate, x1 is moved by dx[k]; if that does not lower the
 * objective the step direction is flipped (dx[k] is negated in place) and the
 * opposite move is tried; if neither helps the coordinate is restored.
 * fx1 is the objective value at the incoming x1.  *n_calls is advanced by the
 * number of evaluations made.  Returns the objective value at the outgoing x1.
 */
static double __kmin_hj_aux(kmin_f func, int n, double *x1, void *data, double fx1, double *dx, int *n_calls)
{
	int i, evals = *n_calls;
	double best = fx1;
	for (i = 0; i != n; ++i) {
		double trial;
		x1[i] += dx[i];
		trial = func(n, x1, data);
		++evals;
		if (trial < best) {
			best = trial;
			continue;
		}
		/* no gain: probe the opposite direction */
		dx[i] = 0.0 - dx[i];
		x1[i] += dx[i] + dx[i];
		trial = func(n, x1, data);
		++evals;
		if (trial < best) best = trial;
		else x1[i] -= dx[i]; /* neither side helped: restore the coordinate */
	}
	*n_calls = evals;
	return best;
}
|
||||||
|
|
||||||
|
/*
 * Hooke-Jeeves direct-search minimisation of func over n variables.
 *
 *   func       objective, called as func(n, point, data)
 *   n          number of variables
 *   x          in: starting point; out: best accepted point
 *   data       opaque pointer forwarded to func
 *   r          shrink ratio; the initial step of coordinate k is |x[k]|*r
 *              (r itself when x[k] is 0) and every step is multiplied by r
 *              after each round
 *   eps        the search stops once the accumulated shrink factor (r, r*r,
 *              ...) has dropped below eps
 *   max_calls  soft cap on objective evaluations, checked between moves
 *
 * Returns the objective value at the returned x.  (fx is kept equal to f(x)
 * throughout, whereas fx1 can belong to a trial point that was rejected or
 * never copied into x, so fx is the value that matches the output.)
 * If the work arrays cannot be allocated, x is left untouched and f(x) is
 * returned.
 */
double kmin_hj(kmin_f func, int n, double *x, void *data, double r, double eps, int max_calls)
{
	double fx, fx1, *x1, *dx, radius;
	int k, n_calls = 0;
	x1 = (double*)calloc(n, sizeof(double));
	dx = (double*)calloc(n, sizeof(double));
	if (x1 == 0 || dx == 0) { /* allocation failed (or n == 0): no search possible */
		free(x1); free(dx);
		return func(n, x, data);
	}
	for (k = 0; k != n; ++k) { /* initial directions, based on MGJ */
		dx[k] = fabs(x[k]) * r;
		if (dx[k] == 0) dx[k] = r;
	}
	radius = r;
	fx1 = fx = func(n, x, data); ++n_calls;
	for (;;) {
		memcpy(x1, x, n * sizeof(double)); /* x1 = x */
		fx1 = __kmin_hj_aux(func, n, x1, data, fx, dx, &n_calls);
		while (fx1 < fx) {
			/* accept x1 and extrapolate along the improving direction */
			for (k = 0; k != n; ++k) {
				double t = x[k];
				dx[k] = x1[k] > x[k]? fabs(dx[k]) : 0.0 - fabs(dx[k]);
				x[k] = x1[k];
				x1[k] = x1[k] + x1[k] - t;
			}
			fx = fx1;
			if (n_calls >= max_calls) break;
			fx1 = func(n, x1, data); ++n_calls;
			fx1 = __kmin_hj_aux(func, n, x1, data, fx1, dx, &n_calls);
			if (fx1 >= fx) break;
			/* stop the pattern moves when x1 has not really left x */
			for (k = 0; k != n; ++k)
				if (fabs(x1[k] - x[k]) > .5 * fabs(dx[k])) break;
			if (k == n) break;
		}
		if (radius >= eps) {
			if (n_calls >= max_calls) break;
			radius *= r;
			for (k = 0; k != n; ++k) dx[k] *= r;
		} else break; /* converge */
	}
	free(x1); free(dx);
	return fx;
}
|
||||||
|
|
||||||
|
// I copied this function somewhere several years ago with some of my modifications, but I forgot the source.
|
||||||
|
/*
 * One-dimensional minimisation: bracket a minimum starting from (a, b), then
 * refine it with Brent's method (parabolic interpolation with golden-section
 * fallback) for at most 100 iterations.
 *
 *   func  objective, called as func(point, data)
 *   a, b  two starting abscissas
 *   data  opaque pointer forwarded to func
 *   tol   relative tolerance on the abscissa
 *   xmin  out: abscissa of the minimum found (must not be NULL)
 *
 * Returns the objective value at *xmin.
 */
double kmin_brent(kmin1_f func, double a, double b, void *data, double tol, double *xmin)
{
	const double gold1 = 1.6180339887;
	const double gold2 = 0.3819660113;
	const double tiny = 1e-20;
	const int max_iter = 100;
	double bound, u, r, q, fu, tmp, fa, fb, fc, c;
	double e, d, w, v, mid, tol1, tol2, p, eold, fv, fw;
	int it;

	/* ---- phase 1: find (a, b, c) with f(b) below both neighbours ---- */
	fa = func(a, data);
	fb = func(b, data);
	if (fb > fa) { /* walk downhill: make sure f(a) >= f(b) */
		tmp = a; a = b; b = tmp;
		tmp = fa; fa = fb; fb = tmp;
	}
	c = b + gold1 * (b - a); /* golden section extrapolation */
	fc = func(c, data);
	while (fb > fc) {
		bound = b + 100.0 * (c - b); /* farthest point worth trying */
		r = (b - a) * (fb - fc);
		q = (b - c) * (fb - fa);
		if (fabs(q - r) < tiny) /* keep the denominator away from zero */
			tmp = q > r? tiny : 0.0 - tiny;
		else
			tmp = q - r;
		/* abscissa of the parabola through a, b, c */
		u = b - ((b - c) * q - (b - a) * r) / (2.0 * tmp);
		if ((b > u && u > c) || (b < u && u < c)) { /* u between b and c */
			fu = func(u, data);
			if (fu < fc) { /* (b,u,c) bracket the minimum */
				a = b; fa = fb;
				b = u; fb = fu;
				break;
			}
			if (fu > fb) { /* (a,b,u) bracket the minimum */
				c = u; fc = fu;
				break;
			}
			u = c + gold1 * (c - b); /* parabola was no help */
			fu = func(u, data);
		} else if ((c > u && u > bound) || (c < u && u < bound)) { /* u between c and bound */
			fu = func(u, data);
			if (fu < fc) { /* fb > fc > fu: keep going */
				b = c; c = u; u = c + gold1 * (c - b);
				fb = fc; fc = fu; fu = func(u, data);
			} else { /* (b,c,u) bracket the minimum */
				a = b; b = c; c = u;
				fa = fb; fb = fc; fc = fu;
				break;
			}
		} else if ((u > bound && bound > c) || (u < bound && bound < c)) { /* u beyond the bound */
			u = bound;
			fu = func(u, data);
		} else { /* u went the wrong way: golden section extrapolation */
			u = c + gold1 * (c - b);
			fu = func(u, data);
		}
		a = b; b = c; c = u;
		fa = fb; fb = fc; fc = fu;
	}
	if (a > c) { /* order the bracket ends */
		u = a; a = c; c = u;
	}

	/* ---- phase 2: Brent's refinement inside [a, c] ---- */
	e = 0.0; d = 0.0;
	v = b; w = b;
	fw = fb; fv = fb;
	for (it = 0; it != max_iter; ++it) {
		int golden = 1; /* cleared when a parabolic step is accepted */
		mid = 0.5 * (a + c);
		tol1 = tol * fabs(b) + tiny;
		tol2 = 2.0 * tol1;
		if (fabs(b - mid) <= (tol2 - 0.5 * (c - a))) break; /* converged */
		if (fabs(e) > tol1) {
			/* fit a parabola through b, v, w */
			r = (b - w) * (fb - fv);
			q = (b - v) * (fb - fw);
			p = (b - v) * q - (b - w) * r;
			q = 2.0 * (q - r);
			if (q > 0.0) p = 0.0 - p;
			else q = 0.0 - q;
			eold = e;
			e = d;
			if (!(fabs(p) >= fabs(0.5 * q * eold) || p <= q * (a - b) || p >= q * (c - b))) {
				d = p / q; /* the parabolic step itself */
				u = b + d;
				if (u - a < tol2 || c - u < tol2)
					d = (mid > b)? tol1 : 0.0 - tol1;
				golden = 0;
			}
		}
		if (golden) { /* golden section step into the larger segment */
			e = (b >= mid ? a - b : c - b);
			d = gold2 * e;
		}
		if (fabs(d) >= tol1) u = b + d;
		else u = b + (d > 0.0? tol1 : -tol1);
		fu = func(u, data);
		if (fu <= fb) { /* u is the best point so far */
			if (u >= b) a = b;
			else c = b;
			v = w; w = b; b = u;
			fv = fw; fw = fb; fb = fu;
		} else { /* shrink (a,c) and refresh the runners-up v, w */
			if (u < b) a = u;
			else c = u;
			if (fu <= fw || w == b) {
				v = w; w = u;
				fv = fw; fw = fu;
			} else if (fu <= fv || v == b || v == w) {
				v = u; fv = fu;
			}
		}
	}
	*xmin = b;
	return fb;
}
|
||||||
|
|
||||||
|
/*
 * Magnitude of a combined with the sign of b (b == 0 counts as non-negative).
 * Works in double precision: the only caller passes double tolerances, which
 * a float signature would silently round to single precision.
 */
static inline double SIGN(double a, double b)
{
	return b >= 0 ? (a >= 0 ? a : -a) : (a >= 0 ? -a : a);
}

/*
 * Root of func inside [x1, x2] by Brent's method (inverse quadratic
 * interpolation / secant steps with bisection as the fallback).
 *
 *   x1, x2  interval ends; func must not have the same non-zero sign at both
 *   tol     absolute tolerance; half of it goes into the stopping width
 *   func    function to solve, called as func(point, data)
 *   data    opaque pointer forwarded to func
 *   err     out (must not be NULL): 0 on success, -1 when the interval does
 *           not bracket a sign change, -2 when 100 iterations were not enough
 *
 * Returns the root estimate on success and 0 on failure.
 */
double krf_brent(double x1, double x2, double tol, double (*func)(double, void*), void *data, int *err)
{
	const int max_iter = 100;
	const double eps = 3e-8f; /* single-precision literal kept so the tolerance is unchanged */
	int i;
	double a = x1, b = x2, c = x2, d, e, min1, min2;
	double fa, fb, fc, p, q, r, s, tol1, xm;

	*err = 0;
	fa = func(a, data), fb = func(b, data);
	if ((fa > 0.0 && fb > 0.0) || (fa < 0.0 && fb < 0.0)) {
		*err = -1;
		return 0.0;
	}
	fc = fb;
	for (i = 0; i < max_iter; ++i) {
		if ((fb > 0.0 && fc > 0.0) || (fb < 0.0 && fc < 0.0)) {
			/* b and c ended up on the same side: re-bracket with a */
			c = a;
			fc = fa;
			e = d = b - a;
		}
		if (fabs(fc) < fabs(fb)) { /* keep b as the best estimate */
			a = b, b = c, c = a;
			fa = fb, fb = fc, fc = fa;
		}
		tol1 = 2.0 * eps * fabs(b) + 0.5 * tol;
		xm = 0.5 * (c - b);
		if (fabs(xm) <= tol1 || fb == 0.0)
			return b;
		if (fabs(e) >= tol1 && fabs(fa) > fabs(fb)) {
			s = fb / fa;
			if (a == c) { /* only two distinct points: secant step */
				p = 2.0 * xm * s;
				q = 1.0 - s;
			} else { /* inverse quadratic interpolation */
				q = fa / fc;
				r = fb / fc;
				p = s * (2.0 * xm * q * (q - r) - (b - a) * (r - 1.0));
				q = (q - 1.0) * (r - 1.0) * (s - 1.0);
			}
			if (p > 0.0) q = -q;
			p = fabs(p);
			min1 = 3.0 * xm * q - fabs(tol1 * q);
			min2 = fabs(e * q);
			if (2.0 * p < (min1 < min2 ? min1 : min2)) { /* interpolation accepted */
				e = d;
				d = p / q;
			} else { /* interpolation rejected: bisect */
				d = xm;
				e = d;
			}
		} else { /* converging too slowly: bisect */
			d = xm;
			e = d;
		}
		a = b;
		fa = fb;
		if (fabs(d) > tol1) b += d;
		else b += SIGN(tol1, xm); /* always move by at least tol1 */
		fb = func(b, data);
	}
	*err = -2;
	return 0.0;
}
|
||||||
|
|
||||||
|
/*************************
|
||||||
|
*** Special functions ***
|
||||||
|
*************************/
|
||||||
|
|
||||||
|
/* Log gamma function
|
||||||
|
* \log{\Gamma(z)}
|
||||||
|
* AS245, 2nd algorithm, http://lib.stat.cmu.edu/apstat/245
|
||||||
|
*/
|
||||||
|
double kf_lgamma(double z)
{
	/* pole[k] is the numerator of the series term whose denominator is z + k */
	static const double pole[8] = {
		676.5203681218835, -1259.139216722289, 771.3234287757674, -176.6150291498386,
		12.50734324009056, -0.1385710331296526, 0.9934937113930748e-05, 0.1659470187408462e-06
	};
	double series = 0;
	int k;
	/* accumulate from the smallest term up, in the order 7, 6, ..., 1 */
	for (k = 7; k >= 1; --k)
		series += pole[k] / (z + k);
	series += pole[0] / z;
	series += 0.9999999999995183;
	return log(series) - 5.58106146679532777 - z + (z-0.5) * log(z+6.5);
}
|
||||||
|
|
||||||
|
/* complementary error function
|
||||||
|
* \frac{2}{\sqrt{\pi}} \int_x^{\infty} e^{-t^2} dt
|
||||||
|
* AS66, 2nd algorithm, http://lib.stat.cmu.edu/apstat/66
|
||||||
|
*/
|
||||||
|
double kf_erfc(double x)
{
	/* rational-approximation coefficients, index = power of z */
	static const double num_c[7] = {
		220.2068679123761, 221.2135961699311, 112.0792914978709, 33.912866078383,
		6.37396220353165, .7003830644436881, .03526249659989109
	};
	static const double den_c[8] = {
		440.4137358247522, 793.8265125199484, 637.3336333788311, 296.5642487796737,
		86.78073220294608, 16.06417757920695, 1.755667163182642, .08838834764831844
	};
	double expntl, z, p;
	int i;
	z = fabs(x) * M_SQRT2;
	if (z > 37.) return x > 0.? 0. : 2.; /* result saturates far out in the tails */
	expntl = exp(z * z * - .5);
	if (z < 10. / M_SQRT2) { /* small z: ratio of two polynomials (Horner form) */
		double num = num_c[6], den = den_c[7];
		for (i = 5; i >= 0; --i) num = num * z + num_c[i];
		for (i = 6; i >= 0; --i) den = den * z + den_c[i];
		p = expntl * num / den;
	} else { /* large z: five-level continued fraction, evaluated inside out */
		double cf = z + .65;
		for (i = 4; i >= 1; --i) cf = z + i / cf;
		p = expntl / 2.506628274631001 / cf;
	}
	if (x > 0.) return 2. * p;
	return 2. * (1. - p);
}
|
||||||
|
|
||||||
|
/* The following computes regularized incomplete gamma functions.
|
||||||
|
* Formulas are taken from Wiki, with additional input from Numerical
|
||||||
|
* Recipes in C (for modified Lentz's algorithm) and AS245
|
||||||
|
* (http://lib.stat.cmu.edu/apstat/245).
|
||||||
|
*
|
||||||
|
* A good online calculator is available at:
|
||||||
|
*
|
||||||
|
* http://www.danielsoper.com/statcalc/calc23.aspx
|
||||||
|
*
|
||||||
|
* It calculates upper incomplete gamma function, which equals
|
||||||
|
* kf_gammaq(s,z)*tgamma(s).
|
||||||
|
*/
|
||||||
|
|
||||||
|
#define KF_GAMMA_EPS 1e-14
|
||||||
|
#define KF_TINY 1e-290
|
||||||
|
|
||||||
|
// regularized lower incomplete gamma function, by series expansion
|
||||||
|
static double _kf_gammap(double s, double z)
|
||||||
|
{
|
||||||
|
double sum, x;
|
||||||
|
int k;
|
||||||
|
for (k = 1, sum = x = 1.; k < 100; ++k) {
|
||||||
|
sum += (x *= z / (s + k));
|
||||||
|
if (x / sum < KF_GAMMA_EPS) break;
|
||||||
|
}
|
||||||
|
return exp(s * log(z) - z - kf_lgamma(s + 1.) + log(sum));
|
||||||
|
}
|
||||||
|
// regularized upper incomplete gamma function, by continued fraction
|
||||||
|
static double _kf_gammaq(double s, double z)
|
||||||
|
{
|
||||||
|
int j;
|
||||||
|
double C, D, f;
|
||||||
|
f = 1. + z - s; C = f; D = 0.;
|
||||||
|
// Modified Lentz's algorithm for computing continued fraction
|
||||||
|
// See Numerical Recipes in C, 2nd edition, section 5.2
|
||||||
|
for (j = 1; j < 100; ++j) {
|
||||||
|
double a = j * (s - j), b = (j<<1) + 1 + z - s, d;
|
||||||
|
D = b + a * D;
|
||||||
|
if (D < KF_TINY) D = KF_TINY;
|
||||||
|
C = b + a / C;
|
||||||
|
if (C < KF_TINY) C = KF_TINY;
|
||||||
|
D = 1. / D;
|
||||||
|
d = C * D;
|
||||||
|
f *= d;
|
||||||
|
if (fabs(d - 1.) < KF_GAMMA_EPS) break;
|
||||||
|
}
|
||||||
|
return exp(s * log(z) - z - kf_lgamma(s) - log(f));
|
||||||
|
}
|
||||||
|
|
||||||
|
double kf_gammap(double s, double z)
|
||||||
|
{
|
||||||
|
return z <= 1. || z < s? _kf_gammap(s, z) : 1. - _kf_gammaq(s, z);
|
||||||
|
}
|
||||||
|
|
||||||
|
double kf_gammaq(double s, double z)
|
||||||
|
{
|
||||||
|
return z <= 1. || z < s? 1. - _kf_gammap(s, z) : _kf_gammaq(s, z);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Regularized incomplete beta function. The method is taken from
|
||||||
|
* Numerical Recipe in C, 2nd edition, section 6.4. The following web
|
||||||
|
* page calculates the incomplete beta function, which equals
|
||||||
|
* kf_betai(a,b,x) * gamma(a) * gamma(b) / gamma(a+b):
|
||||||
|
*
|
||||||
|
* http://www.danielsoper.com/statcalc/calc36.aspx
|
||||||
|
*/
|
||||||
|
static double kf_betai_aux(double a, double b, double x)
|
||||||
|
{
|
||||||
|
double C, D, f;
|
||||||
|
int j;
|
||||||
|
if (x == 0.) return 0.;
|
||||||
|
if (x == 1.) return 1.;
|
||||||
|
f = 1.; C = f; D = 0.;
|
||||||
|
// Modified Lentz's algorithm for computing continued fraction
|
||||||
|
for (j = 1; j < 200; ++j) {
|
||||||
|
double aa, d;
|
||||||
|
int m = j>>1;
|
||||||
|
aa = (j&1)? -(a + m) * (a + b + m) * x / ((a + 2*m) * (a + 2*m + 1))
|
||||||
|
: m * (b - m) * x / ((a + 2*m - 1) * (a + 2*m));
|
||||||
|
D = 1. + aa * D;
|
||||||
|
if (D < KF_TINY) D = KF_TINY;
|
||||||
|
C = 1. + aa / C;
|
||||||
|
if (C < KF_TINY) C = KF_TINY;
|
||||||
|
D = 1. / D;
|
||||||
|
d = C * D;
|
||||||
|
f *= d;
|
||||||
|
if (fabs(d - 1.) < KF_GAMMA_EPS) break;
|
||||||
|
}
|
||||||
|
return exp(kf_lgamma(a+b) - kf_lgamma(a) - kf_lgamma(b) + a * log(x) + b * log(1.-x)) / a / f;
|
||||||
|
}
|
||||||
|
double kf_betai(double a, double b, double x)
|
||||||
|
{
|
||||||
|
return x < (a + 1.) / (a + b + 2.)? kf_betai_aux(a, b, x) : 1. - kf_betai_aux(b, a, 1. - x);
|
||||||
|
}
|
||||||
|
|
||||||
|
/******************
|
||||||
|
*** Statistics ***
|
||||||
|
******************/
|
||||||
|
|
||||||
|
/*
 * Two-sample Kolmogorov-Smirnov distance: the largest absolute difference
 * between the empirical distribution functions of a[0..na) and b[0..nb).
 * Both arrays MUST be sorted in ascending order.
 *
 * The two distribution functions are compared only after every element equal
 * to the current value has been consumed from BOTH samples.  Stepping through
 * ties one element at a time compares half-updated step functions and reports
 * a distance that does not exist (e.g. {1,1} versus {1}).
 * An empty sample contributes a distribution function that is 0 everywhere.
 */
double km_ks_dist(int na, const double a[], int nb, const double b[]) // a[] and b[] MUST BE sorted
{
	int ia = 0, ib = 0;
	double sup = 0;
	while (ia < na || ib < nb) {
		double v, fa, fb, diff;
		/* take the smaller head value; always consumes one element, so the loop terminates */
		if (ib == nb || (ia < na && !(b[ib] < a[ia]))) v = a[ia++];
		else v = b[ib++];
		/* swallow every remaining duplicate of v in both samples */
		while (ia < na && a[ia] == v) ++ia;
		while (ib < nb && b[ib] == v) ++ib;
		fa = na > 0? (double)ia / na : 0.;
		fb = nb > 0? (double)ib / nb : 0.;
		diff = fabs(fa - fb);
		if (sup < diff) sup = diff;
	}
	return sup;
}
|
||||||
|
|
||||||
|
#ifdef KF_MAIN
|
||||||
|
#include <stdio.h>
|
||||||
|
#include "ksort.h"
|
||||||
|
KSORT_INIT_GENERIC(double)
|
||||||
|
/* Self-test driver (built only when KF_MAIN is defined): prints a K-S distance and compares a few special functions with libm. */
int main(int argc, char *argv[])
|
||||||
|
{
|
||||||
|
double x = 5.5, y = 3;
|
||||||
|
double a, b;
|
||||||
|
double xx[] = {0.22, -0.87, -2.39, -1.79, 0.37, -1.54, 1.28, -0.31, -0.74, 1.72, 0.38, -0.17, -0.62, -1.10, 0.30, 0.15, 2.30, 0.19, -0.50, -0.09};
|
||||||
|
double yy[] = {-5.13, -2.19, -2.43, -3.83, 0.50, -3.25, 4.32, 1.63, 5.18, -0.43, 7.11, 4.87, -3.10, -5.81, 3.76, 6.31, 2.58, 0.07, 5.76, 3.50};
|
||||||
|
/* km_ks_dist() requires sorted input, so both samples are sorted first */
ks_introsort(double, 20, xx); ks_introsort(double, 20, yy);
|
||||||
|
printf("K-S distance: %f\n", km_ks_dist(20, xx, 20, yy));
|
||||||
|
/* libm erfc next to kf_erfc for comparison */
printf("erfc(%lg): %lg, %lg\n", x, erfc(x), kf_erfc(x));
|
||||||
|
/* un-regularized upper incomplete gamma: kf_gammaq scaled by tgamma */
printf("upper-gamma(%lg,%lg): %lg\n", x, y, kf_gammaq(y, x)*tgamma(y));
|
||||||
|
a = 2; b = 2; x = 0.5;
|
||||||
|
/* un-regularized incomplete beta: kf_betai divided by gamma(a+b)/(gamma(a)*gamma(b)) */
printf("incomplete-beta(%lg,%lg,%lg): %lg\n", a, b, x, kf_betai(a, b, x) / exp(kf_lgamma(a+b) - kf_lgamma(a) - kf_lgamma(b)));
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
@ -0,0 +1,38 @@
|
||||||
|
#ifndef AC_KMATH_H
|
||||||
|
#define AC_KMATH_H
|
||||||
|
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/**************************
|
||||||
|
* Non-linear programming *
|
||||||
|
**************************/
|
||||||
|
|
||||||
|
#define KMIN_RADIUS 0.5
|
||||||
|
#define KMIN_EPS 1e-7
|
||||||
|
#define KMIN_MAXCALL 50000
|
||||||
|
|
||||||
|
typedef double (*kmin_f)(int, double*, void*);
|
||||||
|
typedef double (*kmin1_f)(double, void*);
|
||||||
|
|
||||||
|
double kmin_hj(kmin_f func, int n, double *x, void *data, double r, double eps, int max_calls); // Hooke-Jeeves direct search; x is updated in place
|
||||||
|
double kmin_brent(kmin1_f func, double a, double b, void *data, double tol, double *xmin); // Brent's one-dimensional minimization
|
||||||
|
|
||||||
|
/*********************
|
||||||
|
* Special functions *
|
||||||
|
*********************/
|
||||||
|
|
||||||
|
double kf_lgamma(double z); // log gamma function
|
||||||
|
double kf_erfc(double x); // complementary error function
|
||||||
|
double kf_gammap(double s, double z); // regularized lower incomplete gamma function
|
||||||
|
double kf_gammaq(double s, double z); // regularized upper incomplete gamma function
|
||||||
|
double kf_betai(double a, double b, double x); // regularized incomplete beta function
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
@ -0,0 +1,628 @@
|
||||||
|
/* The MIT License
|
||||||
|
|
||||||
|
Copyright (c) 2008 by Genome Research Ltd (GRL).
|
||||||
|
2010 by Attractive Chaos <attractor@live.co.uk>
|
||||||
|
|
||||||
|
Permission is hereby granted, free of charge, to any person obtaining
|
||||||
|
a copy of this software and associated documentation files (the
|
||||||
|
"Software"), to deal in the Software without restriction, including
|
||||||
|
without limitation the rights to use, copy, modify, merge, publish,
|
||||||
|
distribute, sublicense, and/or sell copies of the Software, and to
|
||||||
|
permit persons to whom the Software is furnished to do so, subject to
|
||||||
|
the following conditions:
|
||||||
|
|
||||||
|
The above copyright notice and this permission notice shall be
|
||||||
|
included in all copies or substantial portions of the Software.
|
||||||
|
|
||||||
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||||
|
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||||
|
BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||||
|
ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||||
|
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
|
SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* Probably I will not do socket programming in the next few years and
|
||||||
|
therefore I decide to heavily annotate this file, for Linux and
|
||||||
|
Windows as well. -ac */
|
||||||
|
|
||||||
|
#include <time.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <ctype.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <errno.h>
|
||||||
|
#include <unistd.h>
|
||||||
|
#include <sys/types.h>
|
||||||
|
|
||||||
|
#ifndef _WIN32
|
||||||
|
#include <netdb.h>
|
||||||
|
#include <arpa/inet.h>
|
||||||
|
#include <sys/socket.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include "knetfile.h"
|
||||||
|
|
||||||
|
/* In winsock.h, the type of a socket is SOCKET, which is: "typedef
|
||||||
|
* u_int SOCKET". An invalid SOCKET is: "(SOCKET)(~0)", or signed
|
||||||
|
* integer -1. In knetfile.c, I use "int" for socket type
|
||||||
|
* throughout. This should be improved to avoid confusion.
|
||||||
|
*
|
||||||
|
* In Linux/Mac, recv() and read() do almost the same thing. You can see
|
||||||
|
* in the header file that netread() is simply an alias of read(). In
|
||||||
|
* Windows, however, they are different and using recv() is mandatory.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* Waits up to 5 seconds for the descriptor to become ready for reading
 * (or for writing if is_read==0).
 *
 * Returns what select() returns: a positive count when the descriptor is
 * ready, 0 on time-out, -1 on error.  On non-Windows builds a descriptor
 * outside [0, FD_SETSIZE) is rejected up front with errno = EBADF, because
 * passing it to FD_SET() is undefined behaviour. */
static int socket_wait(int fd, int is_read)
{
	fd_set fds, *fdr = 0, *fdw = 0;
	struct timeval tv;
	int ret;
#ifndef _WIN32
	if (fd < 0 || fd >= FD_SETSIZE) {
		errno = EBADF;
		return -1;
	}
#endif
	tv.tv_sec = 5; tv.tv_usec = 0; // 5 seconds time out
	FD_ZERO(&fds);
	FD_SET(fd, &fds);
	if (is_read) fdr = &fds;
	else fdw = &fds;
	ret = select(fd+1, fdr, fdw, 0, &tv);
#ifndef _WIN32
	if (ret == -1) perror("select");
#else
	if (ret == 0)
		fprintf(stderr, "select time-out\n");
	else if (ret == SOCKET_ERROR)
		fprintf(stderr, "select: %d\n", WSAGetLastError());
#endif
	return ret;
}
|
||||||
|
|
||||||
|
#ifndef _WIN32
|
||||||
|
/* Opens a TCP connection to host:port and returns the connected descriptor,
 * or -1 on failure (a message is written to stderr).
 *
 * This function does not work with Windows due to the lack of
 * getaddrinfo() in winsock. It is adapted from an example in "Beej's
 * Guide to Network Programming" (http://beej.us/guide/bgnet/).
 *
 * Every address returned by getaddrinfo() is tried in turn, and a socket
 * whose setup or connect() fails is closed before moving on, so no
 * descriptor is leaked on the error paths. */
static int socket_connect(const char *host, const char *port)
{
	int ai_err, on = 1, fd = -1, saved_errno = 0;
	const char *failed = 0; /* name of the call that failed for the last candidate */
	struct linger lng = { 0, 0 };
	struct addrinfo hints, *res = 0, *ai;
	memset(&hints, 0, sizeof(struct addrinfo));
	hints.ai_family = AF_UNSPEC;
	hints.ai_socktype = SOCK_STREAM;
	/* In Unix/Mac, getaddrinfo() is the most convenient way to get
	 * server information. */
	if ((ai_err = getaddrinfo(host, port, &hints, &res)) != 0) { fprintf(stderr, "can't resolve %s:%s: %s\n", host, port, gai_strerror(ai_err)); return -1; }
	for (ai = res; ai != 0; ai = ai->ai_next) {
		fd = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol);
		if (fd == -1) {
			failed = "socket"; saved_errno = errno;
			continue;
		}
		/* The following two setsockopt() are used by ftplib
		 * (http://nbpfaus.net/~pfau/ftplib/). I am not sure if they
		 * are necessary. */
		if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on)) == -1
			|| setsockopt(fd, SOL_SOCKET, SO_LINGER, &lng, sizeof(lng)) == -1)
			failed = "setsockopt";
		else if (connect(fd, ai->ai_addr, ai->ai_addrlen) != 0)
			failed = "connect";
		else
			break; /* connected */
		saved_errno = errno; /* close() below may overwrite errno */
		close(fd);
		fd = -1;
	}
	freeaddrinfo(res);
	if (fd == -1) {
		errno = saved_errno;
		perror(failed? failed : "connect");
	}
	return fd;
}
|
||||||
|
#else
|
||||||
|
/* MinGW's printf has problem with "%lld" */
|
||||||
|
/* Writes the decimal representation of x into buf and returns buf.
 * Negative values get a leading '-' (including INT64_MIN, whose magnitude is
 * computed in unsigned arithmetic).  buf must hold at least 21 bytes: up to
 * 19 digits, an optional sign and the terminating NUL. */
char *int64tostr(char *buf, int64_t x)
{
	uint64_t mag = x < 0? (uint64_t)0 - (uint64_t)x : (uint64_t)x;
	int i = 0, lo, hi;
	do { /* digits come out least significant first */
		buf[i++] = (char)('0' + (int)(mag % 10));
		mag /= 10;
	} while (mag);
	if (x < 0) buf[i++] = '-';
	buf[i] = 0;
	for (lo = 0, hi = i - 1; lo < hi; ++lo, --hi) { /* reverse in place */
		char c = buf[lo]; buf[lo] = buf[hi]; buf[hi] = c;
	}
	return buf;
}
|
||||||
|
|
||||||
|
/* Parses a decimal integer from the start of buf: an optional '+' or '-'
 * followed by digits.  Parsing stops at the first non-digit, so trailing
 * characters such as "\r\n" are ignored rather than folded into the value.
 * Returns 0 when no digit is present.  The value is accumulated in unsigned
 * arithmetic, so over-long input wraps instead of overflowing a signed
 * integer. */
int64_t strtoint64(const char *buf)
{
	uint64_t mag = 0;
	int neg = 0;
	if (*buf == '-' || *buf == '+') neg = (*buf++ == '-');
	for (; *buf >= '0' && *buf <= '9'; ++buf)
		mag = mag * 10 + (uint64_t)(*buf - '0');
	return neg? (int64_t)((uint64_t)0 - mag) : (int64_t)mag;
}
|
||||||
|
/* Windows only: starts Winsock by calling WSAStartup() and asking for
 * version 2.2.  Returns WSAStartup()'s result unchanged.
 * NOTE(review): presumably this has to run before any other socket call in
 * this file -- confirm against the callers. */
|
||||||
|
int knet_win32_init()
|
||||||
|
{
|
||||||
|
WSADATA wsaData;
|
||||||
|
return WSAStartup(MAKEWORD(2, 2), &wsaData);
|
||||||
|
}
|
||||||
|
/* Windows only: calls WSACleanup(); its return value is ignored. */
void knet_win32_destroy()
|
||||||
|
{
|
||||||
|
WSACleanup();
|
||||||
|
}
|
||||||
|
/* A slightly modfied version of the following function also works on
|
||||||
|
* Mac (and presummably Linux). However, this function is not stable on
|
||||||
|
* my Mac. It sometimes works fine but sometimes does not. Therefore for
|
||||||
|
* non-Windows OS, I do not use this one. */
|
||||||
|
static SOCKET socket_connect(const char *host, const char *port)
|
||||||
|
{
|
||||||
|
#define __err_connect(func) \
|
||||||
|
do { \
|
||||||
|
fprintf(stderr, "%s: %d\n", func, WSAGetLastError()); \
|
||||||
|
return -1; \
|
||||||
|
} while (0)
|
||||||
|
|
||||||
|
int on = 1;
|
||||||
|
SOCKET fd;
|
||||||
|
struct linger lng = { 0, 0 };
|
||||||
|
struct sockaddr_in server;
|
||||||
|
struct hostent *hp = 0;
|
||||||
|
// open socket
|
||||||
|
if ((fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP)) == INVALID_SOCKET) __err_connect("socket");
|
||||||
|
if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, (char*)&on, sizeof(on)) == -1) __err_connect("setsockopt");
|
||||||
|
if (setsockopt(fd, SOL_SOCKET, SO_LINGER, (char*)&lng, sizeof(lng)) == -1) __err_connect("setsockopt");
|
||||||
|
// get host info
|
||||||
|
if (isalpha(host[0])) hp = gethostbyname(host);
|
||||||
|
else {
|
||||||
|
struct in_addr addr;
|
||||||
|
addr.s_addr = inet_addr(host);
|
||||||
|
hp = gethostbyaddr((char*)&addr, 4, AF_INET);
|
||||||
|
}
|
||||||
|
if (hp == 0) __err_connect("gethost");
|
||||||
|
// connect
|
||||||
|
server.sin_addr.s_addr = *((unsigned long*)hp->h_addr);
|
||||||
|
server.sin_family= AF_INET;
|
||||||
|
server.sin_port = htons(atoi(port));
|
||||||
|
if (connect(fd, (struct sockaddr*)&server, sizeof(server)) != 0) __err_connect("connect");
|
||||||
|
// freehostent(hp); // strangely in MSDN, hp is NOT freed (memory leak?!)
|
||||||
|
return fd;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
static off_t my_netread(int fd, void *buf, off_t len)
|
||||||
|
{
|
||||||
|
off_t rest = len, curr, l = 0;
|
||||||
|
/* recv() and read() may not read the required length of data with
|
||||||
|
* one call. They have to be called repeatedly. */
|
||||||
|
while (rest) {
|
||||||
|
if (socket_wait(fd, 1) <= 0) break; // socket is not ready for reading
|
||||||
|
curr = netread(fd, buf + l, rest);
|
||||||
|
/* According to the glibc manual, section 13.2, a zero returned
|
||||||
|
* value indicates end-of-file (EOF), which should mean that
|
||||||
|
* read() will not return zero if EOF has not been met but data
|
||||||
|
* are not immediately available. */
|
||||||
|
if (curr == 0) break;
|
||||||
|
l += curr; rest -= curr;
|
||||||
|
}
|
||||||
|
return l;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*************************
|
||||||
|
* FTP specific routines *
|
||||||
|
*************************/
|
||||||
|
|
||||||
|
/* Reads one complete FTP reply from the control connection into
 * ftp->response, growing that buffer as needed, and returns the reply code.
 *
 * Lines are read one byte at a time.  A line ends the reply when it starts
 * with three digits that are not followed by '-'; any other line is
 * discarded and reading continues with the next one.
 *
 * Returns the numeric reply code; 0 when the control socket does not become
 * readable; -1 when fewer than two bytes of a final line were read or the
 * buffer could not be grown. */
static int kftp_get_response(knetFile *ftp)
{
#ifndef _WIN32
	unsigned char c;
#else
	char c;
#endif
	int n = 0;
	char *p;
	if (socket_wait(ftp->ctrl_fd, 1) <= 0) return 0;
	/* "> 0" also ends the loop on a read error (-1), not only on EOF */
	while (netread(ftp->ctrl_fd, &c, 1) > 0) { // FIXME: this is *VERY BAD* for unbuffered I/O
		//fputc(c, stderr);
		if (n >= ftp->max_response) {
			int new_max = ftp->max_response? ftp->max_response<<1 : 256;
			char *new_buf = (char*)realloc(ftp->response, new_max);
			if (new_buf == 0) return -1; /* the old buffer stays valid and owned by ftp */
			ftp->response = new_buf;
			ftp->max_response = new_max;
		}
		ftp->response[n++] = c;
		if (c == '\n') {
			if (n >= 4 && isdigit((unsigned char)ftp->response[0]) && isdigit((unsigned char)ftp->response[1])
				&& isdigit((unsigned char)ftp->response[2]) && ftp->response[3] != '-') break;
			n = 0; /* not the final line: start over with the next one */
			continue;
		}
	}
	if (n < 2) return -1;
	ftp->response[n-2] = 0; /* cut the last two bytes (the "\r\n" of a complete line) */
	return strtol(ftp->response, &p, 0);
}
|
||||||
|
|
||||||
|
/* Sends the command string cmd over the FTP control connection.
 * When is_get is non-zero the server's reply is read and its code returned;
 * otherwise 0 is returned.  Returns -1 when the socket does not become
 * writable or the write itself fails. */
static int kftp_send_cmd(knetFile *ftp, const char *cmd, int is_get)
{
	if (socket_wait(ftp->ctrl_fd, 0) <= 0) return -1; // socket is not ready for writing
	if (netwrite(ftp->ctrl_fd, cmd, strlen(cmd)) < 0) return -1; // nothing was sent, so do not wait for a reply
	return is_get? kftp_get_response(ftp) : 0;
}
|
||||||
|
|
||||||
|
/*
 * Issue PASV and record the data-connection address announced by the server.
 *
 * The reply is expected to contain "(h1,h2,h3,h4,p1,p2)". On success the four
 * host numbers are stored in ftp->pasv_ip and the port in ftp->pasv_port.
 * Returns 0 on success and -1 if no usable reply was received or it could
 * not be parsed; in that case pasv_ip/pasv_port are left untouched.
 */
static int kftp_pasv_prep(knetFile *ftp)
{
    char *p;
    int v[6];
    /* <= 0 means the command could not be sent or no reply arrived, so
     * ftp->response would be stale or still unallocated */
    if (kftp_send_cmd(ftp, "PASV\r\n", 1) <= 0 || ftp->response == 0) return -1;
    for (p = ftp->response; *p && *p != '('; ++p);
    if (*p != '(') return -1;
    ++p;
    /* all six fields are needed; otherwise v[] would be read uninitialised */
    if (sscanf(p, "%d,%d,%d,%d,%d,%d", &v[0], &v[1], &v[2], &v[3], &v[4], &v[5]) != 6) return -1;
    memcpy(ftp->pasv_ip, v, 4 * sizeof(int));
    /* each port field is one octet; masking keeps the result within 0..65535 */
    ftp->pasv_port = ((v[4] & 0xff) << 8) | (v[5] & 0xff);
    return 0;
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * Open the FTP data connection to the address stored by kftp_pasv_prep().
 *
 * Stores the new descriptor in ftp->fd. Returns 0 on success and -1 if no
 * passive port has been recorded or socket_connect() fails.
 */
static int kftp_pasv_connect(knetFile *ftp)
{
    /* sized for the worst case of "%d": sign + 10 digits + NUL per int, so
     * unexpected values in pasv_ip/pasv_port cannot overflow the buffers */
    char host[80], port[16];
    if (ftp->pasv_port == 0) {
        fprintf(stderr, "[kftp_pasv_connect] kftp_pasv_prep() is not called before hand.\n");
        return -1;
    }
    sprintf(host, "%d.%d.%d.%d", ftp->pasv_ip[0], ftp->pasv_ip[1], ftp->pasv_ip[2], ftp->pasv_ip[3]);
    sprintf(port, "%d", ftp->pasv_port);
    ftp->fd = socket_connect(host, port);
    if (ftp->fd == -1) return -1;
    return 0;
}
|
||||||
|
|
||||||
|
/*
 * Open the FTP control connection to ftp->host:ftp->port and log in.
 *
 * After reading the server greeting, an anonymous login is performed and
 * binary transfer mode is selected. The replies to these commands are read
 * but not inspected. Returns -1 only when the control socket cannot be
 * connected, 0 otherwise.
 */
int kftp_connect(knetFile *ftp)
{
    static const char *const login_seq[] = {
        "USER anonymous\r\n",
        "PASS kftp@\r\n",
        "TYPE I\r\n"
    };
    const int n_cmds = (int)(sizeof(login_seq) / sizeof(login_seq[0]));
    int k;

    ftp->ctrl_fd = socket_connect(ftp->host, ftp->port);
    if (ftp->ctrl_fd == -1)
        return -1;

    kftp_get_response(ftp); /* consume the greeting */
    for (k = 0; k < n_cmds; ++k)
        kftp_send_cmd(ftp, login_seq[k], 1);
    return 0;
}
|
||||||
|
|
||||||
|
/*
 * Drop both FTP connections (control and data) and log in again.
 *
 * Returns the result of kftp_connect(): 0 on success, -1 if the control
 * connection could not be re-established. ftp->fd is -1 afterwards; the data
 * connection has to be reopened separately.
 */
int kftp_reconnect(knetFile *ftp)
{
    if (ftp->ctrl_fd != -1) {
        netclose(ftp->ctrl_fd);
        ftp->ctrl_fd = -1;
    }
    /* same guard as for ctrl_fd: never hand -1 to netclose() */
    if (ftp->fd != -1) {
        netclose(ftp->fd);
        ftp->fd = -1;
    }
    return kftp_connect(ftp);
}
|
||||||
|
|
||||||
|
// initialize ->type, ->host, ->retr and ->size
|
||||||
|
knetFile *kftp_parse_url(const char *fn, const char *mode)
|
||||||
|
{
|
||||||
|
knetFile *fp;
|
||||||
|
char *p;
|
||||||
|
int l;
|
||||||
|
if (strstr(fn, "ftp://") != fn) return 0;
|
||||||
|
for (p = (char*)fn + 6; *p && *p != '/'; ++p);
|
||||||
|
if (*p != '/') return 0;
|
||||||
|
l = p - fn - 6;
|
||||||
|
fp = (knetFile*)calloc(1, sizeof(knetFile));
|
||||||
|
fp->type = KNF_TYPE_FTP;
|
||||||
|
fp->fd = -1;
|
||||||
|
/* the Linux/Mac version of socket_connect() also recognizes a port
|
||||||
|
* like "ftp", but the Windows version does not. */
|
||||||
|
fp->port = strdup("21");
|
||||||
|
fp->host = (char*)calloc(l + 1, 1);
|
||||||
|
if (strchr(mode, 'c')) fp->no_reconnect = 1;
|
||||||
|
strncpy(fp->host, fn + 6, l);
|
||||||
|
fp->retr = (char*)calloc(strlen(p) + 8, 1);
|
||||||
|
sprintf(fp->retr, "RETR %s\r\n", p);
|
||||||
|
fp->size_cmd = (char*)calloc(strlen(p) + 8, 1);
|
||||||
|
sprintf(fp->size_cmd, "SIZE %s\r\n", p);
|
||||||
|
fp->seek_offset = 0;
|
||||||
|
return fp;
|
||||||
|
}
|
||||||
|
// place ->fd at offset off
|
||||||
|
/*
 * (Re)open the FTP data connection so that reading starts at fp->offset.
 *
 * Sequence on the control connection: PASV, SIZE (result stored in
 * fp->file_size), REST <offset>, RETR; then the data socket is connected and
 * the reply to RETR is read, which must be 150.
 *
 * Returns 0 on success with fp->is_ready set to 1. Returns -1 on failure;
 * fp->fd is then -1, except when the SIZE reply cannot be parsed on
 * non-Windows builds, where no data socket has been opened yet.
 */
int kftp_connect_file(knetFile *fp)
{
    int ret;
    long long file_size;
    if (fp->fd != -1) {
        netclose(fp->fd);
        fp->fd = -1; /* do not keep a closed descriptor if we bail out below */
        if (fp->no_reconnect) kftp_get_response(fp);
    }
    /* without a PASV address there is nothing to connect the data socket to */
    if (kftp_pasv_prep(fp) != 0) return -1;
    kftp_send_cmd(fp, fp->size_cmd, 1);
    if (fp->response == 0) return -1;
#ifndef _WIN32
    if ( sscanf(fp->response,"%*d %lld", &file_size) != 1 )
    {
        fprintf(stderr,"[kftp_connect_file] %s\n", fp->response);
        return -1;
    }
#else
    {
        const char *p = fp->response;
        /* stop at the terminator so a malformed reply cannot run off the buffer */
        while (*p && *p != ' ') ++p;
        while (*p && (*p < '0' || *p > '9')) ++p;
        file_size = strtoint64(p);
    }
#endif
    fp->file_size = file_size;
    if (fp->offset>=0) {
        char tmp[32];
#ifndef _WIN32
        sprintf(tmp, "REST %lld\r\n", (long long)fp->offset);
#else
        strcpy(tmp, "REST ");
        int64tostr(tmp + 5, fp->offset);
        strcat(tmp, "\r\n");
#endif
        kftp_send_cmd(fp, tmp, 1);
    }
    /* RETR is sent without waiting: its reply is read only after the data
     * socket has been connected */
    kftp_send_cmd(fp, fp->retr, 0);
    kftp_pasv_connect(fp);
    /* the reply is read even if the data connect failed, so that it does not
     * stay queued on the control connection */
    ret = kftp_get_response(fp);
    if (ret != 150 || fp->fd == -1) {
        fprintf(stderr, "[kftp_connect_file] %s\n", fp->response? fp->response : "");
        if (fp->fd != -1) netclose(fp->fd);
        fp->fd = -1;
        return -1;
    }
    fp->is_ready = 1;
    return 0;
}
|
||||||
|
|
||||||
|
|
||||||
|
/**************************
|
||||||
|
* HTTP specific routines *
|
||||||
|
**************************/
|
||||||
|
|
||||||
|
knetFile *khttp_parse_url(const char *fn, const char *mode)
|
||||||
|
{
|
||||||
|
knetFile *fp;
|
||||||
|
char *p, *proxy, *q;
|
||||||
|
int l;
|
||||||
|
if (strstr(fn, "http://") != fn) return 0;
|
||||||
|
// set ->http_host
|
||||||
|
for (p = (char*)fn + 7; *p && *p != '/'; ++p);
|
||||||
|
l = p - fn - 7;
|
||||||
|
fp = (knetFile*)calloc(1, sizeof(knetFile));
|
||||||
|
fp->http_host = (char*)calloc(l + 1, 1);
|
||||||
|
strncpy(fp->http_host, fn + 7, l);
|
||||||
|
fp->http_host[l] = 0;
|
||||||
|
for (q = fp->http_host; *q && *q != ':'; ++q);
|
||||||
|
if (*q == ':') *q++ = 0;
|
||||||
|
// get http_proxy
|
||||||
|
proxy = getenv("http_proxy");
|
||||||
|
// set ->host, ->port and ->path
|
||||||
|
if (proxy == 0) {
|
||||||
|
fp->host = strdup(fp->http_host); // when there is no proxy, server name is identical to http_host name.
|
||||||
|
fp->port = strdup(*q? q : "80");
|
||||||
|
fp->path = strdup(*p? p : "/");
|
||||||
|
} else {
|
||||||
|
fp->host = (strstr(proxy, "http://") == proxy)? strdup(proxy + 7) : strdup(proxy);
|
||||||
|
for (q = fp->host; *q && *q != ':'; ++q);
|
||||||
|
if (*q == ':') *q++ = 0;
|
||||||
|
fp->port = strdup(*q? q : "80");
|
||||||
|
fp->path = strdup(fn);
|
||||||
|
}
|
||||||
|
fp->type = KNF_TYPE_HTTP;
|
||||||
|
fp->ctrl_fd = fp->fd = -1;
|
||||||
|
fp->seek_offset = 0;
|
||||||
|
return fp;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * (Re)open the HTTP connection and position it at fp->offset.
 *
 * A "GET ... HTTP/1.0" request with a "Range: bytes=<offset>-" header is
 * sent and the response header is read up to the blank line. Status 206 is
 * accepted as is; on status 200 with a positive offset the first fp->offset
 * body bytes are read and thrown away. Any other status is an error.
 *
 * Returns 0 on success with fp->is_ready set to 1; returns -1 on failure with
 * fp->fd set to -1.
 */
int khttp_connect_file(knetFile *fp)
{
    enum { KHTTP_BUF = 0x10000 };
    int ret, l = 0, cap, ok = 0;
    char *buf, *p;
    size_t need;
    if (fp->fd != -1) netclose(fp->fd);
    fp->fd = socket_connect(fp->host, fp->port);
    if (fp->fd == -1) return -1;
    /* the request is formatted with sprintf(): make sure path and host fit,
     * plus room for the fixed text and a 64-bit offset */
    need = strlen(fp->path) + strlen(fp->http_host) + 128;
    cap = need > KHTTP_BUF? (int)need : KHTTP_BUF;
    buf = (char*)calloc(cap, 1);
    if (buf == 0) {
        netclose(fp->fd);
        fp->fd = -1;
        return -1;
    }
    l += sprintf(buf + l, "GET %s HTTP/1.0\r\nHost: %s\r\n", fp->path, fp->http_host);
    l += sprintf(buf + l, "Range: bytes=%lld-\r\n", (long long)fp->offset);
    l += sprintf(buf + l, "\r\n");
    netwrite(fp->fd, buf, l);
    l = 0;
    /* stop on EOF *and* on error (-1), and never write past the buffer */
    while (l < cap - 1 && netread(fp->fd, buf + l, 1) > 0) { // read HTTP header; FIXME: bad efficiency
        if (buf[l] == '\n' && l >= 3)
            if (strncmp(buf + l - 3, "\r\n\r\n", 4) == 0) break;
        ++l;
    }
    buf[l] = 0;
    if (l >= 14 && l < cap - 1) { /* neither a prematured nor an oversized header */
        ret = strtol(buf + 8, &p, 0); // HTTP return code
        if (ret == 200 && fp->offset>0) { // 200 (complete result); then skip beginning of the file
            off_t rest = fp->offset;
            while (rest) {
                off_t want = rest < KHTTP_BUF? rest : KHTTP_BUF;
                off_t got = my_netread(fp->fd, buf, want);
                if (got <= 0) break; /* stream ended before the offset was reached */
                rest -= got;
            }
            ok = (rest == 0);
        } else if (ret != 206 && ret != 200) {
            fprintf(stderr, "[khttp_connect_file] fail to open file (HTTP code: %d).\n", ret);
        } else ok = 1;
    }
    free(buf); /* released on every path */
    if (!ok) {
        netclose(fp->fd);
        fp->fd = -1;
        return -1;
    }
    fp->is_ready = 1;
    return 0;
}
|
||||||
|
|
||||||
|
/********************
|
||||||
|
* Generic routines *
|
||||||
|
********************/
|
||||||
|
|
||||||
|
knetFile *knet_open(const char *fn, const char *mode)
|
||||||
|
{
|
||||||
|
knetFile *fp = 0;
|
||||||
|
if (mode[0] != 'r') {
|
||||||
|
fprintf(stderr, "[kftp_open] only mode \"r\" is supported.\n");
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
if (strstr(fn, "ftp://") == fn) {
|
||||||
|
fp = kftp_parse_url(fn, mode);
|
||||||
|
if (fp == 0) return 0;
|
||||||
|
if (kftp_connect(fp) == -1) {
|
||||||
|
knet_close(fp);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
kftp_connect_file(fp);
|
||||||
|
} else if (strstr(fn, "http://") == fn) {
|
||||||
|
fp = khttp_parse_url(fn, mode);
|
||||||
|
if (fp == 0) return 0;
|
||||||
|
khttp_connect_file(fp);
|
||||||
|
} else { // local file
|
||||||
|
#ifdef _WIN32
|
||||||
|
/* In windows, O_BINARY is necessary. In Linux/Mac, O_BINARY may
|
||||||
|
* be undefined on some systems, although it is defined on my
|
||||||
|
* Mac and the Linux I have tested on. */
|
||||||
|
int fd = open(fn, O_RDONLY | O_BINARY);
|
||||||
|
#else
|
||||||
|
int fd = open(fn, O_RDONLY);
|
||||||
|
#endif
|
||||||
|
if (fd == -1) {
|
||||||
|
perror("open");
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
fp = (knetFile*)calloc(1, sizeof(knetFile));
|
||||||
|
fp->type = KNF_TYPE_LOCAL;
|
||||||
|
fp->fd = fd;
|
||||||
|
fp->ctrl_fd = -1;
|
||||||
|
}
|
||||||
|
if (fp && fp->fd == -1) {
|
||||||
|
knet_close(fp);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
return fp;
|
||||||
|
}
|
||||||
|
|
||||||
|
knetFile *knet_dopen(int fd, const char *mode)
|
||||||
|
{
|
||||||
|
knetFile *fp = (knetFile*)calloc(1, sizeof(knetFile));
|
||||||
|
fp->type = KNF_TYPE_LOCAL;
|
||||||
|
fp->fd = fd;
|
||||||
|
return fp;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Read up to len bytes into buf and advance fp->offset by the amount read.
 *
 * For FTP and HTTP files whose connection is not ready (fp->is_ready == 0,
 * e.g. after knet_seek()), the connection is re-established first. Local
 * files are read in a loop until len bytes were obtained or end-of-file is
 * reached, retrying reads interrupted by a signal.
 *
 * Returns the number of bytes read, 0 when there is no open descriptor, or
 * -1 on a read error on a local file.
 */
off_t knet_read(knetFile *fp, void *buf, off_t len)
{
    off_t l = 0;
    if (fp->fd == -1) return 0;
    if (fp->type == KNF_TYPE_FTP) {
        if (fp->is_ready == 0) {
            if (!fp->no_reconnect) kftp_reconnect(fp);
            kftp_connect_file(fp);
        }
    } else if (fp->type == KNF_TYPE_HTTP) {
        if (fp->is_ready == 0)
            khttp_connect_file(fp);
    }
    /* a failed (re)connect leaves fd at -1: same answer as the guard above
     * instead of reading from an invalid descriptor */
    if (fp->fd == -1) return 0;
    if (fp->type == KNF_TYPE_LOCAL) { // on Windows, the following block is necessary; not on UNIX
        off_t rest = len, curr;
        while (rest) {
            do {
                /* arithmetic on void* is not valid ISO C; step in bytes */
                curr = read(fp->fd, (char*)buf + l, rest);
            } while (curr < 0 && EINTR == errno);
            if (curr < 0) return -1;
            if (curr == 0) break;
            l += curr; rest -= curr;
        }
    } else l = my_netread(fp->fd, buf, len);
    fp->offset += l;
    return l;
}
|
||||||
|
|
||||||
|
/*
 * Change the read position of fp.
 *
 * Local files are repositioned with lseek(). For FTP and HTTP only
 * fp->offset is updated and fp->is_ready is cleared; the connection is
 * reopened at the new position by the next knet_read(). SEEK_END is
 * supported for local and FTP files, not for HTTP.
 *
 * Returns 0 when SEEK_SET targets the current offset, otherwise the resulting
 * absolute offset (for SEEK_SET this equals off). Returns -1 on failure:
 * errno is ESPIPE for SEEK_END on HTTP, EINVAL for an unknown whence or file
 * type, or whatever lseek() set.
 */
off_t knet_seek(knetFile *fp, int64_t off, int whence)
{
    if (whence == SEEK_SET && off == fp->offset) return 0;
    if (fp->type == KNF_TYPE_LOCAL) {
        /* Be aware that lseek() returns the offset after seeking,
         * while fseek() returns zero on success. */
        off_t offset = lseek(fp->fd, off, whence);
        if (offset == -1) {
            // Be silent, it is OK for knet_seek to fail when the file is streamed
            // fprintf(stderr,"[knet_seek] %s\n", strerror(errno));
            return -1;
        }
        fp->offset = offset;
        /* returning off would make a successful backward SEEK_CUR/SEEK_END
         * (negative off) indistinguishable from an error */
        return offset;
    } else if (fp->type == KNF_TYPE_FTP) {
        if (whence==SEEK_CUR)
            fp->offset += off;
        else if (whence==SEEK_SET)
            fp->offset = off;
        else if ( whence==SEEK_END)
            fp->offset = fp->file_size+off;
        else {
            errno = EINVAL;
            return -1;
        }
        fp->is_ready = 0;
        return fp->offset;
    } else if (fp->type == KNF_TYPE_HTTP) {
        if (whence == SEEK_END) { // FIXME: can we allow SEEK_END in future?
            fprintf(stderr, "[knet_seek] SEEK_END is not supported for HTTP. Offset is unchanged.\n");
            errno = ESPIPE;
            return -1;
        }
        if (whence==SEEK_CUR)
            fp->offset += off;
        else if (whence==SEEK_SET)
            fp->offset = off;
        else {
            errno = EINVAL;
            return -1;
        }
        fp->is_ready = 0;
        return fp->offset;
    }
    errno = EINVAL;
    fprintf(stderr,"[knet_seek] %s\n", strerror(errno));
    return -1;
}
|
||||||
|
|
||||||
|
/*
 * Close the descriptors held by fp and release the object together with all
 * strings it owns. Passing 0 is allowed. Always returns 0.
 */
int knet_close(knetFile *fp)
{
    if (fp == 0) return 0;
    if (fp->ctrl_fd != -1) netclose(fp->ctrl_fd); // FTP specific
    if (fp->fd != -1) {
        /* On Linux/Mac, netclose() is an alias of close(), but on
         * Windows, it is an alias of closesocket(). */
        if (fp->type == KNF_TYPE_LOCAL) close(fp->fd);
        else netclose(fp->fd);
    }
    free(fp->host); free(fp->port);
    /* size_cmd is allocated next to retr by the FTP URL parser and must be
     * released with it */
    free(fp->response); free(fp->retr); free(fp->size_cmd); // FTP specific
    free(fp->path); free(fp->http_host); // HTTP specific
    free(fp);
    return 0;
}
|
||||||
|
|
||||||
|
#ifdef KNETFILE_MAIN
|
||||||
|
int main(void)
|
||||||
|
{
|
||||||
|
char *buf;
|
||||||
|
knetFile *fp;
|
||||||
|
int type = 4, l;
|
||||||
|
#ifdef _WIN32
|
||||||
|
knet_win32_init();
|
||||||
|
#endif
|
||||||
|
buf = calloc(0x100000, 1);
|
||||||
|
if (type == 0) {
|
||||||
|
fp = knet_open("knetfile.c", "r");
|
||||||
|
knet_seek(fp, 1000, SEEK_SET);
|
||||||
|
} else if (type == 1) { // NCBI FTP, large file
|
||||||
|
fp = knet_open("ftp://ftp.ncbi.nih.gov/1000genomes/ftp/data/NA12878/alignment/NA12878.chrom6.SLX.SRP000032.2009_06.bam", "r");
|
||||||
|
knet_seek(fp, 2500000000ll, SEEK_SET);
|
||||||
|
l = knet_read(fp, buf, 255);
|
||||||
|
} else if (type == 2) {
|
||||||
|
fp = knet_open("ftp://ftp.sanger.ac.uk/pub4/treefam/tmp/index.shtml", "r");
|
||||||
|
knet_seek(fp, 1000, SEEK_SET);
|
||||||
|
} else if (type == 3) {
|
||||||
|
fp = knet_open("http://www.sanger.ac.uk/Users/lh3/index.shtml", "r");
|
||||||
|
knet_seek(fp, 1000, SEEK_SET);
|
||||||
|
} else if (type == 4) {
|
||||||
|
fp = knet_open("http://www.sanger.ac.uk/Users/lh3/ex1.bam", "r");
|
||||||
|
knet_read(fp, buf, 10000);
|
||||||
|
knet_seek(fp, 20000, SEEK_SET);
|
||||||
|
knet_seek(fp, 10000, SEEK_SET);
|
||||||
|
l = knet_read(fp, buf+10000, 10000000) + 10000;
|
||||||
|
}
|
||||||
|
if (type != 4 && type != 1) {
|
||||||
|
knet_read(fp, buf, 255);
|
||||||
|
buf[255] = 0;
|
||||||
|
printf("%s\n", buf);
|
||||||
|
} else write(fileno(stdout), buf, l);
|
||||||
|
knet_close(fp);
|
||||||
|
free(buf);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
@ -0,0 +1,75 @@
|
||||||
|
#ifndef KNETFILE_H
|
||||||
|
#define KNETFILE_H
|
||||||
|
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <fcntl.h>
|
||||||
|
|
||||||
|
#ifndef _WIN32
|
||||||
|
#define netread(fd, ptr, len) read(fd, ptr, len)
|
||||||
|
#define netwrite(fd, ptr, len) write(fd, ptr, len)
|
||||||
|
#define netclose(fd) close(fd)
|
||||||
|
#else
|
||||||
|
#include <winsock2.h>
|
||||||
|
#define netread(fd, ptr, len) recv(fd, ptr, len, 0)
|
||||||
|
#define netwrite(fd, ptr, len) send(fd, ptr, len, 0)
|
||||||
|
#define netclose(fd) closesocket(fd)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// FIXME: currently I/O is unbuffered
|
||||||
|
|
||||||
|
#define KNF_TYPE_LOCAL 1
|
||||||
|
#define KNF_TYPE_FTP 2
|
||||||
|
#define KNF_TYPE_HTTP 3
|
||||||
|
|
||||||
|
/* State of one file opened through knet_open()/knet_dopen(). */
typedef struct knetFile_s {
    int type;             /* one of the KNF_TYPE_* constants */
    int fd;               /* descriptor the data are read from; -1 when not open */
    int64_t offset;       /* current read position, as reported by knet_tell() */
    char *host;           /* name handed to the connect routine (server or proxy) */
    char *port;           /* port as text */

    // the following are for FTP only
    int ctrl_fd;          /* FTP control connection; -1 when not open */
    int pasv_ip[4];       /* host numbers taken from the PASV reply */
    int pasv_port;        /* port taken from the PASV reply; 0 until one is recorded */
    int max_response;     /* allocated size of `response` */
    int no_reconnect;     /* set when the open mode contains 'c' */
    int is_ready;         /* 1 once the data connection is positioned at `offset`;
                             cleared by knet_seek(); also used for HTTP files */
    char *response;       /* text of the last control-connection reply */
    char *retr;           /* prebuilt "RETR <path>" command line */
    char *size_cmd;       /* prebuilt "SIZE <path>" command line */
    int64_t seek_offset;  // for lazy seek
    int64_t file_size;    /* size reported by the server in reply to SIZE */

    // the following are for HTTP only
    char *path;           /* request target sent in the GET line */
    char *http_host;      /* value of the Host: header */
} knetFile;
|
||||||
|
|
||||||
|
#define knet_tell(fp) ((fp)->offset)
|
||||||
|
#define knet_fileno(fp) ((fp)->fd)
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef _WIN32
|
||||||
|
int knet_win32_init();
|
||||||
|
void knet_win32_destroy();
|
||||||
|
#endif
|
||||||
|
|
||||||
|
knetFile *knet_open(const char *fn, const char *mode);
|
||||||
|
|
||||||
|
/*
|
||||||
|
This only works with local files.
|
||||||
|
*/
|
||||||
|
knetFile *knet_dopen(int fd, const char *mode);
|
||||||
|
|
||||||
|
/*
|
||||||
|
If ->is_ready==0, this routine updates ->fd; otherwise, it simply
|
||||||
|
reads from ->fd.
|
||||||
|
*/
|
||||||
|
off_t knet_read(knetFile *fp, void *buf, off_t len);
|
||||||
|
|
||||||
|
/*
|
||||||
|
This routine only sets ->offset and ->is_ready=0. It does not
|
||||||
|
communicate with the FTP server.
|
||||||
|
*/
|
||||||
|
off_t knet_seek(knetFile *fp, int64_t off, int whence);
|
||||||
|
int knet_close(knetFile *fp);
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
@ -0,0 +1,172 @@
|
||||||
|
#include <ctype.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include "knhx.h"
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
int error, n, max;
|
||||||
|
knhx1_t *node;
|
||||||
|
} knaux_t;
|
||||||
|
|
||||||
|
/*
 * Append one node to aux->node, parsing its label from s.
 *
 * s   - points just past a ')' for an internal node, or at the first
 *       character of a leaf.
 * x   - number of children, stored in the node's `n`.
 *
 * Scanning stops at ',' or ')' or the end of the string. The node name is
 * the text before the first '[', ':' or non-graphic character; text inside
 * [...] is skipped; a number after ':' becomes the branch length `d`
 * (-1.0 when absent). An unterminated '[' sets KNERR_BRACKET in aux->error.
 *
 * Returns the position where scanning stopped.
 */
static inline char *add_node(const char *s, knaux_t *aux, int x)
{
    char *p, *nbeg, *nend = 0;
    knhx1_t *r;
    if (aux->n == aux->max) {
        aux->max = aux->max? aux->max<<1 : 8;
        aux->node = (knhx1_t*)realloc(aux->node, sizeof(knhx1_t) * aux->max);
    }
    r = aux->node + (aux->n++);
    r->n = x; r->parent = -1;
    /* realloc() does not clear memory: give `child` a defined value so that
     * nodes whose child array is never assigned do not carry a wild pointer */
    r->child = 0;
    for (p = (char*)s, nbeg = p, r->d = -1.0; *p && *p != ',' && *p != ')'; ++p) {
        if (*p == '[') {
            if (nend == 0) nend = p;
            do ++p; while (*p && *p != ']');
            if (*p == 0) {
                aux->error |= KNERR_BRACKET;
                break;
            }
        } else if (*p == ':') {
            if (nend == 0) nend = p;
            r->d = strtod(p + 1, &p);
            --p; /* the loop increment steps onto the first unparsed character */
        } else if (!isgraph((unsigned char)*p)) { /* <ctype.h> needs an unsigned char value */
            if (nend == 0) nend = p;
        }
    }
    if (nend == 0) nend = p;
    if (nend != nbeg) {
        r->name = (char*)calloc(nend - nbeg + 1, 1);
        strncpy(r->name, nbeg, nend - nbeg);
    } else r->name = strdup("");
    return p;
}
|
||||||
|
|
||||||
|
knhx1_t *kn_parse(const char *nhx, int *_n, int *_error)
|
||||||
|
{
|
||||||
|
char *p;
|
||||||
|
int *stack, top, max;
|
||||||
|
knaux_t *aux;
|
||||||
|
knhx1_t *ret;
|
||||||
|
|
||||||
|
#define __push_back(y) do { \
|
||||||
|
if (top == max) { \
|
||||||
|
max = max? max<<1 : 16; \
|
||||||
|
stack = (int*)realloc(stack, sizeof(int) * max); \
|
||||||
|
} \
|
||||||
|
stack[top++] = (y); \
|
||||||
|
} while (0) \
|
||||||
|
|
||||||
|
stack = 0; top = max = 0;
|
||||||
|
p = (char*)nhx;
|
||||||
|
aux = (knaux_t*)calloc(1, sizeof(knaux_t));
|
||||||
|
while (*p) {
|
||||||
|
while (*p && !isgraph(*p)) ++p;
|
||||||
|
if (*p == 0) break;
|
||||||
|
if (*p == ',') ++p;
|
||||||
|
else if (*p == '(') {
|
||||||
|
__push_back(-1);
|
||||||
|
++p;
|
||||||
|
} else if (*p == ')') {
|
||||||
|
int x = aux->n, m, i;
|
||||||
|
for (i = top - 1; i >= 0; --i)
|
||||||
|
if (stack[i] < 0) break;
|
||||||
|
m = top - 1 - i;
|
||||||
|
p = add_node(p + 1, aux, m);
|
||||||
|
aux->node[x].child = (int*)calloc(m, sizeof(int));
|
||||||
|
for (i = top - 1, m = m - 1; m >= 0; --m, --i) {
|
||||||
|
aux->node[x].child[m] = stack[i];
|
||||||
|
aux->node[stack[i]].parent = x;
|
||||||
|
}
|
||||||
|
top = i;
|
||||||
|
__push_back(x);
|
||||||
|
} else {
|
||||||
|
__push_back(aux->n);
|
||||||
|
p = add_node(p, aux, 0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
*_n = aux->n;
|
||||||
|
*_error = aux->error;
|
||||||
|
ret = aux->node;
|
||||||
|
free(aux); free(stack);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifndef kroundup32
|
||||||
|
#define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x))
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/*
 * Append the l bytes at p to the string s, growing it as needed and keeping
 * it NUL-terminated. Returns l, or EOF if l is negative or memory cannot be
 * obtained; s is unchanged in that case.
 */
static inline int kputsn(const char *p, int l, kstring_t *s)
{
    if (l < 0) return EOF;
    if (s->l + l + 1 >= s->m) {
        size_t want = s->l + l + 2;
        char *grown;
        kroundup32(want);
        /* assign only after success so the old buffer is not lost */
        grown = (char*)realloc(s->s, want);
        if (grown == 0) return EOF;
        s->s = grown;
        s->m = want;
    }
    memcpy(s->s + s->l, p, l);
    s->l += l; s->s[s->l] = 0;
    return l;
}
|
||||||
|
|
||||||
|
/*
 * Append the single character c to the string s, growing it as needed and
 * keeping it NUL-terminated. Returns c, or EOF if memory cannot be obtained;
 * s is unchanged in that case.
 */
static inline int kputc(int c, kstring_t *s)
{
    if (s->l + 1 >= s->m) {
        size_t want = s->l + 2;
        char *grown;
        kroundup32(want);
        /* assign only after success so the old buffer is not lost */
        grown = (char*)realloc(s->s, want);
        if (grown == 0) return EOF;
        s->s = grown;
        s->m = want;
    }
    s->s[s->l++] = c; s->s[s->l] = 0;
    return c;
}
|
||||||
|
|
||||||
|
/*
 * Append the textual form of the subtree rooted at p to s.
 *
 * node   - the complete node array; child indices of p refer to it.
 * numbuf - caller-provided scratch space used to format the branch length.
 *
 * An internal node is written as "(child,child,...)" followed by its label;
 * a leaf is written as its label only. The label is the name, followed by
 * ":<length>" when the branch length is not negative.
 */
static void format_node_recur(const knhx1_t *node, const knhx1_t *p, kstring_t *s, char *numbuf)
{
    if (p->n) {
        int i;
        kputc('(', s);
        for (i = 0; i < p->n; ++i) {
            if (i) kputc(',', s);
            format_node_recur(node, &node[p->child[i]], s, numbuf);
        }
        kputc(')', s);
    }
    /* the label is written the same way for both kinds of node; a missing
     * name is tolerated for leaves too */
    if (p->name) kputsn(p->name, strlen(p->name), s);
    if (p->d >= 0) {
        sprintf(numbuf, ":%g", p->d);
        kputsn(numbuf, strlen(numbuf), s);
    }
}
|
||||||
|
|
||||||
|
/*
 * Append the textual form of the tree whose root is node[root] to s.
 * The output is added to whatever s already contains.
 */
void kn_format(const knhx1_t *node, int root, kstring_t *s) // TODO: get rid of recursion
{
    char scratch[128]; /* reused by every level to format branch lengths */
    const knhx1_t *start = node + root;

    format_node_recur(node, start, s, scratch);
}
|
||||||
|
|
||||||
|
#ifdef KNHX_MAIN
|
||||||
|
int main(int argc, char *argv[])
|
||||||
|
{
|
||||||
|
char *s = "((a[abc],d1)x:0.5,((b[&&NHX:S=MOUSE],h2)[&&NHX:S=HUMAN:B=99][blabla][&&NHX:K=foo],c))";
|
||||||
|
knhx1_t *node;
|
||||||
|
int i, j, n, error;
|
||||||
|
kstring_t str;
|
||||||
|
node = kn_parse(s, &n, &error);
|
||||||
|
for (i = 0; i < n; ++i) {
|
||||||
|
knhx1_t *p = node + i;
|
||||||
|
printf("[%d] %s\t%d\t%d\t%g", i, p->name, p->parent, p->n, p->d);
|
||||||
|
for (j = 0; j < p->n; ++j)
|
||||||
|
printf("\t%d", p->child[j]);
|
||||||
|
putchar('\n');
|
||||||
|
}
|
||||||
|
str.l = str.m = 0; str.s = 0;
|
||||||
|
kn_format(node, n-1, &str);
|
||||||
|
puts(str.s);
|
||||||
|
free(str.s);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
@ -0,0 +1,35 @@
|
||||||
|
#ifndef KNHX_H_
|
||||||
|
#define KNHX_H_
|
||||||
|
|
||||||
|
#define KNERR_MISSING_LEFT 0x01
|
||||||
|
#define KNERR_MISSING_RGHT 0x02
|
||||||
|
#define KNERR_BRACKET 0x04
|
||||||
|
#define KNERR_COLON 0x08
|
||||||
|
|
||||||
|
/* One node of a parsed tree; nodes refer to each other by array index. */
typedef struct {
    int parent;   /* index of the parent node; -1 when none has been assigned */
    int n;        /* number of children; 0 for a leaf */
    int *child;   /* indices of the n children */
    char *name;   /* node name; "" when the node has none */
    double d;     /* branch length; -1.0 when none was given */
} knhx1_t;
|
||||||
|
|
||||||
|
#ifndef KSTRING_T
|
||||||
|
#define KSTRING_T kstring_t
|
||||||
|
/* Growable, NUL-terminated character buffer. */
typedef struct __kstring_t {
    size_t l;  /* number of characters stored, not counting the terminator */
    size_t m;  /* number of bytes allocated for s */
    char *s;   /* the buffer; 0 while nothing has been allocated */
} kstring_t;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
knhx1_t *kn_parse(const char *nhx, int *_n, int *_error);
|
||||||
|
void kn_format(const knhx1_t *node, int root, kstring_t *s);
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
@ -0,0 +1,343 @@
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <fcntl.h>
|
||||||
|
#include <errno.h>
|
||||||
|
#include <ctype.h>
|
||||||
|
#include <unistd.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <signal.h>
|
||||||
|
#include <sys/types.h>
|
||||||
|
#ifndef _WIN32
|
||||||
|
#include <netdb.h>
|
||||||
|
#include <arpa/inet.h>
|
||||||
|
#include <sys/socket.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef _WIN32
|
||||||
|
#define _KO_NO_NET
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef _KO_NO_NET
|
||||||
|
/*
 * Wait up to five seconds for fd to become readable (is_read != 0) or
 * writable (is_read == 0).
 *
 * Returns the result of select(): 1 when the descriptor is ready, 0 on
 * timeout, -1 on error. A descriptor that cannot be placed in an fd_set
 * gives -1 with errno set to EBADF.
 */
static int socket_wait(int fd, int is_read)
{
    fd_set fds, *fdr = 0, *fdw = 0;
    struct timeval tv;
    int ret;
    /* FD_SET() is undefined for descriptors outside [0, FD_SETSIZE) */
    if (fd < 0 || fd >= FD_SETSIZE) {
        errno = EBADF;
        return -1;
    }
    tv.tv_sec = 5; tv.tv_usec = 0; // 5 seconds time out
    FD_ZERO(&fds);
    FD_SET(fd, &fds);
    if (is_read) fdr = &fds;
    else fdw = &fds;
    ret = select(fd+1, fdr, fdw, 0, &tv);
    if (ret == -1) perror("select");
    return ret;
}
|
||||||
|
|
||||||
|
/*
 * Resolve host:port and open a connected TCP socket.
 *
 * The addresses returned by getaddrinfo() are tried in order until one can
 * be connected. SO_REUSEADDR is enabled and SO_LINGER is set with l_onoff 0.
 *
 * Returns the connected descriptor, or -1 after printing a diagnostic to
 * standard error. No descriptor is left open on failure.
 */
static int socket_connect(const char *host, const char *port)
{
    int ai_err, on = 1, fd = -1, saved_errno = 0;
    const char *failed = 0; /* name of the call that failed last */
    struct linger lng = { 0, 0 };
    struct addrinfo hints, *res = 0, *ai;
    memset(&hints, 0, sizeof(struct addrinfo));
    hints.ai_family = AF_UNSPEC;
    hints.ai_socktype = SOCK_STREAM;
    if ((ai_err = getaddrinfo(host, port, &hints, &res)) != 0) { fprintf(stderr, "can't resolve %s:%s: %s\n", host, port, gai_strerror(ai_err)); return -1; }
    for (ai = res; ai != 0; ai = ai->ai_next) {
        fd = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol);
        if (fd == -1) {
            failed = "socket";
            saved_errno = errno;
            continue;
        }
        if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on)) == -1
            || setsockopt(fd, SOL_SOCKET, SO_LINGER, &lng, sizeof(lng)) == -1)
            failed = "setsockopt";
        else if (connect(fd, ai->ai_addr, ai->ai_addrlen) != 0)
            failed = "connect";
        else break; /* connected */
        /* this address did not work: release the socket before moving on */
        saved_errno = errno;
        close(fd);
        fd = -1;
    }
    freeaddrinfo(res);
    if (fd == -1) {
        errno = saved_errno; /* close() may have changed it */
        perror(failed? failed : "connect");
    }
    return fd;
}
|
||||||
|
|
||||||
|
/*
 * Open an "http://host[:port][/path]" URL and return a descriptor positioned
 * at the start of the response body.
 *
 * The request is sent to the proxy named by the http_proxy environment
 * variable when it is set, otherwise directly to the host (default port 80).
 * Only status 200 is accepted.
 *
 * Returns the descriptor, or -1 on any failure (including a URL that does not
 * start with "http://").
 */
static int http_open(const char *fn)
{
    enum { HTTP_BUF = 0x10000 };
    const char *p;
    char *proxy, *q, *end, *http_host = 0, *host = 0, *port = 0, *path = 0, *buf = 0;
    int fd = -1, ret, l, cap, ok = 0;
    size_t need;

    /* parse URL; adapted from khttp_parse_url() in knetfile.c */
    if (strstr(fn, "http://") != fn) return -1; /* 0 would be a valid descriptor */
    // set ->http_host
    for (p = fn + 7; *p && *p != '/'; ++p);
    l = p - fn - 7;
    http_host = (char*)calloc(l + 1, 1);
    if (http_host == 0) return -1;
    memcpy(http_host, fn + 7, l);
    for (q = http_host; *q && *q != ':'; ++q);
    if (*q == ':') *q++ = 0;
    // get http_proxy
    proxy = getenv("http_proxy");
    // set host, port and path
    if (proxy == 0) {
        host = strdup(http_host); // when there is no proxy, server name is identical to http_host name.
        port = strdup(*q? q : "80");
        path = strdup(*p? p : "/");
    } else {
        host = (strstr(proxy, "http://") == proxy)? strdup(proxy + 7) : strdup(proxy);
        if (host) {
            /* "http://proxy:port/" must not leave the path glued to the port */
            if ((q = strchr(host, '/')) != 0) *q = 0;
            for (q = host; *q && *q != ':'; ++q);
            if (*q == ':') *q++ = 0;
            port = strdup(*q? q : "80");
        }
        path = strdup(fn);
    }
    if (host == 0 || port == 0 || path == 0) goto http_open_end;

    /* connect; adapted from khttp_connect() in knetfile.c */
    fd = socket_connect(host, port);
    if (fd == -1) goto http_open_end;
    /* the request is formatted with sprintf(): make sure path and host fit */
    need = strlen(path) + strlen(http_host) + 64;
    cap = need > HTTP_BUF? (int)need : HTTP_BUF;
    buf = (char*)calloc(cap, 1);
    if (buf != 0) {
        l = sprintf(buf, "GET %s HTTP/1.0\r\nHost: %s\r\n", path, http_host);
        l += sprintf(buf + l, "\r\n");
        if (write(fd, buf, l) >= 0) {
            l = 0;
            /* stop on EOF *and* on error (-1), and never write past the buffer */
            while (l < cap - 1 && read(fd, buf + l, 1) > 0) { // read HTTP header; FIXME: bad efficiency
                if (buf[l] == '\n' && l >= 3)
                    if (strncmp(buf + l - 3, "\r\n\r\n", 4) == 0) break;
                ++l;
            }
            buf[l] = 0;
            if (l >= 14 && l < cap - 1) { /* neither a prematured nor an oversized header */
                ret = strtol(buf + 8, &end, 0); // HTTP return code
                ok = (ret == 200);
            }
        }
    }
    if (!ok) { /* closed exactly once, whatever went wrong */
        close(fd);
        fd = -1;
    }

http_open_end:
    free(buf); free(http_host); free(host); free(port); free(path);
    return fd;
}
|
||||||
|
|
||||||
|
/* State of an FTP control connection while a file is being opened. */
typedef struct {
    int max_response;  /* allocated size of `response` */
    int ctrl_fd;       /* descriptor of the control connection */
    char *response;    /* text of the last reply; 0 until one has been read */
} ftpaux_t;
|
||||||
|
|
||||||
|
/*
 * Read one complete reply from the FTP control connection into aux->response.
 *
 * Bytes are read one at a time. Every line that does not look like the final
 * line of a reply (three digits followed by anything but '-') is discarded,
 * so on success aux->response holds only the final line, with the trailing
 * two bytes (normally CR LF) cut off.
 *
 * Returns the numeric reply code, 0 if the control socket does not become
 * readable, or -1 if fewer than two bytes were collected or the response
 * buffer could not be grown.
 */
static int kftp_get_response(ftpaux_t *aux)
{
    unsigned char c;
    int n = 0;
    char *p;
    if (socket_wait(aux->ctrl_fd, 1) <= 0) return 0;
    /* read() yields -1 on error, which is non-zero: only a positive count means a byte arrived */
    while (read(aux->ctrl_fd, &c, 1) > 0) { // FIXME: this is *VERY BAD* for unbuffered I/O
        if (n >= aux->max_response) {
            int new_max = aux->max_response? aux->max_response<<1 : 256;
            char *grown = (char*)realloc(aux->response, new_max);
            if (grown == 0) return -1; /* keep the old buffer and size intact */
            aux->response = grown;
            aux->max_response = new_max;
        }
        aux->response[n++] = c;
        if (c == '\n') {
            /* <ctype.h> functions require a value representable as unsigned char */
            if (n >= 4 && isdigit((unsigned char)aux->response[0])
                && isdigit((unsigned char)aux->response[1])
                && isdigit((unsigned char)aux->response[2])
                && aux->response[3] != '-') break;
            n = 0; /* continuation or informational line: start over */
            continue;
        }
    }
    if (n < 2) return -1;
    aux->response[n-2] = 0;
    return strtol(aux->response, &p, 0);
}
|
||||||
|
|
||||||
|
/*
 * Send one command line on the FTP control connection.
 *
 * cmd must already end with CR LF. When is_get is non-zero the reply is read
 * with kftp_get_response() and its result is returned; otherwise 0 is
 * returned once the command has been written.
 * Returns -1 if the socket is not writable or the write itself fails.
 */
static int kftp_send_cmd(ftpaux_t *aux, const char *cmd, int is_get)
{
    if (socket_wait(aux->ctrl_fd, 0) <= 0) return -1; // socket is not ready for writing
    /* do not wait for a reply to a command that was never sent */
    if (write(aux->ctrl_fd, cmd, strlen(cmd)) < 0) return -1;
    return is_get? kftp_get_response(aux) : 0;
}
|
||||||
|
|
||||||
|
static int ftp_open(const char *fn)
|
||||||
|
{
|
||||||
|
char *p, *host = 0, *port = 0, *retr = 0;
|
||||||
|
char host2[80], port2[10];
|
||||||
|
int v[6], l, fd = -1, ret, pasv_port, pasv_ip[4];
|
||||||
|
ftpaux_t aux;
|
||||||
|
|
||||||
|
/* parse URL */
|
||||||
|
if (strstr(fn, "ftp://") != fn) return 0;
|
||||||
|
for (p = (char*)fn + 6; *p && *p != '/'; ++p);
|
||||||
|
if (*p != '/') return 0;
|
||||||
|
l = p - fn - 6;
|
||||||
|
port = strdup("21");
|
||||||
|
host = calloc(l + 1, 1);
|
||||||
|
strncpy(host, fn + 6, l);
|
||||||
|
retr = calloc(strlen(p) + 8, 1);
|
||||||
|
sprintf(retr, "RETR %s\r\n", p);
|
||||||
|
|
||||||
|
/* connect to ctrl */
|
||||||
|
memset(&aux, 0, sizeof(ftpaux_t));
|
||||||
|
aux.ctrl_fd = socket_connect(host, port);
|
||||||
|
if (aux.ctrl_fd == -1) goto ftp_open_end; /* fail to connect ctrl */
|
||||||
|
|
||||||
|
/* connect to the data stream */
|
||||||
|
kftp_get_response(&aux);
|
||||||
|
kftp_send_cmd(&aux, "USER anonymous\r\n", 1);
|
||||||
|
kftp_send_cmd(&aux, "PASS kopen@\r\n", 1);
|
||||||
|
kftp_send_cmd(&aux, "TYPE I\r\n", 1);
|
||||||
|
kftp_send_cmd(&aux, "PASV\r\n", 1);
|
||||||
|
for (p = aux.response; *p && *p != '('; ++p);
|
||||||
|
if (*p != '(') goto ftp_open_end;
|
||||||
|
++p;
|
||||||
|
sscanf(p, "%d,%d,%d,%d,%d,%d", &v[0], &v[1], &v[2], &v[3], &v[4], &v[5]);
|
||||||
|
memcpy(pasv_ip, v, 4 * sizeof(int));
|
||||||
|
pasv_port = (v[4]<<8&0xff00) + v[5];
|
||||||
|
kftp_send_cmd(&aux, retr, 0);
|
||||||
|
sprintf(host2, "%d.%d.%d.%d", pasv_ip[0], pasv_ip[1], pasv_ip[2], pasv_ip[3]);
|
||||||
|
sprintf(port2, "%d", pasv_port);
|
||||||
|
fd = socket_connect(host2, port2);
|
||||||
|
if (fd == -1) goto ftp_open_end;
|
||||||
|
ret = kftp_get_response(&aux);
|
||||||
|
if (ret != 150) {
|
||||||
|
close(fd);
|
||||||
|
fd = -1;
|
||||||
|
}
|
||||||
|
close(aux.ctrl_fd);
|
||||||
|
|
||||||
|
ftp_open_end:
|
||||||
|
free(host); free(port); free(retr); free(aux.response);
|
||||||
|
return fd;
|
||||||
|
}
|
||||||
|
#endif /* !defined(_KO_NO_NET) */
|
||||||
|
|
||||||
|
/*
 * Split a command line at white space into a NULL-terminated argument vector.
 *
 * Leading and trailing white space is ignored and runs of white space count
 * as one separator; there is no quoting. All arguments live in one buffer,
 * so the result is released with free(argv[0]); free(argv);
 *
 * Returns 0 if cmd contains nothing but white space or memory runs out.
 */
static char **cmd2argv(const char *cmd)
{
    int i, beg, end, argc;
    char **argv, *str;
    /* <ctype.h> functions require a value representable as unsigned char,
     * hence the casts on every isspace() argument */
    end = strlen(cmd);
    for (i = end - 1; i >= 0; --i)
        if (!isspace((unsigned char)cmd[i])) break;
    end = i + 1;
    for (beg = 0; beg < end; ++beg)
        if (!isspace((unsigned char)cmd[beg])) break;
    if (beg == end) return 0;
    /* count the word boundaries: the number of words is one more */
    for (i = beg + 1, argc = 0; i < end; ++i)
        if (isspace((unsigned char)cmd[i]) && !isspace((unsigned char)cmd[i-1]))
            ++argc;
    argv = (char**)calloc(argc + 2, sizeof(char*)); /* words + terminating NULL */
    if (argv == 0) return 0;
    str = (char*)calloc(end - beg + 1, 1);
    if (str == 0) {
        free(argv);
        return 0;
    }
    argv[0] = str;
    memcpy(str, cmd + beg, end - beg);
    /* turn separators into terminators; a word starts right after one */
    for (i = argc = 1; i < end - beg; ++i)
        if (isspace((unsigned char)str[i])) str[i] = 0;
        else if (str[i] && str[i-1] == 0) argv[argc++] = &str[i];
    return argv;
}
|
||||||
|
|
||||||
|
/* Kinds of input source; kopen() stores one of these in koaux_t::type */
#define KO_STDIN 1
#define KO_FILE 2
#define KO_PIPE 3
#define KO_HTTP 4
#define KO_FTP 5

/* Handle returned (as void*) by kopen() and consumed by kclose() */
typedef struct {
	int type, fd; /* type: one of the KO_* values; fd: descriptor to read from */
	pid_t pid;    /* child process id; only assigned for KO_PIPE sources */
} koaux_t;
|
||||||
|
|
||||||
|
void *kopen(const char *fn, int *_fd)
|
||||||
|
{
|
||||||
|
koaux_t *aux = 0;
|
||||||
|
*_fd = -1;
|
||||||
|
if (strstr(fn, "http://") == fn) {
|
||||||
|
aux = calloc(1, sizeof(koaux_t));
|
||||||
|
aux->type = KO_HTTP;
|
||||||
|
aux->fd = http_open(fn);
|
||||||
|
} else if (strstr(fn, "ftp://") == fn) {
|
||||||
|
aux = calloc(1, sizeof(koaux_t));
|
||||||
|
aux->type = KO_FTP;
|
||||||
|
aux->fd = ftp_open(fn);
|
||||||
|
} else if (strcmp(fn, "-") == 0) {
|
||||||
|
aux = calloc(1, sizeof(koaux_t));
|
||||||
|
aux->type = KO_STDIN;
|
||||||
|
aux->fd = STDIN_FILENO;
|
||||||
|
} else {
|
||||||
|
const char *p, *q;
|
||||||
|
for (p = fn; *p; ++p)
|
||||||
|
if (!isspace(*p)) break;
|
||||||
|
if (*p == '<') { // pipe open
|
||||||
|
int need_shell, pfd[2];
|
||||||
|
pid_t pid;
|
||||||
|
// a simple check to see if we need to invoke a shell; not always working
|
||||||
|
for (q = p + 1; *q; ++q)
|
||||||
|
if (ispunct(*q) && *q != '.' && *q != '_' && *q != '-' && *q != ':')
|
||||||
|
break;
|
||||||
|
need_shell = (*q != 0);
|
||||||
|
pipe(pfd);
|
||||||
|
pid = vfork();
|
||||||
|
if (pid == -1) { /* vfork() error */
|
||||||
|
close(pfd[0]); close(pfd[1]);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
if (pid == 0) { /* the child process */
|
||||||
|
char **argv; /* FIXME: I do not know if this will lead to a memory leak */
|
||||||
|
close(pfd[0]);
|
||||||
|
dup2(pfd[1], STDOUT_FILENO);
|
||||||
|
close(pfd[1]);
|
||||||
|
if (!need_shell) {
|
||||||
|
argv = cmd2argv(p + 1);
|
||||||
|
execvp(argv[0], argv);
|
||||||
|
free(argv[0]); free(argv);
|
||||||
|
} else execl("/bin/sh", "sh", "-c", p + 1, NULL);
|
||||||
|
exit(1);
|
||||||
|
} else { /* parent process */
|
||||||
|
close(pfd[1]);
|
||||||
|
aux = calloc(1, sizeof(koaux_t));
|
||||||
|
aux->type = KO_PIPE;
|
||||||
|
aux->fd = pfd[0];
|
||||||
|
aux->pid = pid;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
#ifdef _WIN32
|
||||||
|
*_fd = open(fn, O_RDONLY | O_BINARY);
|
||||||
|
#else
|
||||||
|
*_fd = open(fn, O_RDONLY);
|
||||||
|
#endif
|
||||||
|
if (*_fd) {
|
||||||
|
aux = calloc(1, sizeof(koaux_t));
|
||||||
|
aux->type = KO_FILE;
|
||||||
|
aux->fd = *_fd;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
*_fd = aux->fd;
|
||||||
|
return aux;
|
||||||
|
}
|
||||||
|
|
||||||
|
int kclose(void *a)
|
||||||
|
{
|
||||||
|
koaux_t *aux = (koaux_t*)a;
|
||||||
|
if (aux->type == KO_PIPE) {
|
||||||
|
int status;
|
||||||
|
pid_t pid;
|
||||||
|
pid = waitpid(aux->pid, &status, WNOHANG);
|
||||||
|
if (pid != aux->pid) kill(aux->pid, 15);
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef _KO_MAIN
#define BUF_SIZE 0x10000
/* Test driver: copy the source named by argv[1] to standard output. */
int main(int argc, char *argv[])
{
	unsigned char chunk[BUF_SIZE];
	void *handle;
	FILE *in;
	int got, fd;
	if (argc == 1) {
		fprintf(stderr, "Usage: kopen <file>\n");
		return 1;
	}
	handle = kopen(argv[1], &fd);
	in = fdopen(fd, "r");
	if (in == 0) {
		fprintf(stderr, "ERROR: fail to open the input\n");
		return 1;
	}
	/* copy chunk by chunk; anything shorter than a full chunk ends the loop */
	for (;;) {
		got = fread(chunk, 1, BUF_SIZE, in);
		if (got != 0)
			fwrite(chunk, 1, got, stdout);
		if (got != BUF_SIZE) break;
	}
	fclose(in);
	kclose(handle);
	return 0;
}
#endif
|
||||||
|
|
@ -0,0 +1,474 @@
|
||||||
|
/* The MIT License
|
||||||
|
|
||||||
|
Copyright (c) 2019 by Attractive Chaos <attractor@live.co.uk>
|
||||||
|
|
||||||
|
Permission is hereby granted, free of charge, to any person obtaining
|
||||||
|
a copy of this software and associated documentation files (the
|
||||||
|
"Software"), to deal in the Software without restriction, including
|
||||||
|
without limitation the rights to use, copy, modify, merge, publish,
|
||||||
|
distribute, sublicense, and/or sell copies of the Software, and to
|
||||||
|
permit persons to whom the Software is furnished to do so, subject to
|
||||||
|
the following conditions:
|
||||||
|
|
||||||
|
The above copyright notice and this permission notice shall be
|
||||||
|
included in all copies or substantial portions of the Software.
|
||||||
|
|
||||||
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||||
|
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||||
|
BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||||
|
ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||||
|
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
|
SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* An example:
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include "krmq.h"
|
||||||
|
|
||||||
|
struct my_node {
|
||||||
|
char key;
|
||||||
|
KRMQ_HEAD(struct my_node) head;
|
||||||
|
};
|
||||||
|
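#define my_cmp(p, q) (((q)->key < (p)->key) - ((p)->key < (q)->key))
#define my_lt2(p, q) ((p)->key < (q)->key) // ordering used for the range-minimum; here simply the key
KRMQ_INIT(my, struct my_node, head, my_cmp, my_lt2)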
|
||||||
|
|
||||||
|
int main(void) {
|
||||||
|
const char *str = "MNOLKQOPHIA"; // from wiki, except a duplicate
|
||||||
|
struct my_node *root = 0;
|
||||||
|
int i, l = strlen(str);
|
||||||
|
for (i = 0; i < l; ++i) { // insert in the input order
|
||||||
|
struct my_node *q, *p = malloc(sizeof(*p));
|
||||||
|
p->key = str[i];
|
||||||
|
q = krmq_insert(my, &root, p, 0);
|
||||||
|
if (p != q) free(p); // if already present, free
|
||||||
|
}
|
||||||
|
krmq_itr_t(my) itr;
|
||||||
|
krmq_itr_first(my, root, &itr); // place at first
|
||||||
|
do { // traverse
|
||||||
|
const struct my_node *p = krmq_at(&itr);
|
||||||
|
putchar(p->key);
|
||||||
|
free((void*)p); // free node
|
||||||
|
} while (krmq_itr_next(my, &itr));
|
||||||
|
putchar('\n');
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef KRMQ_H
|
||||||
|
#define KRMQ_H
|
||||||
|
|
||||||
|
#ifdef __STRICT_ANSI__
|
||||||
|
#define inline __inline__
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* Capacity of the fixed-size path/stack arrays declared by the generated functions and the iterator */
#define KRMQ_MAX_DEPTH 64

/* Number of nodes in the subtree rooted at _p_; 0 when _p_ is NULL */
#define krmq_size(head, p) ((p)? (p)->head.size : 0)
/* Number of nodes under child _i_ (0 or 1) of node _q_; 0 when that child is absent */
#define krmq_size_child(head, q, i) ((q)->head.p[(i)]? (q)->head.p[(i)]->head.size : 0)
|
||||||
|
|
||||||
|
/*
 * Per-node bookkeeping to embed as a member of the user's node struct.
 *   p[0], p[1]  children; searches descend into p[0] when the comparison is
 *               negative and into p[1] when it is positive
 *   s           the node that is minimal under __lt2 within this subtree
 *               (set to the node itself on insertion and maintained by
 *               krmq_update_min_##suf)
 */
#define KRMQ_HEAD(__type) \
	struct { \
		__type *p[2], *s; \
		signed char balance; /* balance factor */ \
		unsigned size; /* #elements in subtree */ \
	}
|
||||||
|
|
||||||
|
/*
 * Generates two lookup functions:
 *
 * krmq_find_##suf(root, x, cnt_)
 *   Returns the node comparing equal to _x_ under __cmp, or NULL. If _cnt_
 *   is not NULL it receives the number of nodes smaller than or equal to _x_.
 *
 * krmq_interval_##suf(root, x, lower, upper)
 *   Returns the node equal to _x_, or NULL. *lower receives the last node on
 *   the search path that is not larger than _x_ and *upper the last one that
 *   is not smaller; either is NULL when no such node was met. Both output
 *   pointers may themselves be NULL.
 */
#define __KRMQ_FIND(suf, __scope, __type, __head, __cmp) \
	__scope __type *krmq_find_##suf(const __type *root, const __type *x, unsigned *cnt_) { \
		const __type *p = root; \
		unsigned cnt = 0; \
		while (p != 0) { \
			int cmp; \
			cmp = __cmp(x, p); \
			/* p and its whole left subtree are <= x */ \
			if (cmp >= 0) cnt += krmq_size_child(__head, p, 0) + 1; \
			if (cmp < 0) p = p->__head.p[0]; \
			else if (cmp > 0) p = p->__head.p[1]; \
			else break; \
		} \
		if (cnt_) *cnt_ = cnt; \
		return (__type*)p; \
	} \
	__scope __type *krmq_interval_##suf(const __type *root, const __type *x, __type **lower, __type **upper) { \
		const __type *p = root, *l = 0, *u = 0; \
		while (p != 0) { \
			int cmp; \
			cmp = __cmp(x, p); \
			if (cmp < 0) u = p, p = p->__head.p[0]; \
			else if (cmp > 0) l = p, p = p->__head.p[1]; \
			else { l = u = p; break; } \
		} \
		if (lower) *lower = (__type*)l; \
		if (upper) *upper = (__type*)u; \
		return (__type*)p; \
	}
|
||||||
|
|
||||||
|
/*
 * Generates krmq_rmq_##suf(root, lo, up): range-minimum query.
 *
 * Returns the node that is minimal under __lt2 among the nodes whose key lies
 * in the closed interval [lo, up] under __cmp, or NULL when the tree is empty
 * or no node falls in the interval.
 *
 * The search paths for _lo_ and _up_ are recorded, the node where the two
 * paths split is located, and the candidates are then gathered from the
 * in-range nodes on both paths together with the stored subtree minima
 * (__head.s) of the subtrees hanging off the inner side of each path.
 */
#define __KRMQ_RMQ(suf, __scope, __type, __head, __cmp, __lt2) \
	__scope __type *krmq_rmq_##suf(const __type *root, const __type *lo, const __type *up) { /* CLOSED interval */ \
		const __type *p = root, *path[2][KRMQ_MAX_DEPTH], *min; \
		int plen[2] = {0, 0}, pcmp[2][KRMQ_MAX_DEPTH], i, cmp, lca; \
		if (root == 0) return 0; \
		while (p) { /* record the search path for lo */ \
			cmp = __cmp(lo, p); \
			path[0][plen[0]] = p, pcmp[0][plen[0]++] = cmp; \
			if (cmp < 0) p = p->__head.p[0]; \
			else if (cmp > 0) p = p->__head.p[1]; \
			else break; \
		} \
		p = root; \
		while (p) { /* record the search path for up */ \
			cmp = __cmp(up, p); \
			path[1][plen[1]] = p, pcmp[1][plen[1]++] = cmp; \
			if (cmp < 0) p = p->__head.p[0]; \
			else if (cmp > 0) p = p->__head.p[1]; \
			else break; \
		} \
		for (i = 0; i < plen[0] && i < plen[1]; ++i) /* find the LCA */ \
			if (path[0][i] == path[1][i] && pcmp[0][i] <= 0 && pcmp[1][i] >= 0) \
				break; \
		if (i == plen[0] || i == plen[1]) return 0; /* no elements in the closed interval */ \
		lca = i, min = path[0][lca]; \
		for (i = lca + 1; i < plen[0]; ++i) { /* below the LCA on the lo path */ \
			if (pcmp[0][i] <= 0) { /* lo <= this node: the node and its right subtree qualify */ \
				if (__lt2(path[0][i], min)) min = path[0][i]; \
				if (path[0][i]->__head.p[1] && __lt2(path[0][i]->__head.p[1]->__head.s, min)) \
					min = path[0][i]->__head.p[1]->__head.s; \
			} \
		} \
		for (i = lca + 1; i < plen[1]; ++i) { /* below the LCA on the up path */ \
			if (pcmp[1][i] >= 0) { /* up >= this node: the node and its left subtree qualify */ \
				if (__lt2(path[1][i], min)) min = path[1][i]; \
				if (path[1][i]->__head.p[0] && __lt2(path[1][i]->__head.p[0]->__head.s, min)) \
					min = path[1][i]->__head.p[0]->__head.s; \
			} \
		} \
		return (__type*)min; \
	}
|
||||||
|
|
||||||
|
/*
 * Generates the static helpers shared by insertion and deletion:
 *   krmq_update_min_##suf  recompute a node's stored subtree minimum
 *   krmq_rotate1_##suf     single rotation; returns the new subtree root
 *   krmq_rotate2_##suf     double rotation; returns the new subtree root
 * Both rotations keep __head.size and __head.s consistent. Only the double
 * rotation also rewrites the balance factors; after a single rotation the
 * caller sets them.
 */
#define __KRMQ_ROTATE(suf, __type, __head, __lt2) \
	/* set p's subtree minimum from p itself and the stored minima of subtrees q and r (either may be NULL) */ \
	static inline void krmq_update_min_##suf(__type *p, const __type *q, const __type *r) { \
		p->__head.s = !q || __lt2(p, q->__head.s)? p : q->__head.s; \
		p->__head.s = !r || __lt2(p->__head.s, r->__head.s)? p->__head.s : r->__head.s; \
	} \
	/* one rotation: (a,(b,c)q)p => ((a,b)p,c)q */ \
	static inline __type *krmq_rotate1_##suf(__type *p, int dir) { /* dir=0 to left; dir=1 to right */ \
		int opp = 1 - dir; /* opposite direction */ \
		__type *q = p->__head.p[opp], *s = p->__head.s; \
		unsigned size_p = p->__head.size; \
		p->__head.size -= q->__head.size - krmq_size_child(__head, q, dir); \
		q->__head.size = size_p; \
		krmq_update_min_##suf(p, p->__head.p[dir], q->__head.p[dir]); \
		q->__head.s = s; /* q now roots the same set of nodes p used to root */ \
		p->__head.p[opp] = q->__head.p[dir]; \
		q->__head.p[dir] = p; \
		return q; \
	} \
	/* two consecutive rotations: (a,((b,c)r,d)q)p => ((a,b)p,(c,d)q)r */ \
	static inline __type *krmq_rotate2_##suf(__type *p, int dir) { \
		int b1, opp = 1 - dir; \
		__type *q = p->__head.p[opp], *r = q->__head.p[dir], *s = p->__head.s; \
		unsigned size_x_dir = krmq_size_child(__head, r, dir); \
		r->__head.size = p->__head.size; \
		p->__head.size -= q->__head.size - size_x_dir; \
		q->__head.size -= size_x_dir + 1; \
		krmq_update_min_##suf(p, p->__head.p[dir], r->__head.p[dir]); \
		krmq_update_min_##suf(q, q->__head.p[opp], r->__head.p[opp]); \
		r->__head.s = s; /* r now roots the same set of nodes p used to root */ \
		p->__head.p[opp] = r->__head.p[dir]; \
		r->__head.p[dir] = p; \
		q->__head.p[dir] = r->__head.p[opp]; \
		r->__head.p[opp] = q; \
		b1 = dir == 0? +1 : -1; \
		if (r->__head.balance == b1) q->__head.balance = 0, p->__head.balance = -b1; \
		else if (r->__head.balance == 0) q->__head.balance = p->__head.balance = 0; \
		else q->__head.balance = b1, p->__head.balance = 0; \
		r->__head.balance = 0; \
		return r; \
	}
|
||||||
|
|
||||||
|
/*
 * Generates krmq_insert_##suf(root_, x, cnt_).
 *
 * Inserts node _x_ unless a node comparing equal under __cmp already exists.
 * Returns _x_ on insertion, or the existing equal node (the tree is then left
 * untouched). *root_ is updated when the root changes. If _cnt_ is not NULL it
 * receives the number of nodes already in the tree that are smaller than or
 * equal to _x_.
 *
 * While descending, bp tracks the deepest node on the path with a non-zero
 * balance factor (bq is its parent); stack[] holds the directions taken from
 * bp downwards and path[] every node visited from the root.
 */
#define __KRMQ_INSERT(suf, __scope, __type, __head, __cmp, __lt2) \
	__scope __type *krmq_insert_##suf(__type **root_, __type *x, unsigned *cnt_) { \
		unsigned char stack[KRMQ_MAX_DEPTH]; \
		__type *path[KRMQ_MAX_DEPTH]; \
		__type *bp, *bq; \
		__type *p, *q, *r = 0; /* _r_ is potentially the new root */ \
		int i, which = 0, top, b1, path_len; \
		unsigned cnt = 0; \
		bp = *root_, bq = 0; \
		/* find the insertion location */ \
		for (p = bp, q = bq, top = path_len = 0; p; q = p, p = p->__head.p[which]) { \
			int cmp; \
			cmp = __cmp(x, p); \
			if (cmp >= 0) cnt += krmq_size_child(__head, p, 0) + 1; \
			if (cmp == 0) { \
				if (cnt_) *cnt_ = cnt; \
				return p; \
			} \
			if (p->__head.balance != 0) \
				bq = q, bp = p, top = 0; \
			stack[top++] = which = (cmp > 0); \
			path[path_len++] = p; \
		} \
		if (cnt_) *cnt_ = cnt; \
		x->__head.balance = 0, x->__head.size = 1, x->__head.p[0] = x->__head.p[1] = 0, x->__head.s = x; \
		if (q == 0) *root_ = x; \
		else q->__head.p[which] = x; \
		if (bp == 0) return x; /* the tree was empty */ \
		for (i = 0; i < path_len; ++i) ++path[i]->__head.size; \
		/* refresh subtree minima bottom-up; stop once an ancestor's minimum is not x */ \
		for (i = path_len - 1; i >= 0; --i) { \
			krmq_update_min_##suf(path[i], path[i]->__head.p[0], path[i]->__head.p[1]); \
			if (path[i]->__head.s != x) break; \
		} \
		for (p = bp, top = 0; p != x; p = p->__head.p[stack[top]], ++top) /* update balance factors */ \
			if (stack[top] == 0) --p->__head.balance; \
			else ++p->__head.balance; \
		if (bp->__head.balance > -2 && bp->__head.balance < 2) return x; /* no re-balance needed */ \
		/* re-balance */ \
		which = (bp->__head.balance < 0); \
		b1 = which == 0? +1 : -1; \
		q = bp->__head.p[1 - which]; \
		if (q->__head.balance == b1) { \
			r = krmq_rotate1_##suf(bp, which); \
			q->__head.balance = bp->__head.balance = 0; \
		} else r = krmq_rotate2_##suf(bp, which); \
		if (bq == 0) *root_ = r; \
		else bq->__head.p[bp != bq->__head.p[0]] = r; \
		return x; \
	}
|
||||||
|
|
||||||
|
/*
 * Generates krmq_erase_##suf(root_, x, cnt_).
 *
 * Unlinks and returns the node comparing equal to _x_ under __cmp, or the
 * first node (reached by following p[0] from the root) when _x_ is NULL.
 * Returns NULL, with *cnt_ set to 0, when no equal node exists. The returned
 * node is not freed. *root_ is updated. If _cnt_ is not NULL it receives the
 * number of nodes that were smaller than or equal to the removed node,
 * counting the node itself.
 *
 * A stack-allocated "fake" node acts as the parent of the root so that the
 * root needs no special case; path[0] is always that fake node.
 * NOTE(review): the x == NULL branch appears to assume a non-empty tree;
 * with an empty tree p ends up pointing at the fake node. Confirm callers.
 */
#define __KRMQ_ERASE(suf, __scope, __type, __head, __cmp, __lt2) \
	__scope __type *krmq_erase_##suf(__type **root_, const __type *x, unsigned *cnt_) { \
		__type *p, *path[KRMQ_MAX_DEPTH], fake; \
		unsigned char dir[KRMQ_MAX_DEPTH]; \
		int i, d = 0, cmp; \
		unsigned cnt = 0; \
		fake.__head.p[0] = *root_, fake.__head.p[1] = 0; \
		if (cnt_) *cnt_ = 0; \
		if (x) { \
			for (cmp = -1, p = &fake; cmp; cmp = __cmp(x, p)) { \
				int which = (cmp > 0); \
				if (cmp > 0) cnt += krmq_size_child(__head, p, 0) + 1; \
				dir[d] = which; \
				path[d++] = p; \
				p = p->__head.p[which]; \
				if (p == 0) { \
					if (cnt_) *cnt_ = 0; \
					return 0; \
				} \
			} \
			cnt += krmq_size_child(__head, p, 0) + 1; /* because p==x is not counted */ \
		} else { \
			for (p = &fake, cnt = 1; p; p = p->__head.p[0]) \
				dir[d] = 0, path[d++] = p; \
			p = path[--d]; \
		} \
		if (cnt_) *cnt_ = cnt; \
		for (i = 1; i < d; ++i) --path[i]->__head.size; /* i starts at 1 to skip the fake node */ \
		if (p->__head.p[1] == 0) { /* ((1,.)2,3)4 => (1,3)4; p=2 */ \
			path[d-1]->__head.p[dir[d-1]] = p->__head.p[0]; \
		} else { \
			__type *q = p->__head.p[1]; \
			if (q->__head.p[0] == 0) { /* ((1,2)3,4)5 => ((1)2,4)5; p=3,q=2 */ \
				q->__head.p[0] = p->__head.p[0]; \
				q->__head.balance = p->__head.balance; \
				path[d-1]->__head.p[dir[d-1]] = q; \
				path[d] = q, dir[d++] = 1; \
				q->__head.size = p->__head.size - 1; \
			} else { /* ((1,((.,2)3,4)5)6,7)8 => ((1,(2,4)5)3,7)8; p=6 */ \
				__type *r; \
				int e = d++; /* backup _d_ */\
				for (;;) { /* walk down to r, the leftmost node of p's right subtree */ \
					dir[d] = 0; \
					path[d++] = q; \
					r = q->__head.p[0]; \
					if (r->__head.p[0] == 0) break; \
					q = r; \
				} \
				r->__head.p[0] = p->__head.p[0]; \
				q->__head.p[0] = r->__head.p[1]; \
				r->__head.p[1] = p->__head.p[1]; \
				r->__head.balance = p->__head.balance; \
				path[e-1]->__head.p[dir[e-1]] = r; \
				path[e] = r, dir[e] = 1; \
				for (i = e + 1; i < d; ++i) --path[i]->__head.size; \
				r->__head.size = p->__head.size - 1; \
			} \
		} \
		for (i = d - 1; i >= 0; --i) /* not sure why adding condition "path[i]->__head.s==p" doesn't work */ \
			krmq_update_min_##suf(path[i], path[i]->__head.p[0], path[i]->__head.p[1]); \
		while (--d > 0) { /* walk back up, fixing balance factors and rotating where needed */ \
			__type *q = path[d]; \
			int which, other, b1 = 1, b2 = 2; \
			which = dir[d], other = 1 - which; \
			if (which) b1 = -b1, b2 = -b2; \
			q->__head.balance += b1; \
			if (q->__head.balance == b1) break; \
			else if (q->__head.balance == b2) { \
				__type *r = q->__head.p[other]; \
				if (r->__head.balance == -b1) { \
					path[d-1]->__head.p[dir[d-1]] = krmq_rotate2_##suf(q, which); \
				} else { \
					path[d-1]->__head.p[dir[d-1]] = krmq_rotate1_##suf(q, which); \
					if (r->__head.balance == 0) { \
						r->__head.balance = -b1; \
						q->__head.balance = b1; \
						break; \
					} else r->__head.balance = q->__head.balance = 0; \
				} \
			} \
		} \
		*root_ = fake.__head.p[0]; \
		return p; \
	}
|
||||||
|
|
||||||
|
/*
 * Call __free() on every node of the tree rooted at __root.
 *
 * No stack or recursion is used: a node without a left child is freed and the
 * walk continues with its right child; otherwise the left child is rotated
 * above the node and the walk continues from that child. The tree is
 * destroyed in the process and must not be used afterwards.
 */
#define krmq_free(__type, __head, __root, __free) do { \
		__type *_p, *_q; \
		for (_p = __root; _p; _p = _q) { \
			if (_p->__head.p[0] == 0) { \
				_q = _p->__head.p[1]; \
				__free(_p); \
			} else { \
				_q = _p->__head.p[0]; \
				_p->__head.p[0] = _q->__head.p[1]; \
				_q->__head.p[1] = _p; \
			} \
		} \
	} while (0)
|
||||||
|
|
||||||
|
/*
 * Generates the iterator type and its functions. The iterator keeps the path
 * from the root to the current node in stack[]; top points at the current
 * node, and top < stack means the iterator is empty.
 *
 *   krmq_itr_first_##suf       position at the node reached by following p[0]
 *                              from the root (empty iterator for a NULL root)
 *   krmq_itr_find_##suf        descend towards _x_; returns 1 and stops on
 *                              the equal node, otherwise returns 0 with the
 *                              iterator left on the last node visited
 *   krmq_itr_next_bidir_##suf  step to the adjacent node in direction _dir_
 *                              (non-zero: via p[1]; zero: via p[0]); returns
 *                              0 and leaves the iterator empty when there is
 *                              no such node
 */
#define __KRMQ_ITR(suf, __scope, __type, __head, __cmp) \
	struct krmq_itr_##suf { \
		const __type *stack[KRMQ_MAX_DEPTH], **top; \
	}; \
	__scope void krmq_itr_first_##suf(const __type *root, struct krmq_itr_##suf *itr) { \
		const __type *p; \
		for (itr->top = itr->stack - 1, p = root; p; p = p->__head.p[0]) \
			*++itr->top = p; \
	} \
	__scope int krmq_itr_find_##suf(const __type *root, const __type *x, struct krmq_itr_##suf *itr) { \
		const __type *p = root; \
		itr->top = itr->stack - 1; \
		while (p != 0) { \
			int cmp; \
			*++itr->top = p; \
			cmp = __cmp(x, p); \
			if (cmp < 0) p = p->__head.p[0]; \
			else if (cmp > 0) p = p->__head.p[1]; \
			else break; \
		} \
		return p? 1 : 0; \
	} \
	__scope int krmq_itr_next_bidir_##suf(struct krmq_itr_##suf *itr, int dir) { \
		const __type *p; \
		if (itr->top < itr->stack) return 0; \
		dir = !!dir; \
		p = (*itr->top)->__head.p[dir]; \
		if (p) { /* go down */ \
			for (; p; p = p->__head.p[!dir]) \
				*++itr->top = p; \
			return 1; \
		} else { /* go up */ \
			do { /* pop while we are coming back from the _dir_ child */ \
				p = *itr->top--; \
			} while (itr->top >= itr->stack && p == (*itr->top)->__head.p[dir]); \
			return itr->top < itr->stack? 0 : 1; \
		} \
	} \
|
||||||
|
|
||||||
|
/**
 * Insert a node to the tree
 *
 * @param suf     name suffix used in KRMQ_INIT()
 * @param proot   pointer to the root of the tree (in/out: root may change)
 * @param x       node to insert (in)
 * @param cnt     number of nodes already in the tree that are smaller than or
 *                equal to _x_; can be NULL (out)
 *
 * @return _x_ if not present in the tree, or the node equal to x.
 */
#define krmq_insert(suf, proot, x, cnt) krmq_insert_##suf(proot, x, cnt)

/**
 * Find a node in the tree
 *
 * @param suf     name suffix used in KRMQ_INIT()
 * @param root    root of the tree
 * @param x       node value to find (in)
 * @param cnt     number of nodes smaller than or equal to _x_; can be NULL (out)
 *
 * @return node equal to _x_ if present, or NULL if absent
 */
#define krmq_find(suf, root, x, cnt) krmq_find_##suf(root, x, cnt)
/* Like krmq_find(), and additionally reports through _lower_/_upper_ (each may
 * be NULL) the last node on the search path not larger / not smaller than _x_ */
#define krmq_interval(suf, root, x, lower, upper) krmq_interval_##suf(root, x, lower, upper)
/* Node that is minimal under __lt2 within the closed key interval [lo, up];
 * NULL if the tree is empty or holds no node in that interval */
#define krmq_rmq(suf, root, lo, up) krmq_rmq_##suf(root, lo, up)

/**
 * Delete a node from the tree
 *
 * @param suf     name suffix used in KRMQ_INIT()
 * @param proot   pointer to the root of the tree (in/out: root may change)
 * @param x       node value to delete; if NULL, delete the first node (in)
 * @param cnt     number of nodes that were smaller than or equal to the removed
 *                node, including the node itself; 0 if absent; can be NULL (out)
 *
 * @return node removed from the tree if present, or NULL if absent
 */
#define krmq_erase(suf, proot, x, cnt) krmq_erase_##suf(proot, x, cnt)
#define krmq_erase_first(suf, proot) krmq_erase_##suf(proot, 0, 0)

#define krmq_itr_t(suf) struct krmq_itr_##suf

/**
 * Place the iterator at the smallest object
 *
 * @param suf     name suffix used in KRMQ_INIT()
 * @param root    root of the tree
 * @param itr     iterator
 */
#define krmq_itr_first(suf, root, itr) krmq_itr_first_##suf(root, itr)

/**
 * Place the iterator at the object equal to the query, if there is one
 *
 * @param suf     name suffix used in KRMQ_INIT()
 * @param root    root of the tree
 * @param x       query (in)
 * @param itr     iterator (out)
 *
 * @return 1 if find; 0 otherwise. On a miss the iterator is left at the last
 *         node visited on the search path; krmq_at(itr) is NULL only when the
 *         tree is empty
 */
#define krmq_itr_find(suf, root, x, itr) krmq_itr_find_##suf(root, x, itr)

/**
 * Move to the next object in order (krmq_itr_prev: the previous object)
 *
 * @param suf     name suffix used in KRMQ_INIT()
 * @param itr     iterator (modified)
 *
 * @return 1 if there is a next object; 0 otherwise
 */
#define krmq_itr_next(suf, itr) krmq_itr_next_bidir_##suf(itr, 1)
#define krmq_itr_prev(suf, itr) krmq_itr_next_bidir_##suf(itr, 0)

/**
 * Return the pointer at the iterator
 *
 * @param itr     iterator
 *
 * @return pointer if present; NULL otherwise
 */
#define krmq_at(itr) ((itr)->top < (itr)->stack? 0 : *(itr)->top)
|
||||||
|
|
||||||
|
/*
 * Instantiate every generated function for one node type.
 *
 *   suf      suffix appended to all generated names
 *   __scope  storage-class/qualifier text placed in front of each function
 *   __type   node struct type
 *   __head   name of the KRMQ_HEAD() member inside __type
 *   __cmp    __cmp(a, b): negative, zero or positive; orders the tree
 *   __lt2    __lt2(a, b): non-zero when a ranks before b for the
 *            range-minimum bookkeeping
 */
#define KRMQ_INIT2(suf, __scope, __type, __head, __cmp, __lt2) \
	__KRMQ_FIND(suf, __scope, __type, __head, __cmp) \
	__KRMQ_RMQ(suf, __scope, __type, __head, __cmp, __lt2) \
	__KRMQ_ROTATE(suf, __type, __head, __lt2) \
	__KRMQ_INSERT(suf, __scope, __type, __head, __cmp, __lt2) \
	__KRMQ_ERASE(suf, __scope, __type, __head, __cmp, __lt2) \
	__KRMQ_ITR(suf, __scope, __type, __head, __cmp)

/* Same as KRMQ_INIT2() with an empty __scope */
#define KRMQ_INIT(suf, __type, __head, __cmp, __lt2) \
	KRMQ_INIT2(suf,, __type, __head, __cmp, __lt2)
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
@ -0,0 +1,54 @@
|
||||||
|
#ifndef KRNG_H
|
||||||
|
#define KRNG_H
|
||||||
|
|
||||||
|
#include <stdint.h> /* uint64_t; keeps this header usable on its own */

/* Generator state: two 64-bit words, seeded by kr_srand_r() and advanced in place by kr_rand_r() */
typedef struct {
	uint64_t s[2];
} krng_t;
|
||||||
|
|
||||||
|
/* Scramble a 64-bit value: add a fixed odd constant, then apply three xor-shift steps with two multiplications in between. */
static inline uint64_t kr_splitmix64(uint64_t x)
{
	uint64_t mixed = x + 0x9E3779B97F4A7C15ULL;
	mixed ^= mixed >> 30;
	mixed *= 0xBF58476D1CE4E5B9ULL;
	mixed ^= mixed >> 27;
	mixed *= 0x94D049BB133111EBULL;
	mixed ^= mixed >> 31;
	return mixed;
}
|
||||||
|
|
||||||
|
/*
 * Return the next 64-bit random number and advance the state in _r_.
 * The value returned is the sum of the two state words before the update.
 *
 * NOTE(review): the second state word is rebuilt from the old s[0]; the
 * published xoroshiro128+ step rotates the xor-ed s[1] there instead.
 * Confirm whether this deviation is intended before changing it, since any
 * change alters every generated sequence.
 */
static inline uint64_t kr_rand_r(krng_t *r)
{
	const uint64_t first = r->s[0];
	const uint64_t second = r->s[1];
	const uint64_t mix = second ^ first;
	r->s[0] = ((first << 55) | (first >> 9)) ^ mix ^ (mix << 14);
	r->s[1] = (first << 36) | (first >> 28);
	return first + second;
}
|
||||||
|
|
||||||
|
/*
 * Replace the state in _r_ by a combination of the 128 states it passes
 * through: the generator is stepped 128 times, and the state seen before each
 * step is xor-ed into the result whenever the matching bit of the jump
 * constant is set.
 */
static inline void kr_jump_r(krng_t *r)
{
	static const uint64_t jump_bits[] = { 0xbeac0467eba5facbULL, 0xd86b048b86aa9922ULL };
	uint64_t acc0 = 0, acc1 = 0;
	int word, bit;
	for (word = 0; word < 2; ++word) {
		const uint64_t mask = jump_bits[word];
		for (bit = 0; bit < 64; ++bit) {
			if ((mask >> bit) & 1ULL) {
				acc0 ^= r->s[0];
				acc1 ^= r->s[1];
			}
			kr_rand_r(r);
		}
	}
	r->s[0] = acc0;
	r->s[1] = acc1;
}
|
||||||
|
|
||||||
|
/* Seed _r_: the first state word is derived from _seed_, the second from the first. */
static inline void kr_srand_r(krng_t *r, uint64_t seed)
{
	const uint64_t first = kr_splitmix64(seed);
	r->s[0] = first;
	r->s[1] = kr_splitmix64(first);
}
|
||||||
|
|
||||||
|
/*
 * Return a random double derived from the top 52 bits of kr_rand_r().
 * The bits are placed below the constant 0x3FF << 52 and the result is read
 * back as a double, from which 1.0 is subtracted.
 * (assumes IEEE-754 binary64 doubles, where this yields a value in [0,1))
 */
static inline double kr_drand_r(krng_t *r)
{
	union { uint64_t i; double d; } bits;
	const uint64_t fraction = kr_rand_r(r) >> 12;
	bits.i = (0x3FFULL << 52) | fraction;
	return bits.d - 1.0;
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
@ -0,0 +1,242 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2008 Yuta Mori All Rights Reserved.
|
||||||
|
* 2011 Attractive Chaos <attractor@live.co.uk>
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* This is a library for constructing the suffix array for a string containing
|
||||||
|
* multiple sentinels with sentinels all represented by 0. The last symbol in
|
||||||
|
* the string must be a sentinel. The library is modified from an early version
|
||||||
|
 * of Yuta Mori's SAIS library, but is slower than the latest SAIS by about
|
||||||
|
* 30%, partly due to the recent optimization Yuta has applied and partly due
|
||||||
|
* to the extra comparisons between sentinels. This is not the first effort in
|
||||||
|
* supporting multi-sentinel strings, but is probably the easiest to use. */
|
||||||
|
|
||||||
|
#include <stdlib.h>
|
||||||
|
|
||||||
|
/* Index type selection: with _KSA64 defined, indices are 64-bit and the entry
 * points carry a "64" suffix; otherwise plain int is used. */
#ifdef _KSA64
#include <stdint.h>
typedef int64_t saint_t;
#define SAINT_MAX INT64_MAX
#define SAIS_CORE ksa_core64
#define SAIS_BWT ksa_bwt64
#define SAIS_MAIN ksa_sa64
#else
#include <limits.h>
typedef int saint_t;
#define SAINT_MAX INT_MAX
#define SAIS_CORE ksa_core
#define SAIS_BWT ksa_bwt
#define SAIS_MAIN ksa_sa
#endif

/* T is of type "const unsigned char*". If T[i] is a sentinel, chr(i) takes a negative value */
/* The macro reads the variables T and cs from the enclosing function. When cs
 * equals sizeof(saint_t), T is read as an array of saint_t; otherwise as
 * bytes, with a zero byte at position i mapped to i - SAINT_MAX so that
 * sentinels at different positions get different codes. */
#define chr(i) (cs == sizeof(saint_t) ? ((const saint_t *)T)[i] : (T[i]? (saint_t)T[i] : i - SAINT_MAX))
|
||||||
|
|
||||||
|
/** Count the occurrences of each symbol */
|
||||||
|
static void getCounts(const unsigned char *T, saint_t *C, saint_t n, saint_t k, int cs)
|
||||||
|
{
|
||||||
|
saint_t i;
|
||||||
|
for (i = 0; i < k; ++i) C[i] = 0;
|
||||||
|
for (i = 0; i < n; ++i) {
|
||||||
|
saint_t c = chr(i);
|
||||||
|
++C[c > 0? c : 0];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Find the end of each bucket
|
||||||
|
*
|
||||||
|
* @param C occurrences computed by getCounts(); input
|
||||||
|
* @param B start/end of each bucket; output
|
||||||
|
* @param k size of alphabet
|
||||||
|
* @param end compute the end of bucket if true; otherwise compute the end
|
||||||
|
*/
|
||||||
|
static inline void getBuckets(const saint_t *C, saint_t *B, saint_t k, saint_t end)
|
||||||
|
{
|
||||||
|
saint_t i, sum = 0;
|
||||||
|
if (end) for (i = 0; i < k; ++i) sum += C[i], B[i] = sum;
|
||||||
|
else for (i = 0; i < k; ++i) sum += C[i], B[i] = sum - C[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Induced sort
 *
 * Makes two passes over SA: a left-to-right pass that fills buckets from
 * their starts, then a right-to-left pass that fills buckets from their ends.
 * Entries are stored bit-complemented (~j) to mark them; both passes flip
 * these marks as they go.
 *
 * @param T    input text
 * @param SA   suffix array being built, modified in place (in/out)
 * @param C    symbol counts; recomputed before each pass when C == B
 * @param B    bucket boundaries, rewritten for each pass (work space)
 * @param n    length of T
 * @param k    size of alphabet
 * @param cs   # bytes per element in T, as interpreted by chr()
 */
static void induceSA(const unsigned char *T, saint_t *SA, saint_t *C, saint_t *B, saint_t n, saint_t k, saint_t cs)
{
	saint_t *b, i, j;
	saint_t c0, c1;
	/* left-to-right induced sort (for L-type) */
	if (C == B) getCounts(T, C, n, k, cs);
	getBuckets(C, B, k, 0); /* find starts of buckets */
	for (i = 0, b = 0, c1 = -1; i < n; ++i) {
		j = SA[i], SA[i] = ~j;
		if (0 < j) { /* >0 if j-1 is L-type; <0 if S-type; ==0 undefined */
			--j;
			if ((c0 = chr(j)) != c1) {
				/* switching buckets: save the write cursor of the old one, load the new one */
				B[c1 > 0? c1 : 0] = b - SA;
				c1 = c0;
				b = SA + B[c1 > 0? c1 : 0];
			}
			*b++ = (0 < j && chr(j - 1) < c1) ? ~j : j;
		}
	}
	/* right-to-left induced sort (for S-type) */
	if (C == B) getCounts(T, C, n, k, cs);
	getBuckets(C, B, k, 1); /* find ends of buckets */
	for (i = n - 1, b = 0, c1 = -1; 0 <= i; --i) {
		if (0 < (j = SA[i])) { /* the prefix is S-type */
			--j;
			if ((c0 = chr(j)) != c1) {
				/* switching buckets: save the write cursor of the old one, load the new one */
				B[c1 > 0? c1 : 0] = b - SA;
				c1 = c0;
				b = SA + B[c1 > 0? c1 : 0];
			}
			if (c0 > 0) *--b = (j == 0 || chr(j - 1) > c1) ? ~j : j;
		} else SA[i] = ~j; /* if L-type, change the sign */
	}
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Recursively construct the suffix array for a string containing multiple
|
||||||
|
* sentinels. NULL is taken as the sentinel.
|
||||||
|
*
|
||||||
|
* @param T NULL terminated input string (there can be multiple NULLs)
|
||||||
|
* @param SA output suffix array
|
||||||
|
* @param fs working space available in SA (typically 0 when first called)
|
||||||
|
* @param n length of T, including the trailing NULL
|
||||||
|
* @param k size of the alphabet (typically 256 when first called)
|
||||||
|
* @param cs # bytes per element in T; 1 or sizeof(saint_t) (typically 1 when first called)
|
||||||
|
*
|
||||||
|
* @return 0 upon success
|
||||||
|
*/
|
||||||
|
int SAIS_CORE(const unsigned char *T, saint_t *SA, saint_t fs, saint_t n, saint_t k, int cs)
|
||||||
|
{
|
||||||
|
saint_t *C, *B;
|
||||||
|
saint_t i, j, c, m, q, qlen, name;
|
||||||
|
saint_t c0, c1;
|
||||||
|
|
||||||
|
/* STAGE I: reduce the problem by at least 1/2 sort all the S-substrings */
|
||||||
|
if (k <= fs) C = SA + n, B = (k <= fs - k) ? C + k : C;
|
||||||
|
else {
|
||||||
|
if ((C = (saint_t*)malloc(k * (1 + (cs == 1)) * sizeof(saint_t))) == NULL) return -2;
|
||||||
|
B = cs == 1? C + k : C;
|
||||||
|
}
|
||||||
|
getCounts(T, C, n, k, cs);
|
||||||
|
getBuckets(C, B, k, 1); /* find ends of buckets */
|
||||||
|
for (i = 0; i < n; ++i) SA[i] = 0;
|
||||||
|
/* mark L and S (the t array in Nong et al.), and keep the positions of LMS in the buckets */
|
||||||
|
for (i = n - 2, c = 1, c1 = chr(n - 1); 0 <= i; --i, c1 = c0) {
|
||||||
|
if ((c0 = chr(i)) < c1 + c) c = 1; /* c1 = chr(i+1); c==1 if in an S run */
|
||||||
|
else if (c) SA[--B[c1 > 0? c1 : 0]] = i + 1, c = 0;
|
||||||
|
}
|
||||||
|
induceSA(T, SA, C, B, n, k, cs);
|
||||||
|
if (fs < k) free(C);
|
||||||
|
/* pack all the sorted LMS into the first m items of SA
|
||||||
|
2*m must be not larger than n (see Nong et al. for the proof) */
|
||||||
|
for (i = 0, m = 0; i < n; ++i) {
|
||||||
|
saint_t p = SA[i];
|
||||||
|
if (p == n - 1) SA[m++] = p;
|
||||||
|
else if (0 < p && chr(p - 1) > (c0 = chr(p))) {
|
||||||
|
for (j = p + 1; j < n && c0 == (c1 = chr(j)); ++j);
|
||||||
|
if (j < n && c0 < c1) SA[m++] = p;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for (i = m; i < n; ++i) SA[i] = 0; /* init the name array buffer */
|
||||||
|
/* store the length of all substrings */
|
||||||
|
for (i = n - 2, j = n, c = 1, c1 = chr(n - 1); 0 <= i; --i, c1 = c0) {
|
||||||
|
if ((c0 = chr(i)) < c1 + c) c = 1; /* c1 = chr(i+1) */
|
||||||
|
else if (c) SA[m + ((i + 1) >> 1)] = j - i - 1, j = i + 1, c = 0;
|
||||||
|
}
|
||||||
|
/* find the lexicographic names of all substrings */
|
||||||
|
for (i = 0, name = 0, q = n, qlen = 0; i < m; ++i) {
|
||||||
|
saint_t p = SA[i], plen = SA[m + (p >> 1)], diff = 1;
|
||||||
|
if (plen == qlen) {
|
||||||
|
for (j = 0; j < plen && chr(p + j) == chr(q + j); j++);
|
||||||
|
if (j == plen) diff = 0;
|
||||||
|
}
|
||||||
|
if (diff) ++name, q = p, qlen = plen;
|
||||||
|
SA[m + (p >> 1)] = name;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* STAGE II: solve the reduced problem; recurse if names are not yet unique */
|
||||||
|
if (name < m) {
|
||||||
|
saint_t *RA = SA + n + fs - m - 1;
|
||||||
|
for (i = n - 1, j = m - 1; m <= i; --i)
|
||||||
|
if (SA[i] != 0) RA[j--] = SA[i];
|
||||||
|
RA[m] = 0; // add a sentinel; in the resulting SA, SA[0]==m always stands
|
||||||
|
if (SAIS_CORE((unsigned char *)RA, SA, fs + n - m * 2 - 2, m + 1, name + 1, sizeof(saint_t)) != 0) return -2;
|
||||||
|
for (i = n - 2, j = m - 1, c = 1, c1 = chr(n - 1); 0 <= i; --i, c1 = c0) {
|
||||||
|
if ((c0 = chr(i)) < c1 + c) c = 1;
|
||||||
|
else if (c) RA[j--] = i + 1, c = 0; /* get p1 */
|
||||||
|
}
|
||||||
|
for (i = 0; i < m; ++i) SA[i] = RA[SA[i+1]]; /* get index */
|
||||||
|
}
|
||||||
|
|
||||||
|
/* STAGE III: induce the result for the original problem */
|
||||||
|
if (k <= fs) C = SA + n, B = (k <= fs - k) ? C + k : C;
|
||||||
|
else {
|
||||||
|
if ((C = (saint_t*)malloc(k * (1 + (cs == 1)) * sizeof(saint_t))) == NULL) return -2;
|
||||||
|
B = cs == 1? C + k : C;
|
||||||
|
}
|
||||||
|
/* put all LMS characters into their buckets */
|
||||||
|
getCounts(T, C, n, k, cs);
|
||||||
|
getBuckets(C, B, k, 1); /* find ends of buckets */
|
||||||
|
for (i = m; i < n; ++i) SA[i] = 0; /* init SA[m..n-1] */
|
||||||
|
for (i = m - 1; 0 <= i; --i) {
|
||||||
|
j = SA[i], SA[i] = 0;
|
||||||
|
c = chr(j);
|
||||||
|
SA[--B[c > 0? c : 0]] = j;
|
||||||
|
}
|
||||||
|
induceSA(T, SA, C, B, n, k, cs);
|
||||||
|
if (fs < k) free(C);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Construct the suffix array for a NULL terminated string possibly containing
|
||||||
|
* multiple sentinels (NULLs).
|
||||||
|
*
|
||||||
|
* @param T[0..n-1] NULL terminated input string
|
||||||
|
* @param SA[0..n-1] output suffix array
|
||||||
|
* @param n length of the given string, including NULL
|
||||||
|
* @param k size of the alphabet including the sentinel; no more than 256
|
||||||
|
* @return 0 upon success
|
||||||
|
*/
|
||||||
|
int SAIS_MAIN(const unsigned char *T, saint_t *SA, saint_t n, int k)
|
||||||
|
{
|
||||||
|
if (T == NULL || SA == NULL || T[n - 1] != '\0' || n <= 0) return -1;
|
||||||
|
if (k < 0 || k > 256) k = 256;
|
||||||
|
return SAIS_CORE(T, SA, 0, n, (saint_t)k, 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Overwrite T in place with a transform derived from its suffix array: for
 * every rank i, T[i] becomes the character preceding suffix SA[i], or 0 when
 * SA[i] == 0.
 *
 * @param T  NULL terminated input string of length n; overwritten on success
 * @param n  length of T, including the trailing NULL
 * @param k  alphabet size, forwarded to SAIS_MAIN()
 *
 * @return   0 upon success; -1 on invalid arguments or allocation failure;
 *           otherwise the non-zero code returned by SAIS_MAIN()
 */
int SAIS_BWT(unsigned char *T, saint_t n, int k)
{
	saint_t *SA, i;
	int ret;
	/* reject bad arguments before n is multiplied into an allocation size */
	if (T == NULL || n <= 0) return -1;
	if ((SA = (saint_t*)malloc(n * sizeof(saint_t))) == NULL) return -1;
	if ((ret = SAIS_MAIN(T, SA, n, k)) != 0) {
		free(SA); /* do not leak the work array on failure */
		return ret;
	}
	/* first pass reads T at arbitrary positions, so T is only overwritten afterwards */
	for (i = 0; i < n; ++i)
		if (SA[i]) SA[i] = T[SA[i] - 1]; /* if SA[i]==0, SA[i] stays 0 */
	for (i = 0; i < n; ++i) T[i] = SA[i];
	free(SA);
	return 0;
}
|
||||||
|
|
@ -0,0 +1,242 @@
|
||||||
|
/* The MIT License
|
||||||
|
|
||||||
|
Copyright (c) 2008, 2009, 2011 Attractive Chaos <attractor@live.co.uk>
|
||||||
|
|
||||||
|
Permission is hereby granted, free of charge, to any person obtaining
|
||||||
|
a copy of this software and associated documentation files (the
|
||||||
|
"Software"), to deal in the Software without restriction, including
|
||||||
|
without limitation the rights to use, copy, modify, merge, publish,
|
||||||
|
distribute, sublicense, and/or sell copies of the Software, and to
|
||||||
|
permit persons to whom the Software is furnished to do so, subject to
|
||||||
|
the following conditions:
|
||||||
|
|
||||||
|
The above copyright notice and this permission notice shall be
|
||||||
|
included in all copies or substantial portions of the Software.
|
||||||
|
|
||||||
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||||
|
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||||
|
BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||||
|
ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||||
|
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
|
SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* Last Modified: 05MAR2012 */
|
||||||
|
|
||||||
|
#ifndef AC_KSEQ_H
|
||||||
|
#define AC_KSEQ_H
|
||||||
|
|
||||||
|
#include <ctype.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
|
||||||
|
/* Delimiter classes accepted by ks_getuntil()/ks_getuntil2(). */
#define KS_SEP_SPACE 0 /* any character for which isspace() is true */
#define KS_SEP_TAB   1 /* isspace() characters other than ' ' */
#define KS_SEP_LINE  2 /* line separator: "\n" (Unix) or "\r\n" (Windows) */
#define KS_SEP_MAX   2 /* largest delimiter class value */

/*
 * Declares kstream_t, a buffered reader on top of a handle of type `type_t`.
 *   buf     read buffer
 *   begin   index of the next unread byte in buf
 *   end     number of valid bytes in buf; -1 marks a read error (see ks_err)
 *   is_eof  non-zero once the underlying reader reported end of input
 *   f       the underlying handle
 */
#define __KS_TYPE(type_t) \
	typedef struct __kstream_t { \
		unsigned char *buf; \
		int begin; \
		int end; \
		int is_eof; \
		type_t f; \
	} kstream_t;

/* true when the stream is in the error state */
#define ks_err(ks)    (-1 == (ks)->end)
/* true when EOF was seen and every buffered byte has been consumed */
#define ks_eof(ks)    ((ks)->is_eof && (ks)->end <= (ks)->begin)
/* reset the buffer state; evaluates to 0 */
#define ks_rewind(ks) ((ks)->is_eof = (ks)->begin = (ks)->end = 0)
|
||||||
|
|
||||||
|
/*
 * Generates the kstream_t constructor and destructor.
 *
 *   ks_init(f)      allocates a zeroed stream around handle `f` plus a read
 *                   buffer of __bufsize bytes; returns NULL (0) if either
 *                   allocation fails
 *   ks_destroy(ks)  frees the buffer and the stream; a NULL stream is ignored.
 *                   The handle `f` is not touched here.
 */
#define __KS_BASIC(type_t, __bufsize) \
	static inline kstream_t *ks_init(type_t f) \
	{ \
		kstream_t *ks = (kstream_t*)calloc(1, sizeof(kstream_t)); \
		if (ks == 0) return 0; /* out of memory: nothing to clean up yet */ \
		ks->f = f; \
		ks->buf = (unsigned char*)malloc(__bufsize); \
		if (ks->buf == 0) { /* never hand out a stream without a buffer */ \
			free(ks); \
			return 0; \
		} \
		return ks; \
	} \
	static inline void ks_destroy(kstream_t *ks) \
	{ \
		if (ks) { \
			free(ks->buf); \
			free(ks); \
		} \
	}
|
||||||
|
|
||||||
|
/*
 * Generates ks_getc(): return the next byte of the stream as a non-negative
 * int, -1 at end of input, or -3 when the stream is (or just entered) the
 * error state. The buffer is refilled through __read(f, buf, __bufsize) when
 * it runs empty.
 */
#define __KS_GETC(__read, __bufsize) \
	static inline int ks_getc(kstream_t *ks) \
	{ \
		if (ks_err(ks)) return -3; \
		if (ks->begin >= ks->end) { /* buffer exhausted */ \
			if (ks->is_eof) return -1; \
			ks->begin = 0; \
			ks->end = __read(ks->f, ks->buf, __bufsize); \
			if (ks->end == 0 || ks->end == -1) { \
				ks->is_eof = 1; \
				return ks->end == 0 ? -1 : -3; \
			} \
		} \
		return (int)ks->buf[ks->begin++]; \
	}
|
||||||
|
|
||||||
|
/* Growable string: `s` holds `l` characters in a buffer of `m` bytes.
 * Guarded so that another header may provide the same type first. */
#ifndef KSTRING_T
#define KSTRING_T kstring_t
typedef struct __kstring_t {
	size_t l;
	size_t m;
	char *s;
} kstring_t;
#endif

/* Round the lvalue x up, in place, to the next power of two (values that are
 * already a power of two are unchanged). Only the low 32 bits are smeared. */
#ifndef kroundup32
#define kroundup32(x) \
	(--(x), \
	 (x) |= (x) >> 1, \
	 (x) |= (x) >> 2, \
	 (x) |= (x) >> 4, \
	 (x) |= (x) >> 8, \
	 (x) |= (x) >> 16, \
	 ++(x))
#endif
|
||||||
|
|
||||||
|
/*
 * Generates ks_getuntil2() and its wrapper ks_getuntil().
 *
 * ks_getuntil2(ks, delimiter, str, dret, append) copies bytes from the stream
 * into `str` up to (not including) the next delimiter and NUL-terminates it.
 *
 *   delimiter  KS_SEP_SPACE, KS_SEP_TAB or KS_SEP_LINE, or a literal character
 *              value greater than KS_SEP_MAX
 *   str        destination; reset to length 0 first unless `append` is non-zero
 *   dret       if not NULL, receives the delimiter byte that ended the field,
 *              or 0 when the input ended first
 *   append     non-zero to append to the existing contents of `str`
 *
 * Returns the resulting length of `str`, -1 if nothing could be read because
 * the stream is at end of input, or -3 on a stream error. With KS_SEP_LINE a
 * trailing '\r' is dropped.
 *
 * ks_getuntil() is the same call with append == 0.
 *
 * The memchr() results are cast explicitly so that the generated code is also
 * valid C++, where void* does not convert implicitly to unsigned char*.
 */
#define __KS_GETUNTIL(__read, __bufsize) \
	static int ks_getuntil2(kstream_t *ks, int delimiter, kstring_t *str, int *dret, int append) \
	{ \
		int gotany = 0; \
		if (dret) *dret = 0; \
		str->l = append? str->l : 0; \
		for (;;) { \
			int i; \
			if (ks_err(ks)) return -3; \
			if (ks->begin >= ks->end) { \
				if (!ks->is_eof) { \
					ks->begin = 0; \
					ks->end = __read(ks->f, ks->buf, __bufsize); \
					if (ks->end == 0) { ks->is_eof = 1; break; } \
					if (ks->end == -1) { ks->is_eof = 1; return -3; } \
				} else break; \
			} \
			if (delimiter == KS_SEP_LINE) { \
				unsigned char *sep = (unsigned char*)memchr(ks->buf + ks->begin, '\n', ks->end - ks->begin); \
				i = sep != NULL ? (int)(sep - ks->buf) : ks->end; \
			} else if (delimiter > KS_SEP_MAX) { \
				unsigned char *sep = (unsigned char*)memchr(ks->buf + ks->begin, delimiter, ks->end - ks->begin); \
				i = sep != NULL ? (int)(sep - ks->buf) : ks->end; \
			} else if (delimiter == KS_SEP_SPACE) { \
				for (i = ks->begin; i < ks->end; ++i) \
					if (isspace(ks->buf[i])) break; \
			} else if (delimiter == KS_SEP_TAB) { \
				for (i = ks->begin; i < ks->end; ++i) \
					if (isspace(ks->buf[i]) && ks->buf[i] != ' ') break; \
			} else i = 0; /* never come to here! */ \
			/* make room for the new chunk plus the terminating NUL */ \
			if (str->m - str->l < (size_t)(i - ks->begin + 1)) { \
				str->m = str->l + (i - ks->begin) + 1; \
				kroundup32(str->m); \
				str->s = (char*)realloc(str->s, str->m); \
			} \
			gotany = 1; \
			memcpy(str->s + str->l, ks->buf + ks->begin, i - ks->begin); \
			str->l = str->l + (i - ks->begin); \
			ks->begin = i + 1; \
			if (i < ks->end) { /* stopped on a delimiter inside the buffer */ \
				if (dret) *dret = ks->buf[i]; \
				break; \
			} \
		} \
		if (!gotany && ks_eof(ks)) return -1; \
		if (str->s == 0) { \
			str->m = 1; \
			str->s = (char*)calloc(1, 1); \
		} else if (delimiter == KS_SEP_LINE && str->l > 1 && str->s[str->l-1] == '\r') --str->l; \
		str->s[str->l] = '\0'; \
		return (int)str->l; \
	} \
	static inline int ks_getuntil(kstream_t *ks, int delimiter, kstring_t *str, int *dret) \
	{ return ks_getuntil2(ks, delimiter, str, dret, 0); }
|
||||||
|
|
||||||
|
/*
 * Instantiates the whole buffered-stream API for one handle type:
 * the kstream_t type, ks_init()/ks_destroy(), ks_getc() and
 * ks_getuntil()/ks_getuntil2().
 *
 *   type_t     type of the underlying handle
 *   __read     callable used as __read(handle, buffer, size)
 *   __bufsize  size in bytes of the read buffer
 */
#define KSTREAM_INIT(type_t, __read, __bufsize) \
	__KS_TYPE(type_t)                           \
	__KS_BASIC(type_t, __bufsize)               \
	__KS_GETC(__read, __bufsize)                \
	__KS_GETUNTIL(__read, __bufsize)
|
||||||
|
|
||||||
|
/* Reset a kseq_t so that parsing restarts from a clean buffer state.
 * The underlying handle itself is not repositioned by this macro. */
#define kseq_rewind(ks) ((ks)->last_char = (ks)->f->is_eof = (ks)->f->begin = (ks)->f->end = 0)

/*
 * Generates the kseq_t constructor and destructor with linkage `SCOPE`.
 *
 *   kseq_init(fd)     allocates a zeroed record reader on top of a new stream
 *                     for `fd`; returns NULL (0) if an allocation fails
 *   kseq_destroy(ks)  frees the four string buffers, the stream and the
 *                     reader; a NULL reader is ignored
 */
#define __KSEQ_BASIC(SCOPE, type_t) \
	SCOPE kseq_t *kseq_init(type_t fd) \
	{ \
		kseq_t *s = (kseq_t*)calloc(1, sizeof(kseq_t)); \
		if (s == 0) return 0; \
		s->f = ks_init(fd); \
		if (s->f == 0) { /* stream could not be created: undo and report */ \
			free(s); \
			return 0; \
		} \
		return s; \
	} \
	SCOPE void kseq_destroy(kseq_t *ks) \
	{ \
		if (!ks) return; \
		free(ks->name.s); free(ks->comment.s); free(ks->seq.s); free(ks->qual.s); \
		ks_destroy(ks->f); \
		free(ks); \
	}
|
||||||
|
|
||||||
|
/*
 * Generates kseq_read(): parse the next FASTA/FASTQ record into seq->name,
 * seq->comment, seq->seq and (FASTQ only) seq->qual.
 *
 * Return value:
 *   >=0  length of the sequence (normal)
 *   -1   end-of-file
 *   -2   truncated quality string
 *   -3   error reading stream
 */
#define __KSEQ_READ(SCOPE) \
	SCOPE int kseq_read(kseq_t *seq) \
	{ \
		int c,r; \
		kstream_t *ks = seq->f; \
		if (seq->last_char == 0) { /* then jump to the next header line */ \
			while ((c = ks_getc(ks)) >= 0 && c != '>' && c != '@'); \
			if (c < 0) return c; /* end of file or error*/ \
			seq->last_char = c; \
		} /* else: the first header char has been read in the previous call */ \
		seq->comment.l = seq->seq.l = seq->qual.l = 0; /* reset all members */ \
		if ((r=ks_getuntil(ks, 0, &seq->name, &c)) < 0) return r; /* normal exit: EOF or error */ \
		if (c != '\n') ks_getuntil(ks, KS_SEP_LINE, &seq->comment, 0); /* read FASTA/Q comment */ \
		if (seq->seq.s == 0) { /* we can do this in the loop below, but that is slower */ \
			seq->seq.m = 256; \
			seq->seq.s = (char*)malloc(seq->seq.m); \
		} \
		while ((c = ks_getc(ks)) >= 0 && c != '>' && c != '+' && c != '@') { \
			if (c == '\n') continue; /* skip empty lines */ \
			seq->seq.s[seq->seq.l++] = c; /* this is safe: we always have enough space for 1 char */ \
			ks_getuntil2(ks, KS_SEP_LINE, &seq->seq, 0, 1); /* read the rest of the line */ \
		} \
		if (c == '>' || c == '@') seq->last_char = c; /* the first header char has been read */ \
		if (seq->seq.l + 1 >= seq->seq.m) { /* seq->seq.s[seq->seq.l] below may be out of boundary */ \
			seq->seq.m = seq->seq.l + 2; \
			kroundup32(seq->seq.m); /* rounded to the next closest 2^k */ \
			seq->seq.s = (char*)realloc(seq->seq.s, seq->seq.m); \
		} \
		seq->seq.s[seq->seq.l] = 0; /* null terminated string */ \
		if (c != '+') return seq->seq.l; /* FASTA */ \
		if (seq->qual.m < seq->seq.m) { /* allocate memory for qual in case insufficient */ \
			seq->qual.m = seq->seq.m; \
			seq->qual.s = (char*)realloc(seq->qual.s, seq->qual.m); \
		} \
		while ((c = ks_getc(ks)) >= 0 && c != '\n'); /* skip the rest of '+' line */ \
		if (c == -1) return -2; /* error: no quality string */ \
		/* the assignment is parenthesised so that c holds the return code of */ \
		/* ks_getuntil2() rather than the 0/1 result of the whole condition */ \
		while ((c = ks_getuntil2(ks, KS_SEP_LINE, &seq->qual, 0, 1)) >= 0 && seq->qual.l < seq->seq.l); \
		if (c == -3) return -3; /* stream error */ \
		seq->last_char = 0; /* we have not come to the next header line */ \
		if (seq->seq.l != seq->qual.l) return -2; /* error: qual string is of a different length */ \
		return seq->seq.l; \
	}
|
||||||
|
|
||||||
|
/*
 * Declares kseq_t, the FASTA/FASTQ record reader state:
 *   name, comment, seq, qual  buffers for the fields of the current record
 *   last_char                 header character already consumed from the next
 *                             record, or 0 if none
 *   f                         the buffered stream being read
 */
#define __KSEQ_TYPE(type_t) \
	typedef struct { \
		kstring_t name; \
		kstring_t comment; \
		kstring_t seq; \
		kstring_t qual; \
		int last_char; \
		kstream_t *f; \
	} kseq_t;

/* Instantiates the stream layer (16384-byte buffer) and the kseq_* functions
 * with linkage `SCOPE`, reading through __read(handle, buffer, size). */
#define KSEQ_INIT2(SCOPE, type_t, __read) \
	KSTREAM_INIT(type_t, __read, 16384)   \
	__KSEQ_TYPE(type_t)                   \
	__KSEQ_BASIC(SCOPE, type_t)           \
	__KSEQ_READ(SCOPE)

/* Same as KSEQ_INIT2 with every generated kseq_* function declared static. */
#define KSEQ_INIT(type_t, __read) KSEQ_INIT2(static, type_t, __read)

/* Declares the types and the three kseq_* entry points without defining them. */
#define KSEQ_DECLARE(type_t) \
	__KS_TYPE(type_t) \
	__KSEQ_TYPE(type_t) \
	extern kseq_t *kseq_init(type_t fd); \
	void kseq_destroy(kseq_t *ks); \
	int kseq_read(kseq_t *seq);
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
@ -0,0 +1,253 @@
|
||||||
|
#include <string.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <stdarg.h>
|
||||||
|
#include <assert.h>
|
||||||
|
#include <ctype.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include "kson.h"
|
||||||
|
|
||||||
|
/*************
|
||||||
|
*** Parse ***
|
||||||
|
*************/
|
||||||
|
|
||||||
|
kson_node_t *kson_parse_core(const char *json, long *_n, int *error, long *parsed_len)
|
||||||
|
{
|
||||||
|
long *stack = 0, top = 0, max = 0, n_a = 0, m_a = 0, i, j;
|
||||||
|
kson_node_t *a = 0, *u;
|
||||||
|
const char *p, *q;
|
||||||
|
size_t *tmp;
|
||||||
|
|
||||||
|
#define __push_back(y) do { \
|
||||||
|
if (top == max) { \
|
||||||
|
max = max? max<<1 : 4; \
|
||||||
|
stack = (long*)realloc(stack, sizeof(long) * max); \
|
||||||
|
} \
|
||||||
|
stack[top++] = (y); \
|
||||||
|
} while (0)
|
||||||
|
|
||||||
|
#define __new_node(z) do { \
|
||||||
|
if (n_a == m_a) { \
|
||||||
|
long old_m = m_a; \
|
||||||
|
m_a = m_a? m_a<<1 : 4; \
|
||||||
|
a = (kson_node_t*)realloc(a, sizeof(kson_node_t) * m_a); \
|
||||||
|
memset(a + old_m, 0, sizeof(kson_node_t) * (m_a - old_m)); \
|
||||||
|
} \
|
||||||
|
*(z) = &a[n_a++]; \
|
||||||
|
} while (0)
|
||||||
|
|
||||||
|
assert(sizeof(size_t) == sizeof(kson_node_t*));
|
||||||
|
*error = KSON_OK;
|
||||||
|
for (p = json; *p; ++p) {
|
||||||
|
while (*p && isspace(*p)) ++p;
|
||||||
|
if (*p == 0) break;
|
||||||
|
if (*p == ',') { // comma is somewhat redundant
|
||||||
|
} else if (*p == '[' || *p == '{') {
|
||||||
|
int t = *p == '['? -1 : -2;
|
||||||
|
if (top < 2 || stack[top-1] != -3) { // unnamed internal node
|
||||||
|
__push_back(n_a);
|
||||||
|
__new_node(&u);
|
||||||
|
__push_back(t);
|
||||||
|
} else stack[top-1] = t; // named internal node
|
||||||
|
} else if (*p == ']' || *p == '}') {
|
||||||
|
long i, start, t = *p == ']'? -1 : -2;
|
||||||
|
for (i = top - 1; i >= 0 && stack[i] != t; --i);
|
||||||
|
if (i < 0) { // error: an extra right bracket
|
||||||
|
*error = KSON_ERR_EXTRA_RIGHT;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
start = i;
|
||||||
|
u = &a[stack[start-1]];
|
||||||
|
u->key = u->v.str;
|
||||||
|
u->n = top - 1 - start;
|
||||||
|
u->v.child = (kson_node_t**)malloc(u->n * sizeof(kson_node_t*));
|
||||||
|
tmp = (size_t*)u->v.child;
|
||||||
|
for (i = start + 1; i < top; ++i)
|
||||||
|
tmp[i - start - 1] = stack[i];
|
||||||
|
u->type = *p == ']'? KSON_TYPE_BRACKET : KSON_TYPE_BRACE;
|
||||||
|
if ((top = start) == 1) break; // completed one object; remaining characters discarded
|
||||||
|
} else if (*p == ':') {
|
||||||
|
if (top == 0 || stack[top-1] == -3) {
|
||||||
|
*error = KSON_ERR_NO_KEY;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
__push_back(-3);
|
||||||
|
} else {
|
||||||
|
int c = *p;
|
||||||
|
// get the node to modify
|
||||||
|
if (top >= 2 && stack[top-1] == -3) { // we have a key:value pair here
|
||||||
|
--top;
|
||||||
|
u = &a[stack[top-1]];
|
||||||
|
u->key = u->v.str; // move old value to key
|
||||||
|
} else { // don't know if this is a bare value or a key:value pair; keep it as a value for now
|
||||||
|
__push_back(n_a);
|
||||||
|
__new_node(&u);
|
||||||
|
}
|
||||||
|
// parse string
|
||||||
|
if (c == '\'' || c == '"') {
|
||||||
|
for (q = ++p; *q && *q != c; ++q)
|
||||||
|
if (*q == '\\') ++q;
|
||||||
|
} else {
|
||||||
|
for (q = p; *q && *q != ']' && *q != '}' && *q != ',' && *q != ':' && *q != '\n'; ++q)
|
||||||
|
if (*q == '\\') ++q;
|
||||||
|
}
|
||||||
|
u->v.str = (char*)malloc(q - p + 1); strncpy(u->v.str, p, q - p); u->v.str[q-p] = 0; // equivalent to u->v.str=strndup(p, q-p)
|
||||||
|
u->type = c == '\''? KSON_TYPE_SGL_QUOTE : c == '"'? KSON_TYPE_DBL_QUOTE : KSON_TYPE_NO_QUOTE;
|
||||||
|
p = c == '\'' || c == '"'? q : q - 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
while (*p && isspace(*p)) ++p; // skip trailing blanks
|
||||||
|
if (parsed_len) *parsed_len = p - json;
|
||||||
|
if (top != 1) *error = KSON_ERR_EXTRA_LEFT;
|
||||||
|
|
||||||
|
for (i = 0; i < n_a; ++i)
|
||||||
|
for (j = 0, u = &a[i], tmp = (size_t*)u->v.child; j < (long)u->n; ++j)
|
||||||
|
u->v.child[j] = &a[tmp[j]];
|
||||||
|
|
||||||
|
free(stack);
|
||||||
|
*_n = n_a;
|
||||||
|
return a;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Release a parsed document: for every node its key and its value (v.str,
 * which shares storage with v.child), then the node array and the handle.
 * A NULL handle is ignored.
 */
void kson_destroy(kson_t *kson)
{
	long idx = 0;
	if (!kson) return;
	while (idx < kson->n_nodes) {
		kson_node_t *node = &kson->root[idx++];
		free(node->key);
		free(node->v.str);
	}
	free(kson->root);
	free(kson);
}
|
||||||
|
|
||||||
|
kson_t *kson_parse(const char *json)
|
||||||
|
{
|
||||||
|
kson_t *kson;
|
||||||
|
int error;
|
||||||
|
kson = (kson_t*)calloc(1, sizeof(kson_t));
|
||||||
|
kson->root = kson_parse_core(json, &kson->n_nodes, &error, 0);
|
||||||
|
if (error) {
|
||||||
|
kson_destroy(kson);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
return kson;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*************
|
||||||
|
*** Query ***
|
||||||
|
*************/
|
||||||
|
|
||||||
|
const kson_node_t *kson_by_path(const kson_node_t *p, int depth, ...)
|
||||||
|
{
|
||||||
|
va_list ap;
|
||||||
|
va_start(ap, depth);
|
||||||
|
while (p && depth > 0) {
|
||||||
|
if (p->type == KSON_TYPE_BRACE) {
|
||||||
|
p = kson_by_key(p, va_arg(ap, const char*));
|
||||||
|
} else if (p->type == KSON_TYPE_BRACKET) {
|
||||||
|
p = kson_by_index(p, va_arg(ap, long));
|
||||||
|
} else break;
|
||||||
|
--depth;
|
||||||
|
}
|
||||||
|
va_end(ap);
|
||||||
|
return p;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**************
|
||||||
|
*** Format ***
|
||||||
|
**************/
|
||||||
|
|
||||||
|
void kson_format_recur(const kson_node_t *p, int depth)
|
||||||
|
{
|
||||||
|
long i;
|
||||||
|
if (p->key) printf("\"%s\":", p->key);
|
||||||
|
if (p->type == KSON_TYPE_BRACKET || p->type == KSON_TYPE_BRACE) {
|
||||||
|
putchar(p->type == KSON_TYPE_BRACKET? '[' : '{');
|
||||||
|
if (p->n) {
|
||||||
|
putchar('\n'); for (i = 0; i <= depth; ++i) fputs(" ", stdout);
|
||||||
|
for (i = 0; i < (long)p->n; ++i) {
|
||||||
|
if (i) {
|
||||||
|
int i;
|
||||||
|
putchar(',');
|
||||||
|
putchar('\n'); for (i = 0; i <= depth; ++i) fputs(" ", stdout);
|
||||||
|
}
|
||||||
|
kson_format_recur(p->v.child[i], depth + 1);
|
||||||
|
}
|
||||||
|
putchar('\n'); for (i = 0; i < depth; ++i) fputs(" ", stdout);
|
||||||
|
}
|
||||||
|
putchar(p->type == KSON_TYPE_BRACKET? ']' : '}');
|
||||||
|
} else {
|
||||||
|
if (p->type != KSON_TYPE_NO_QUOTE)
|
||||||
|
putchar(p->type == KSON_TYPE_SGL_QUOTE? '\'' : '"');
|
||||||
|
fputs(p->v.str, stdout);
|
||||||
|
if (p->type != KSON_TYPE_NO_QUOTE)
|
||||||
|
putchar(p->type == KSON_TYPE_SGL_QUOTE? '\'' : '"');
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Pretty-print the tree rooted at `root` to stdout, followed by a newline. */
void kson_format(const kson_node_t *root)
{
	const int top_level = 0;
	kson_format_recur(root, top_level);
	putchar('\n');
}
|
||||||
|
|
||||||
|
/*********************
|
||||||
|
*** Main function ***
|
||||||
|
*********************/
|
||||||
|
|
||||||
|
#ifdef KSON_MAIN
#define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x))
/*
 * Demo driver.
 *   kson <file> [path...]  parse <file>, pretty-print it and, if path
 *                          components are given, look them up (an index at
 *                          bracket nodes, a key at brace nodes)
 *   kson                   parse a built-in sample and query b[1]
 * Returns 1 if <file> cannot be opened, 0 otherwise.
 */
int main(int argc, char *argv[])
{
	kson_t *kson = 0;
	if (argc > 1) {
		FILE *fp;
		int len = 0, max = 0, tmp, i;
		char *json = 0, buf[0x10000];
		if ((fp = fopen(argv[1], "rb")) == 0) {
			fprintf(stderr, "Failed to open %s\n", argv[1]);
			return 1;
		}
		// read the entire file into a string
		while ((tmp = fread(buf, 1, 0x10000, fp)) != 0) {
			if (len + tmp + 1 > max) { // +1 keeps room for the terminating NUL
				max = len + tmp + 1;
				kroundup32(max);
				json = (char*)realloc(json, max);
			}
			memcpy(json + len, buf, tmp);
			len += tmp;
		}
		fclose(fp);
		// the parser expects a NUL-terminated string; an empty file leaves json NULL
		if (json) json[len] = 0;
		// parse
		kson = json? kson_parse(json) : 0;
		free(json);
		if (kson) {
			kson_format(kson->root);
			if (argc > 2) {
				// path finding
				const kson_node_t *p = kson->root;
				for (i = 2; i < argc && p; ++i) {
					if (p->type == KSON_TYPE_BRACKET)
						p = kson_by_index(p, atoi(argv[i]));
					else if (p->type == KSON_TYPE_BRACE)
						p = kson_by_key(p, argv[i]);
					else p = 0;
				}
				if (p) {
					if (kson_is_internal(p)) printf("Reached an internal node\n");
					else printf("Value: %s\n", p->v.str);
				} else printf("Failed to find the slot\n");
			}
		} else printf("Failed to parse\n");
	} else {
		kson = kson_parse("{'a' : 1,'b':[0,'isn\\'t',true],'d':[{\n\n\n}]}");
		if (kson) {
			// the index is read with va_arg(ap, long), so it must be passed as a long
			const kson_node_t *p = kson_by_path(kson->root, 2, "b", 1L);
			if (p) printf("*** %s\n", p->v.str);
			else printf("!!! not found\n");
			kson_format(kson->root);
		} else {
			printf("Failed to parse\n");
		}
	}
	kson_destroy(kson);
	return 0;
}
#endif
|
||||||
|
|
@ -0,0 +1,64 @@
|
||||||
|
#ifndef KSON_H
#define KSON_H

#include <string.h>

/* Node types: the first three are leaves, distinguished by how the value was
 * quoted in the input; the last two are containers. */
#define KSON_TYPE_NO_QUOTE  1
#define KSON_TYPE_SGL_QUOTE 2
#define KSON_TYPE_DBL_QUOTE 3
#define KSON_TYPE_BRACKET   4
#define KSON_TYPE_BRACE     5

/* Parser status codes. */
#define KSON_OK              0
#define KSON_ERR_EXTRA_LEFT  1
#define KSON_ERR_EXTRA_RIGHT 2
#define KSON_ERR_NO_KEY      3

/*
 * One node of a parsed document.
 *   type  one of the KSON_TYPE_* values
 *   n     number of children of a container node
 *   key   name of the node, or NULL if it has none
 *   v     v.child (array of n child pointers) for containers,
 *         v.str (value text) otherwise
 */
typedef struct kson_node_s {
	unsigned long long type:3, n:61;
	char *key;
	union {
		struct kson_node_s **child;
		char *str;
	} v;
} kson_node_t;

/* A parsed document: a flat array of n_nodes nodes starting at root. */
typedef struct {
	long n_nodes;
	kson_node_t *root;
} kson_t;

#ifdef __cplusplus
extern "C" {
#endif

kson_t *kson_parse(const char *json);
void kson_destroy(kson_t *kson);
const kson_node_t *kson_by_path(const kson_node_t *root, int path_len, ...);
void kson_format(const kson_node_t *root);

#ifdef __cplusplus
}
#endif

/* True for container nodes ([...] or {...}). */
#define kson_is_internal(p) ((p)->type == KSON_TYPE_BRACKET || (p)->type == KSON_TYPE_BRACE)

/* Return the first child of container `p` whose key equals `key`, or NULL if
 * `p` is not a container or no child matches. Children without a key are
 * skipped. */
static inline const kson_node_t *kson_by_key(const kson_node_t *p, const char *key)
{
	long idx, count;
	if (p->type != KSON_TYPE_BRACKET && p->type != KSON_TYPE_BRACE) return 0;
	count = (long)p->n;
	for (idx = 0; idx < count; ++idx) {
		const kson_node_t *child = p->v.child[idx];
		if (child->key == 0) continue;
		if (strcmp(child->key, key) == 0) return child;
	}
	return 0;
}

/* Return the i-th child of container `p`, or NULL if `p` is not a container
 * or `i` is out of range. */
static inline const kson_node_t *kson_by_index(const kson_node_t *p, long i)
{
	if (!kson_is_internal(p)) return 0;
	if (i < 0 || i >= (long)p->n) return 0;
	return p->v.child[i];
}

#endif
|
||||||
|
|
@ -0,0 +1,353 @@
|
||||||
|
/* The MIT License
|
||||||
|
|
||||||
|
Copyright (c) 2008, 2011 Attractive Chaos <attractor@live.co.uk>
|
||||||
|
|
||||||
|
Permission is hereby granted, free of charge, to any person obtaining
|
||||||
|
a copy of this software and associated documentation files (the
|
||||||
|
"Software"), to deal in the Software without restriction, including
|
||||||
|
without limitation the rights to use, copy, modify, merge, publish,
|
||||||
|
distribute, sublicense, and/or sell copies of the Software, and to
|
||||||
|
permit persons to whom the Software is furnished to do so, subject to
|
||||||
|
the following conditions:
|
||||||
|
|
||||||
|
The above copyright notice and this permission notice shall be
|
||||||
|
included in all copies or substantial portions of the Software.
|
||||||
|
|
||||||
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||||
|
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||||
|
BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||||
|
ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||||
|
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
|
SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
2011-04-10 (0.1.6):
|
||||||
|
|
||||||
|
* Added sample
|
||||||
|
|
||||||
|
2011-03 (0.1.5):
|
||||||
|
|
||||||
|
* Added shuffle/permutation
|
||||||
|
|
||||||
|
2008-11-16 (0.1.4):
|
||||||
|
|
||||||
|
* Fixed a bug in introsort() that happens in rare cases.
|
||||||
|
|
||||||
|
2008-11-05 (0.1.3):
|
||||||
|
|
||||||
|
* Fixed a bug in introsort() for complex comparisons.
|
||||||
|
|
||||||
|
* Fixed a bug in mergesort(). The previous version is not stable.
|
||||||
|
|
||||||
|
2008-09-15 (0.1.2):
|
||||||
|
|
||||||
|
* Accelerated introsort. On my Mac (not on another Linux machine),
|
||||||
|
my implementation is as fast as std::sort on random input.
|
||||||
|
|
||||||
|
* Added combsort and in introsort, switch to combsort if the
|
||||||
|
recursion is too deep.
|
||||||
|
|
||||||
|
2008-09-13 (0.1.1):
|
||||||
|
|
||||||
|
* Added k-small algorithm
|
||||||
|
|
||||||
|
2008-09-05 (0.1.0):
|
||||||
|
|
||||||
|
* Initial version
|
||||||
|
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef AC_KSORT_H
|
||||||
|
#define AC_KSORT_H
|
||||||
|
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
|
/* Work-stack entry used by ks_introsort_*(): a pair of element pointers and a
 * depth value. NOTE(review): presumably the bounds of a pending partition and
 * its remaining recursion budget; confirm against ks_introsort. */
typedef struct {
	void *left, *right;
	int depth;
} ks_isort_stack_t;

/* Exchange the lvalues a and b of type type_t. Expands to a braced block, so
 * it can be used wherever a statement is expected. The temporary is a plain
 * local: the `register` storage class is not allowed in C++17. */
#define KSORT_SWAP(type_t, a, b) { type_t t=(a); (a)=(b); (b)=t; }
|
||||||
|
|
||||||
|
#define KSORT_INIT(name, type_t, __sort_lt) \
|
||||||
|
void ks_mergesort_##name(size_t n, type_t array[], type_t temp[]) \
|
||||||
|
{ \
|
||||||
|
type_t *a2[2], *a, *b; \
|
||||||
|
int curr, shift; \
|
||||||
|
\
|
||||||
|
a2[0] = array; \
|
||||||
|
a2[1] = temp? temp : (type_t*)malloc(sizeof(type_t) * n); \
|
||||||
|
for (curr = 0, shift = 0; (1ul<<shift) < n; ++shift) { \
|
||||||
|
a = a2[curr]; b = a2[1-curr]; \
|
||||||
|
if (shift == 0) { \
|
||||||
|
type_t *p = b, *i, *eb = a + n; \
|
||||||
|
for (i = a; i < eb; i += 2) { \
|
||||||
|
if (i == eb - 1) *p++ = *i; \
|
||||||
|
else { \
|
||||||
|
if (__sort_lt(*(i+1), *i)) { \
|
||||||
|
*p++ = *(i+1); *p++ = *i; \
|
||||||
|
} else { \
|
||||||
|
*p++ = *i; *p++ = *(i+1); \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
} else { \
|
||||||
|
size_t i, step = 1ul<<shift; \
|
||||||
|
for (i = 0; i < n; i += step<<1) { \
|
||||||
|
type_t *p, *j, *k, *ea, *eb; \
|
||||||
|
if (n < i + step) { \
|
||||||
|
ea = a + n; eb = a; \
|
||||||
|
} else { \
|
||||||
|
ea = a + i + step; \
|
||||||
|
eb = a + (n < i + (step<<1)? n : i + (step<<1)); \
|
||||||
|
} \
|
||||||
|
j = a + i; k = a + i + step; p = b + i; \
|
||||||
|
while (j < ea && k < eb) { \
|
||||||
|
if (__sort_lt(*k, *j)) *p++ = *k++; \
|
||||||
|
else *p++ = *j++; \
|
||||||
|
} \
|
||||||
|
while (j < ea) *p++ = *j++; \
|
||||||
|
while (k < eb) *p++ = *k++; \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
curr = 1 - curr; \
|
||||||
|
} \
|
||||||
|
if (curr == 1) { \
|
||||||
|
type_t *p = a2[0], *i = a2[1], *eb = array + n; \
|
||||||
|
for (; p < eb; ++i) *p++ = *i; \
|
||||||
|
} \
|
||||||
|
if (temp == 0) free(a2[1]); \
|
||||||
|
} \
|
||||||
|
void ks_heapadjust_##name(size_t i, size_t n, type_t l[]) \
|
||||||
|
{ \
|
||||||
|
size_t k = i; \
|
||||||
|
type_t tmp = l[i]; \
|
||||||
|
while ((k = (k << 1) + 1) < n) { \
|
||||||
|
if (k != n - 1 && __sort_lt(l[k], l[k+1])) ++k; \
|
||||||
|
if (__sort_lt(l[k], tmp)) break; \
|
||||||
|
l[i] = l[k]; i = k; \
|
||||||
|
} \
|
||||||
|
l[i] = tmp; \
|
||||||
|
} \
|
||||||
|
void ks_heapmake_##name(size_t lsize, type_t l[]) \
|
||||||
|
{ \
|
||||||
|
size_t i; \
|
||||||
|
for (i = (lsize >> 1) - 1; i != (size_t)(-1); --i) \
|
||||||
|
ks_heapadjust_##name(i, lsize, l); \
|
||||||
|
} \
|
||||||
|
void ks_heapsort_##name(size_t lsize, type_t l[]) \
|
||||||
|
{ \
|
||||||
|
size_t i; \
|
||||||
|
for (i = lsize - 1; i > 0; --i) { \
|
||||||
|
type_t tmp; \
|
||||||
|
tmp = *l; *l = l[i]; l[i] = tmp; ks_heapadjust_##name(0, i, l); \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
static inline void __ks_insertsort_##name(type_t *s, type_t *t) \
|
||||||
|
{ \
|
||||||
|
type_t *i, *j, swap_tmp; \
|
||||||
|
for (i = s + 1; i < t; ++i) \
|
||||||
|
for (j = i; j > s && __sort_lt(*j, *(j-1)); --j) { \
|
||||||
|
swap_tmp = *j; *j = *(j-1); *(j-1) = swap_tmp; \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
void ks_combsort_##name(size_t n, type_t a[]) \
|
||||||
|
{ \
|
||||||
|
const double shrink_factor = 1.2473309501039786540366528676643; \
|
||||||
|
int do_swap; \
|
||||||
|
size_t gap = n; \
|
||||||
|
type_t tmp, *i, *j; \
|
||||||
|
do { \
|
||||||
|
if (gap > 2) { \
|
||||||
|
gap = (size_t)(gap / shrink_factor); \
|
||||||
|
if (gap == 9 || gap == 10) gap = 11; \
|
||||||
|
} \
|
||||||
|
do_swap = 0; \
|
||||||
|
for (i = a; i < a + n - gap; ++i) { \
|
||||||
|
j = i + gap; \
|
||||||
|
if (__sort_lt(*j, *i)) { \
|
||||||
|
tmp = *i; *i = *j; *j = tmp; \
|
||||||
|
do_swap = 1; \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
} while (do_swap || gap > 2); \
|
||||||
|
if (gap != 1) __ks_insertsort_##name(a, a + n); \
|
||||||
|
} \
|
||||||
|
void ks_introsort_##name(size_t n, type_t a[]) \
|
||||||
|
{ \
|
||||||
|
int d; \
|
||||||
|
ks_isort_stack_t *top, *stack; \
|
||||||
|
type_t rp, swap_tmp; \
|
||||||
|
type_t *s, *t, *i, *j, *k; \
|
||||||
|
\
|
||||||
|
if (n < 1) return; \
|
||||||
|
else if (n == 2) { \
|
||||||
|
if (__sort_lt(a[1], a[0])) { swap_tmp = a[0]; a[0] = a[1]; a[1] = swap_tmp; } \
|
||||||
|
return; \
|
||||||
|
} \
|
||||||
|
for (d = 2; 1ul<<d < n; ++d); \
|
||||||
|
stack = (ks_isort_stack_t*)malloc(sizeof(ks_isort_stack_t) * ((sizeof(size_t)*d)+2)); \
|
||||||
|
top = stack; s = a; t = a + (n-1); d <<= 1; \
|
||||||
|
while (1) { \
|
||||||
|
if (s < t) { \
|
||||||
|
if (--d == 0) { \
|
||||||
|
ks_combsort_##name(t - s + 1, s); \
|
||||||
|
t = s; \
|
||||||
|
continue; \
|
||||||
|
} \
|
||||||
|
i = s; j = t; k = i + ((j-i)>>1) + 1; \
|
||||||
|
if (__sort_lt(*k, *i)) { \
|
||||||
|
if (__sort_lt(*k, *j)) k = j; \
|
||||||
|
} else k = __sort_lt(*j, *i)? i : j; \
|
||||||
|
rp = *k; \
|
||||||
|
if (k != t) { swap_tmp = *k; *k = *t; *t = swap_tmp; } \
|
||||||
|
for (;;) { \
|
||||||
|
do ++i; while (__sort_lt(*i, rp)); \
|
||||||
|
do --j; while (i <= j && __sort_lt(rp, *j)); \
|
||||||
|
if (j <= i) break; \
|
||||||
|
swap_tmp = *i; *i = *j; *j = swap_tmp; \
|
||||||
|
} \
|
||||||
|
swap_tmp = *i; *i = *t; *t = swap_tmp; \
|
||||||
|
if (i-s > t-i) { \
|
||||||
|
if (i-s > 16) { top->left = s; top->right = i-1; top->depth = d; ++top; } \
|
||||||
|
s = t-i > 16? i+1 : t; \
|
||||||
|
} else { \
|
||||||
|
if (t-i > 16) { top->left = i+1; top->right = t; top->depth = d; ++top; } \
|
||||||
|
t = i-s > 16? i-1 : s; \
|
||||||
|
} \
|
||||||
|
} else { \
|
||||||
|
if (top == stack) { \
|
||||||
|
free(stack); \
|
||||||
|
__ks_insertsort_##name(a, a+n); \
|
||||||
|
return; \
|
||||||
|
} else { --top; s = (type_t*)top->left; t = (type_t*)top->right; d = top->depth; } \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
/* This function is adapted from: http://ndevilla.free.fr/median/ */ \
|
||||||
|
/* 0 <= kk < n */ \
|
||||||
|
type_t ks_ksmall_##name(size_t n, type_t arr[], size_t kk) \
|
||||||
|
{ \
|
||||||
|
type_t *low, *high, *k, *ll, *hh, *mid; \
|
||||||
|
low = arr; high = arr + n - 1; k = arr + kk; \
|
||||||
|
for (;;) { \
|
||||||
|
if (high <= low) return *k; \
|
||||||
|
if (high == low + 1) { \
|
||||||
|
if (__sort_lt(*high, *low)) KSORT_SWAP(type_t, *low, *high); \
|
||||||
|
return *k; \
|
||||||
|
} \
|
||||||
|
mid = low + (high - low) / 2; \
|
||||||
|
if (__sort_lt(*high, *mid)) KSORT_SWAP(type_t, *mid, *high); \
|
||||||
|
if (__sort_lt(*high, *low)) KSORT_SWAP(type_t, *low, *high); \
|
||||||
|
if (__sort_lt(*low, *mid)) KSORT_SWAP(type_t, *mid, *low); \
|
||||||
|
KSORT_SWAP(type_t, *mid, *(low+1)); \
|
||||||
|
ll = low + 1; hh = high; \
|
||||||
|
for (;;) { \
|
||||||
|
do ++ll; while (__sort_lt(*ll, *low)); \
|
||||||
|
do --hh; while (__sort_lt(*low, *hh)); \
|
||||||
|
if (hh < ll) break; \
|
||||||
|
KSORT_SWAP(type_t, *ll, *hh); \
|
||||||
|
} \
|
||||||
|
KSORT_SWAP(type_t, *low, *hh); \
|
||||||
|
if (hh <= k) low = ll; \
|
||||||
|
if (hh >= k) high = hh - 1; \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
void ks_shuffle_##name(size_t n, type_t a[]) \
|
||||||
|
{ \
|
||||||
|
int i, j; \
|
||||||
|
for (i = n; i > 1; --i) { \
|
||||||
|
type_t tmp; \
|
||||||
|
j = (int)(drand48() * i); \
|
||||||
|
tmp = a[j]; a[j] = a[i-1]; a[i-1] = tmp; \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
void ks_sample_##name(size_t n, size_t r, type_t a[]) /* FIXME: NOT TESTED!!! */ \
|
||||||
|
{ /* reference: http://code.activestate.com/recipes/272884/ */ \
|
||||||
|
int i, k, pop = n; \
|
||||||
|
for (i = (int)r, k = 0; i >= 0; --i) { \
|
||||||
|
double z = 1., x = drand48(); \
|
||||||
|
type_t tmp; \
|
||||||
|
while (x < z) z -= z * i / (pop--); \
|
||||||
|
if (k != n - pop - 1) tmp = a[k], a[k] = a[n-pop-1], a[n-pop-1] = tmp; \
|
||||||
|
++k; \
|
||||||
|
} \
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Dispatch wrappers: each ks_<op>(name, ...) expands to a call of the
 * ks_<op>_<name> function, i.e. the function carrying the `name` suffix that
 * the sort-instantiation macro above generates. Arguments are forwarded
 * unchanged and in the same order. */
#define ks_mergesort(name, n, a, t) ks_mergesort_##name(n, a, t)
|
||||||
|
#define ks_introsort(name, n, a) ks_introsort_##name(n, a)
|
||||||
|
#define ks_combsort(name, n, a) ks_combsort_##name(n, a)
|
||||||
|
#define ks_heapsort(name, n, a) ks_heapsort_##name(n, a)
|
||||||
|
#define ks_heapmake(name, n, a) ks_heapmake_##name(n, a)
|
||||||
|
#define ks_heapadjust(name, i, n, a) ks_heapadjust_##name(i, n, a)
|
||||||
|
#define ks_ksmall(name, n, a, k) ks_ksmall_##name(n, a, k)
|
||||||
|
#define ks_shuffle(name, n, a) ks_shuffle_##name(n, a)
|
||||||
|
|
||||||
|
/* Ready-made "less than" predicates and instantiation shortcuts.
 *   ks_lt_generic(a, b)  - compares with the built-in `<` operator.
 *   ks_lt_str(a, b)      - compares C strings with strcmp() (needs <string.h>).
 *   ksstr_t              - element type used by the string instantiation.
 *   KSORT_INIT_GENERIC(T)- instantiates the sort functions for type T, using T
 *                          itself as the name suffix and `<` as the ordering.
 *   KSORT_INIT_STR       - instantiates them with suffix `str` for ksstr_t,
 *                          ordered by strcmp(). */
#define ks_lt_generic(a, b) ((a) < (b))
|
||||||
|
#define ks_lt_str(a, b) (strcmp((a), (b)) < 0)
|
||||||
|
|
||||||
|
typedef const char *ksstr_t;
|
||||||
|
|
||||||
|
#define KSORT_INIT_GENERIC(type_t) KSORT_INIT(type_t, type_t, ks_lt_generic)
|
||||||
|
#define KSORT_INIT_STR KSORT_INIT(str, ksstr_t, ks_lt_str)
|
||||||
|
|
||||||
|
/* Radix-sort tuning constants.
 * RS_MIN_SIZE: ranges of at most this many elements are handed to insertion
 *              sort instead of another radix pass.
 * RS_MAX_BITS: number of key bits consumed per radix pass; the per-pass bucket
 *              array holds 1<<RS_MAX_BITS entries. */
#define RS_MIN_SIZE 64
|
||||||
|
#define RS_MAX_BITS 8
|
||||||
|
|
||||||
|
/* KRADIX_SORT_INIT(name, rstype_t, rskey, sizeof_key)
 *
 * Instantiates an in-place, most-significant-digit-first radix sort for
 * elements of type `rstype_t`. `rskey(x)` must yield the integer sort key of
 * element x. `sizeof_key` is used only to derive the initial shift,
 * (sizeof_key - 1) * RS_MAX_BITS; presumably it is the key size in bytes -
 * confirm against callers. The expansion uses assert(), so <assert.h> must be
 * included by the instantiating file.
 *
 * Generated entities:
 *   rsbucket_<name>_t    - [b, e) pointer pair delimiting one bucket.
 *   rs_insertsort_<name> - insertion sort of [beg, end) by rskey().
 *   rs_sort_<name>       - one radix pass on the digit (rskey(x) >> s) & m:
 *                          counts elements per digit, turns the counts into
 *                          bucket boundaries, moves every element into its
 *                          bucket by following displacement cycles, resets the
 *                          bucket starts, and - while s is non-zero - recurses
 *                          with the shift lowered by n_bits (clamped at 0) on
 *                          buckets larger than RS_MIN_SIZE, insertion-sorting
 *                          the smaller ones that hold more than one element.
 *   radix_sort_<name>    - entry point: insertion sort for ranges of at most
 *                          RS_MIN_SIZE elements, otherwise rs_sort_<name>
 *                          starting at the top digit. */
#define KRADIX_SORT_INIT(name, rstype_t, rskey, sizeof_key) \
|
||||||
|
typedef struct { \
|
||||||
|
rstype_t *b, *e; \
|
||||||
|
} rsbucket_##name##_t; \
|
||||||
|
void rs_insertsort_##name(rstype_t *beg, rstype_t *end) \
|
||||||
|
{ \
|
||||||
|
rstype_t *i; \
|
||||||
|
for (i = beg + 1; i < end; ++i) \
|
||||||
|
if (rskey(*i) < rskey(*(i - 1))) { \
|
||||||
|
rstype_t *j, tmp = *i; \
|
||||||
|
for (j = i; j > beg && rskey(tmp) < rskey(*(j-1)); --j) \
|
||||||
|
*j = *(j - 1); \
|
||||||
|
*j = tmp; \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
void rs_sort_##name(rstype_t *beg, rstype_t *end, int n_bits, int s) \
|
||||||
|
{ \
|
||||||
|
rstype_t *i; \
|
||||||
|
int size = 1<<n_bits, m = size - 1; \
|
||||||
|
rsbucket_##name##_t *k, b[1<<RS_MAX_BITS], *be = b + size; \
|
||||||
|
assert(n_bits <= RS_MAX_BITS); \
|
||||||
|
for (k = b; k != be; ++k) k->b = k->e = beg; \
|
||||||
|
for (i = beg; i != end; ++i) ++b[rskey(*i)>>s&m].e; \
|
||||||
|
for (k = b + 1; k != be; ++k) \
|
||||||
|
k->e += (k-1)->e - beg, k->b = (k-1)->e; \
|
||||||
|
for (k = b; k != be;) { \
|
||||||
|
if (k->b != k->e) { \
|
||||||
|
rsbucket_##name##_t *l; \
|
||||||
|
if ((l = b + (rskey(*k->b)>>s&m)) != k) { \
|
||||||
|
rstype_t tmp = *k->b, swap; \
|
||||||
|
do { \
|
||||||
|
swap = tmp; tmp = *l->b; *l->b++ = swap; \
|
||||||
|
l = b + (rskey(tmp)>>s&m); \
|
||||||
|
} while (l != k); \
|
||||||
|
*k->b++ = tmp; \
|
||||||
|
} else ++k->b; \
|
||||||
|
} else ++k; \
|
||||||
|
} \
|
||||||
|
for (b->b = beg, k = b + 1; k != be; ++k) k->b = (k-1)->e; \
|
||||||
|
if (s) { \
|
||||||
|
s = s > n_bits? s - n_bits : 0; \
|
||||||
|
for (k = b; k != be; ++k) \
|
||||||
|
if (k->e - k->b > RS_MIN_SIZE) rs_sort_##name(k->b, k->e, n_bits, s); \
|
||||||
|
else if (k->e - k->b > 1) rs_insertsort_##name(k->b, k->e); \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
void radix_sort_##name(rstype_t *beg, rstype_t *end) \
|
||||||
|
{ \
|
||||||
|
if (end - beg <= RS_MIN_SIZE) rs_insertsort_##name(beg, end); \
|
||||||
|
else rs_sort_##name(beg, end, RS_MAX_BITS, (sizeof_key - 1) * RS_MAX_BITS); \
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
@ -0,0 +1,250 @@
|
||||||
|
#include <stdarg.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <ctype.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
#include "kstring.h"
|
||||||
|
|
||||||
|
/* Append printf-style formatted text to `s`.
 *
 * The text is first formatted into whatever free space the buffer already
 * has; if that is too small the buffer is grown and the formatting repeated.
 *
 * Returns the number of characters appended (excluding the terminating NUL),
 * or a negative value if vsnprintf() reports an error or the buffer cannot be
 * grown. On failure s->l, s->m and the ownership of s->s are left unchanged. */
int kvsprintf(kstring_t *s, const char *fmt, va_list ap)
{
	va_list args;
	int l;

	va_copy(args, ap);
	l = vsnprintf(s->s + s->l, s->m - s->l, fmt, args); // This line does not work with glibc 2.0. See `man snprintf'.
	va_end(args);
	if (l < 0) return l; /* formatting error: do not let it corrupt s->l */

	if ((size_t)l + 1 > s->m - s->l) {
		const size_t need = s->l + (size_t)l + 2;
		size_t cap = need;
		char *tmp;

		kroundup32(cap);
		if (cap < need) cap = need; /* kroundup32() wrapped around */
		tmp = (char*)realloc(s->s, cap);
		if (tmp == NULL) return -1; /* old buffer is still valid and still owned by s */
		/* commit pointer and capacity together, only after success */
		s->s = tmp;
		s->m = cap;

		va_copy(args, ap);
		l = vsnprintf(s->s + s->l, s->m - s->l, fmt, args);
		va_end(args);
		if (l < 0) return l;
	}
	s->l += l;
	return l;
}
|
||||||
|
|
||||||
|
/* Variadic front end of kvsprintf(): appends the formatted text to `s` and
 * returns whatever kvsprintf() returns. */
int ksprintf(kstring_t *s, const char *fmt, ...)
{
	int n_written;
	va_list va;

	va_start(va, fmt);
	n_written = kvsprintf(s, fmt, va);
	va_end(va);

	return n_written;
}
|
||||||
|
|
||||||
|
/* Non-destructive tokenizer.
 *
 * str    - string to start scanning, or NULL to continue after the token
 *          returned by the previous call (scanning resumes at aux->p + 1).
 * sep_in - separator set, or NULL to reuse the set already stored in `aux`.
 * aux    - iteration state: on return aux->p points at the end of the token
 *          (a separator or the terminating NUL) and aux->finished is set once
 *          the terminating NUL has been reached.
 *
 * Returns the start of the token, or NULL when there is nothing left. The
 * input string is never modified; the token length is aux->p minus the
 * returned pointer. */
char *kstrtok(const char *str, const char *sep_in, ks_tokaux_t *aux)
{
	const unsigned char *delims = (const unsigned char *)sep_in;
	const unsigned char *tok, *cur;

	if (delims != NULL) {
		/* continuing an exhausted scan: nothing to set up, nothing to return */
		if (str == NULL && aux->finished) return NULL;
		aux->finished = 0;
		if (delims[0] == 0 || delims[1] == 0) {
			/* zero or one separator: a plain character comparison suffices */
			aux->sep = delims[0];
		} else {
			/* several separators: record them in a 256-bit membership table */
			const unsigned char *d;
			int w;
			aux->sep = -1;
			for (w = 0; w < 4; ++w) aux->tab[w] = 0;
			for (d = delims; *d != 0; ++d)
				aux->tab[*d >> 6] |= 1ull << (*d & 0x3f);
		}
	}

	if (aux->finished) return NULL;
	if (str != NULL) {
		tok = (const unsigned char *)str;
		aux->finished = 0;
	} else {
		tok = (const unsigned char *)aux->p + 1;
	}

	cur = tok;
	if (aux->sep < 0) {
		while (*cur != 0 && ((aux->tab[*cur >> 6] >> (*cur & 0x3f)) & 1) == 0)
			++cur;
	} else {
		while (*cur != 0 && *cur != aux->sep)
			++cur;
	}

	aux->p = (const char *)cur; // end of token
	if (*cur == 0) aux->finished = 1; // no more tokens
	return (char *)tok;
}
|
||||||
|
|
||||||
|
/* Split the NUL-terminated string `s` into fields.
 *
 * delimiter - field separator; 0 means "any run of whitespace".
 *             Consecutive separators never produce empty fields.
 * _max      - in/out capacity (in ints) of the offsets array; may be NULL
 *             only when `_offsets` is NULL.
 * _offsets  - in/out, realloc()-managed array receiving the start offset of
 *             every field. When non-NULL, `s` is modified in place: the
 *             separator ending each field is overwritten with NUL.
 *             When NULL the function only counts the fields and leaves `s`
 *             untouched.
 *
 * Returns the number of fields. If the offsets array cannot be grown it is
 * freed, *_offsets is set to NULL, *_max to 0 and 0 is returned. */
int ksplit_core(char *s, int delimiter, int *_max, int **_offsets)
{
	int i, n = 0, max, last_char = 0, last_start = 0, *offsets, l;

	max = _max ? *_max : 0;
	offsets = _offsets ? *_offsets : NULL;
	l = strlen(s);

	for (i = 0; i <= l; ++i) {
		int field_ends = 0;

		if (delimiter == 0) {
			/* <ctype.h> functions require an unsigned char value (or EOF) */
			if (isspace((unsigned char)s[i]) || s[i] == 0) {
				field_ends = isgraph((unsigned char)last_char) != 0;
			} else if (isspace((unsigned char)last_char) || last_char == 0) {
				last_start = i;
			}
		} else {
			if (s[i] == delimiter || s[i] == 0) {
				field_ends = (last_char != 0 && last_char != delimiter);
			} else if (last_char == delimiter || last_char == 0) {
				last_start = i;
			}
		}

		if (field_ends) {
			if (_offsets) {
				s[i] = 0; /* terminate the field in place */
				if (n == max) {
					int *tmp;
					max = max ? max << 1 : 2;
					tmp = (int*)realloc(offsets, sizeof(int) * max);
					if (tmp == NULL) {
						free(offsets);
						*_offsets = NULL;
						if (_max) *_max = 0;
						return 0;
					}
					offsets = tmp;
				}
				offsets[n++] = last_start;
			} else {
				++n; /* count-only mode */
			}
		}
		last_char = s[i];
	}

	if (_max) *_max = max;
	if (_offsets) *_offsets = offsets;
	return n;
}
|
||||||
|
|
||||||
|
/* Read one line with an fgets()-like function and append it to `s`.
 *
 * fgets_fn is called repeatedly (with at least 200 bytes of free space each
 * time) until the appended text ends in '\n' or fgets_fn returns NULL. A
 * trailing "\n" or "\r\n" is removed and the result is NUL-terminated.
 *
 * Returns 0 on success; EOF if nothing could be read or if the buffer could
 * not be enlarged. */
int kgetline(kstring_t *s, kgets_func *fgets_fn, void *fp)
{
	size_t l0 = s->l;

	while (s->l == l0 || s->s[s->l-1] != '\n') {
		if (s->m - s->l < 200) {
			/* never let fgets_fn write into a buffer that failed to grow */
			if (ks_resize(s, s->m + 200) < 0) return EOF;
		}
		if (fgets_fn(s->s + s->l, s->m - s->l, fp) == NULL) break;
		s->l += strlen(s->s + s->l);
	}

	if (s->l == l0) return EOF;

	if (s->l > l0 && s->s[s->l-1] == '\n') {
		s->l--;
		if (s->l > l0 && s->s[s->l-1] == '\r') s->l--;
	}
	s->s[s->l] = '\0';
	return 0;
}
|
||||||
|
|
||||||
|
/**********************
|
||||||
|
* Boyer-Moore search *
|
||||||
|
**********************/
|
||||||
|
|
||||||
|
typedef unsigned char ubyte_t;

// reference: http://www-igm.univ-mlv.fr/~lecroq/string/node14.html
/* Build the Boyer-Moore shift tables for the pattern pat[0..m-1].
 *
 * The result is one calloc()'ed array of m + 256 ints: the first m entries
 * are the good-suffix shifts (bmGs), the following 256 the bad-character
 * shifts (bmBc). The caller owns the array and releases it with free().
 * Returns NULL when m <= 0 or when memory cannot be allocated. */
static int *ksBM_prep(const ubyte_t *pat, int m)
{
	int i, *suff, *prep, *bmGs, *bmBc;

	if (m <= 0) return 0; /* suff[m - 1] below would be out of bounds */
	prep = (int*)calloc((size_t)m + 256, sizeof(int));
	suff = (int*)calloc((size_t)m, sizeof(int));
	if (prep == 0 || suff == 0) {
		free(prep); free(suff);
		return 0;
	}
	bmGs = prep; bmBc = prep + m;
	{ // preBmBc()
		for (i = 0; i < 256; ++i) bmBc[i] = m;
		for (i = 0; i < m - 1; ++i) bmBc[pat[i]] = m - i - 1;
	}
	{ // suffixes()
		int f = 0, g;
		suff[m - 1] = m;
		g = m - 1;
		for (i = m - 2; i >= 0; --i) {
			if (i > g && suff[i + m - 1 - f] < i - g)
				suff[i] = suff[i + m - 1 - f];
			else {
				if (i < g) g = i;
				f = i;
				while (g >= 0 && pat[g] == pat[g + m - 1 - f]) --g;
				suff[i] = f - g;
			}
		}
	}
	{ // preBmGs()
		int j = 0;
		for (i = 0; i < m; ++i) bmGs[i] = m;
		for (i = m - 1; i >= 0; --i)
			if (suff[i] == i + 1)
				for (; j < m - 1 - i; ++j)
					if (bmGs[j] == m)
						bmGs[j] = m - 1 - i;
		for (i = 0; i <= m - 2; ++i)
			bmGs[m - 1 - suff[i]] = m - 1 - i;
	}
	free(suff);
	return prep;
}

/* Boyer-Moore search for the m-byte pattern `_pat` in the n-byte buffer `_str`.
 *
 * _prep - optional cache for the shift tables. If NULL, the tables are built
 *         and released inside this call. If it points to a NULL pointer, the
 *         tables are built and stored there for reuse (the caller frees
 *         them). If it points to a non-NULL pointer, those tables are used
 *         as they are and must belong to the same pattern.
 *
 * Returns a pointer to the first occurrence, or NULL if there is none. An
 * empty pattern (m == 0) matches at the start of the buffer; a negative m
 * yields NULL. NULL is also returned when the tables cannot be allocated. */
void *kmemmem(const void *_str, int n, const void *_pat, int m, int **_prep)
{
	int i, j, *prep = 0, *bmGs, *bmBc;
	const ubyte_t *str, *pat, *hit = 0;

	str = (const ubyte_t*)_str; pat = (const ubyte_t*)_pat;
	if (m == 0) return (void*)str;
	if (m < 0) return 0;

	prep = (_prep == 0 || *_prep == 0)? ksBM_prep(pat, m) : *_prep;
	if (prep == 0) return 0;
	if (_prep && *_prep == 0) *_prep = prep;
	bmGs = prep; bmBc = prep + m;

	j = 0;
	while (j <= n - m) {
		for (i = m - 1; i >= 0 && pat[i] == str[i+j]; --i);
		if (i < 0) { /* whole pattern matched at offset j */
			hit = str + j;
			break;
		} else {
			int max = bmBc[str[i+j]] - m + 1 + i;
			if (max < bmGs[i]) max = bmGs[i];
			j += max;
		}
	}
	/* release the temporary tables on every exit path, match or not */
	if (_prep == 0) free(prep);
	return (void*)hit;
}
|
||||||
|
|
||||||
|
/* strstr()-like search for NUL-terminated strings, implemented with
 * kmemmem(); `_prep` is passed through as the shift-table cache. */
char *kstrstr(const char *str, const char *pat, int **_prep)
{
	const int str_len = strlen(str);
	const int pat_len = strlen(pat);
	void *hit = kmemmem(str, str_len, pat, pat_len, _prep);
	return (char*)hit;
}
|
||||||
|
|
||||||
|
/* Like kstrstr(), but only the first `n` bytes of `str` are searched; `pat`
 * must still be NUL-terminated. */
char *kstrnstr(const char *str, const char *pat, int n, int **_prep)
{
	const int pat_len = strlen(pat);
	void *hit = kmemmem(str, n, pat, pat_len, _prep);
	return (char*)hit;
}
|
||||||
|
|
||||||
|
/***********************
|
||||||
|
* The main() function *
|
||||||
|
***********************/
|
||||||
|
|
||||||
|
#ifdef KSTRING_MAIN
|
||||||
|
#include <stdio.h>
|
||||||
|
int main()
|
||||||
|
{
|
||||||
|
kstring_t *s;
|
||||||
|
int *fields, n, i;
|
||||||
|
ks_tokaux_t aux;
|
||||||
|
char *p;
|
||||||
|
s = (kstring_t*)calloc(1, sizeof(kstring_t));
|
||||||
|
// test ksprintf()
|
||||||
|
ksprintf(s, " abcdefg: %d ", 100);
|
||||||
|
printf("'%s'\n", s->s);
|
||||||
|
// test ksplit()
|
||||||
|
fields = ksplit(s, 0, &n);
|
||||||
|
for (i = 0; i < n; ++i)
|
||||||
|
printf("field[%d] = '%s'\n", i, s->s + fields[i]);
|
||||||
|
// test kstrtok()
|
||||||
|
s->l = 0;
|
||||||
|
for (p = kstrtok("ab:cde:fg/hij::k", ":/", &aux); p; p = kstrtok(0, 0, &aux)) {
|
||||||
|
kputsn(p, aux.p - p, s);
|
||||||
|
kputc('\n', s);
|
||||||
|
}
|
||||||
|
printf("%s", s->s);
|
||||||
|
// free
|
||||||
|
free(s->s); free(s); free(fields);
|
||||||
|
|
||||||
|
{
|
||||||
|
static char *str = "abcdefgcdgcagtcakcdcd";
|
||||||
|
static char *pat = "cd";
|
||||||
|
char *ret, *s = str;
|
||||||
|
int *prep = 0;
|
||||||
|
while ((ret = kstrstr(s, pat, &prep)) != 0) {
|
||||||
|
printf("match: %s\n", ret);
|
||||||
|
s = ret + prep[0];
|
||||||
|
}
|
||||||
|
free(prep);
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
@ -0,0 +1,277 @@
|
||||||
|
/* The MIT License
|
||||||
|
|
||||||
|
Copyright (c) by Attractive Chaos <attractor@live.co.uk>
|
||||||
|
|
||||||
|
Permission is hereby granted, free of charge, to any person obtaining
|
||||||
|
a copy of this software and associated documentation files (the
|
||||||
|
"Software"), to deal in the Software without restriction, including
|
||||||
|
without limitation the rights to use, copy, modify, merge, publish,
|
||||||
|
distribute, sublicense, and/or sell copies of the Software, and to
|
||||||
|
permit persons to whom the Software is furnished to do so, subject to
|
||||||
|
the following conditions:
|
||||||
|
|
||||||
|
The above copyright notice and this permission notice shall be
|
||||||
|
included in all copies or substantial portions of the Software.
|
||||||
|
|
||||||
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||||
|
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||||
|
BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||||
|
ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||||
|
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
|
SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef KSTRING_H
|
||||||
|
#define KSTRING_H
|
||||||
|
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <stdarg.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
|
||||||
|
/* kroundup32(x): round the integer lvalue x up, in place, to the next power of
 * two (a value that already is a power of two is left unchanged). It works by
 * decrementing x, copying the highest set bit into every lower position, and
 * incrementing again. The shifts stop at 16, so only the low 32 bits are fully
 * covered. x is evaluated several times and must have no side effects. */
#ifndef kroundup32
|
||||||
|
#define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x))
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* KS_ATTR_PRINTF(fmt, arg): on GCC newer than 2.4 (and compilers defining the
 * same version macros) expands to the printf `format` attribute, where `fmt`
 * is the 1-based index of the format-string parameter and `arg` the index of
 * the first variadic argument. Expands to nothing elsewhere. */
#if __GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ > 4)
|
||||||
|
#define KS_ATTR_PRINTF(fmt, arg) __attribute__((__format__ (__printf__, fmt, arg)))
|
||||||
|
#else
|
||||||
|
#define KS_ATTR_PRINTF(fmt, arg)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
/* kstring_t is a simple non-opaque type whose fields are likely to be
|
||||||
|
* used directly by user code (but see also ks_str() and ks_len() below).
|
||||||
|
* A kstring_t object is initialised by either of
|
||||||
|
* kstring_t str = { 0, 0, NULL };
|
||||||
|
* kstring_t str; ...; str.l = str.m = 0; str.s = NULL;
|
||||||
|
* and either ownership of the underlying buffer should be given away before
|
||||||
|
* the object disappears (see ks_release() below) or the kstring_t should be
|
||||||
|
* destroyed with free(str.s); */
|
||||||
|
/* The KSTRING_T guard keeps the type from being defined twice when another
 * header has already provided it. */
#ifndef KSTRING_T
|
||||||
|
#define KSTRING_T kstring_t
|
||||||
|
typedef struct __kstring_t {
|
||||||
|
/* l: number of bytes currently stored (what ks_len() returns);
 * m: number of bytes allocated for s (the size passed to realloc()). */
size_t l, m;
|
||||||
|
/* s: heap buffer holding the data (what ks_str() returns); NULL while empty. */
char *s;
|
||||||
|
} kstring_t;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* Iteration state of kstrtok(). */
typedef struct {
|
||||||
|
/* 256-bit membership table, one bit per byte value; filled by kstrtok() when
 * the separator string has two or more characters. */
uint64_t tab[4];
|
||||||
|
/* sep: the single separator character, or -1 when `tab` is used instead;
 * finished: non-zero once the end of the input string has been reached. */
int sep, finished;
|
||||||
|
const char *p; // end of the current token
|
||||||
|
} ks_tokaux_t;
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* Formatted append to a kstring (va_list and variadic forms). */
int kvsprintf(kstring_t *s, const char *fmt, va_list ap) KS_ATTR_PRINTF(2,0);
|
||||||
|
int ksprintf(kstring_t *s, const char *fmt, ...) KS_ATTR_PRINTF(2,3);
|
||||||
|
/* Split s in place on `delimiter` (0 = whitespace); field start offsets go to
 * the realloc()-managed array *_offsets of capacity *_max. Returns the number
 * of fields. */
int ksplit_core(char *s, int delimiter, int *_max, int **_offsets);
|
||||||
|
/* Boyer-Moore substring search. *_prep optionally caches the shift tables
 * between calls for the same pattern; the caller frees the cached tables. */
char *kstrstr(const char *str, const char *pat, int **_prep);
|
||||||
|
char *kstrnstr(const char *str, const char *pat, int n, int **_prep);
|
||||||
|
void *kmemmem(const void *_str, int n, const void *_pat, int m, int **_prep);
|
||||||
|
|
||||||
|
/* kstrtok() is similar to strtok_r() except that str is not
|
||||||
|
* modified and both str and sep can be NULL. For efficiency, it is
|
||||||
|
* actually recommended to set both to NULL in the subsequent calls
|
||||||
|
* if sep is not changed. */
|
||||||
|
char *kstrtok(const char *str, const char *sep, ks_tokaux_t *aux);
|
||||||
|
|
||||||
|
/* kgetline() uses the supplied fgets()-like function to read a "\n"-
|
||||||
|
* or "\r\n"-terminated line from fp. The line read is appended to the
|
||||||
|
* kstring without its terminator and 0 is returned; EOF is returned at
|
||||||
|
* EOF or on error (determined by querying fp, as per fgets()). */
|
||||||
|
typedef char *kgets_func(char *, int, void *);
|
||||||
|
int kgetline(kstring_t *s, kgets_func *fgets, void *fp);
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* Make sure the buffer of `s` can hold at least `size` bytes.
 *
 * The new capacity is `size` rounded up by kroundup32(). The buffer is never
 * shrunk. Returns 0 on success and -1 if realloc() fails; in the failure case
 * both s->s and s->m keep their previous values, so the recorded capacity
 * always matches the real allocation. */
static inline int ks_resize(kstring_t *s, size_t size)
{
	if (s->m < size) {
		size_t cap = size;
		char *tmp;

		kroundup32(cap);
		if (cap < size) cap = size; /* kroundup32() wrapped around */
		if ((tmp = (char*)realloc(s->s, cap)) == NULL)
			return -1;
		/* commit pointer and capacity together, only after success */
		s->s = tmp;
		s->m = cap;
	}
	return 0;
}
|
||||||
|
|
||||||
|
/* Accessor: the character buffer of `s` (NULL if nothing was ever stored). */
static inline char *ks_str(kstring_t *s)
{
	char *buf = s->s;
	return buf;
}
|
||||||
|
|
||||||
|
/* Accessor: the number of bytes currently stored in `s`. */
static inline size_t ks_len(kstring_t *s)
{
	const size_t used = s->l;
	return used;
}
|
||||||
|
|
||||||
|
// Give ownership of the underlying buffer away to something else (making
|
||||||
|
// that something else responsible for freeing it), leaving the kstring_t
|
||||||
|
// empty and ready to be used again, or ready to go out of scope without
|
||||||
|
// needing free(str.s) to prevent a memory leak.
|
||||||
|
static inline char *ks_release(kstring_t *s)
|
||||||
|
{
|
||||||
|
char *ss = s->s;
|
||||||
|
s->l = s->m = 0;
|
||||||
|
s->s = NULL;
|
||||||
|
return ss;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Append the first `l` bytes of `p` to `s` and keep the result
 * NUL-terminated.
 *
 * Returns `l` on success; EOF if `l` is negative or the buffer cannot be
 * grown. On failure `s` is left unchanged. */
static inline int kputsn(const char *p, int l, kstring_t *s)
{
	if (l < 0) return EOF; /* would otherwise reach memcpy() as a huge size */
	if (s->l + l + 1 >= s->m) {
		const size_t need = s->l + l + 2;
		size_t cap = need;
		char *tmp;

		kroundup32(cap);
		if (cap < need) cap = need; /* kroundup32() wrapped around */
		if ((tmp = (char*)realloc(s->s, cap)) == NULL)
			return EOF;
		/* commit pointer and capacity together, only after success */
		s->s = tmp;
		s->m = cap;
	}
	if (l > 0) memcpy(s->s + s->l, p, l);
	s->l += l;
	s->s[s->l] = 0;
	return l;
}
|
||||||
|
|
||||||
|
/* Append the NUL-terminated string `p` to `s`; returns what kputsn() returns. */
static inline int kputs(const char *p, kstring_t *s)
{
	const int len = strlen(p);
	return kputsn(p, len, s);
}
|
||||||
|
|
||||||
|
/* Append the character `c` to `s` and keep the result NUL-terminated.
 *
 * Returns `c` on success, EOF if the buffer cannot be grown (in which case
 * `s` is left unchanged). */
static inline int kputc(int c, kstring_t *s)
{
	if (s->l + 1 >= s->m) {
		const size_t need = s->l + 2;
		size_t cap = need;
		char *tmp;

		kroundup32(cap);
		if (cap < need) cap = need; /* kroundup32() wrapped around */
		if ((tmp = (char*)realloc(s->s, cap)) == NULL)
			return EOF;
		/* commit pointer and capacity together, only after success */
		s->s = tmp;
		s->m = cap;
	}
	s->s[s->l++] = c;
	s->s[s->l] = 0;
	return c;
}
|
||||||
|
|
||||||
|
/* Append the character `c` to `s` WITHOUT writing a terminating NUL.
 *
 * Returns 1 on success, EOF if the buffer cannot be grown (in which case `s`
 * is left unchanged). */
static inline int kputc_(int c, kstring_t *s)
{
	if (s->l + 1 > s->m) {
		const size_t need = s->l + 1;
		size_t cap = need;
		char *tmp;

		kroundup32(cap);
		if (cap < need) cap = need; /* kroundup32() wrapped around */
		if ((tmp = (char*)realloc(s->s, cap)) == NULL)
			return EOF;
		/* commit pointer and capacity together, only after success */
		s->s = tmp;
		s->m = cap;
	}
	s->s[s->l++] = c;
	return 1;
}
|
||||||
|
|
||||||
|
/* Append the first `l` bytes of `p` to `s` WITHOUT writing a terminating NUL.
 *
 * Returns `l` on success; EOF if `l` is negative or the buffer cannot be
 * grown. On failure `s` is left unchanged. */
static inline int kputsn_(const void *p, int l, kstring_t *s)
{
	if (l < 0) return EOF; /* would otherwise reach memcpy() as a huge size */
	if (s->l + l > s->m) {
		const size_t need = s->l + l;
		size_t cap = need;
		char *tmp;

		kroundup32(cap);
		if (cap < need) cap = need; /* kroundup32() wrapped around */
		if ((tmp = (char*)realloc(s->s, cap)) == NULL)
			return EOF;
		/* commit pointer and capacity together, only after success */
		s->s = tmp;
		s->m = cap;
	}
	if (l > 0) memcpy(s->s + s->l, p, l);
	s->l += l;
	return l;
}
|
||||||
|
|
||||||
|
/* Append the decimal representation of the int `c` to `s` and keep the result
 * NUL-terminated.
 *
 * Returns 0 on success, EOF if the buffer cannot be grown (in which case `s`
 * is left unchanged). */
static inline int kputw(int c, kstring_t *s)
{
	char buf[16];
	int i, l = 0;
	unsigned int x = c;

	if (c < 0) x = -x; /* unsigned negation: well defined even for INT_MIN */
	/* digits are produced least-significant first and reversed on output */
	do { buf[l++] = x%10 + '0'; x /= 10; } while (x > 0);
	if (c < 0) buf[l++] = '-';
	if (s->l + l + 1 >= s->m) {
		const size_t need = s->l + l + 2;
		size_t cap = need;
		char *tmp;

		kroundup32(cap);
		if (cap < need) cap = need; /* kroundup32() wrapped around */
		if ((tmp = (char*)realloc(s->s, cap)) == NULL)
			return EOF;
		/* commit pointer and capacity together, only after success */
		s->s = tmp;
		s->m = cap;
	}
	for (i = l - 1; i >= 0; --i) s->s[s->l++] = buf[i];
	s->s[s->l] = 0;
	return 0;
}
|
||||||
|
|
||||||
|
/* Append the decimal representation of the unsigned `c` to `s` and keep the
 * result NUL-terminated.
 *
 * Returns 0 on success and EOF if the buffer cannot be grown (in which case
 * `s` is left unchanged). For c == 0 the call is delegated to kputc() and
 * its return value is passed through. */
static inline int kputuw(unsigned c, kstring_t *s)
{
	char buf[16];
	int l, i;
	unsigned x;

	if (c == 0) return kputc('0', s);
	/* digits are produced least-significant first and reversed on output */
	for (l = 0, x = c; x > 0; x /= 10) buf[l++] = x%10 + '0';
	if (s->l + l + 1 >= s->m) {
		const size_t need = s->l + l + 2;
		size_t cap = need;
		char *tmp;

		kroundup32(cap);
		if (cap < need) cap = need; /* kroundup32() wrapped around */
		if ((tmp = (char*)realloc(s->s, cap)) == NULL)
			return EOF;
		/* commit pointer and capacity together, only after success */
		s->s = tmp;
		s->m = cap;
	}
	for (i = l - 1; i >= 0; --i) s->s[s->l++] = buf[i];
	s->s[s->l] = 0;
	return 0;
}
|
||||||
|
|
||||||
|
/* Append the decimal representation of the long `c` to `s` and keep the
 * result NUL-terminated.
 *
 * Returns 0 on success, EOF if the buffer cannot be grown (in which case `s`
 * is left unchanged). */
static inline int kputl(long c, kstring_t *s)
{
	char buf[32];
	int i, l = 0;
	unsigned long x = c;

	if (c < 0) x = -x; /* unsigned negation: well defined even for LONG_MIN */
	/* digits are produced least-significant first and reversed on output */
	do { buf[l++] = x%10 + '0'; x /= 10; } while (x > 0);
	if (c < 0) buf[l++] = '-';
	if (s->l + l + 1 >= s->m) {
		const size_t need = s->l + l + 2;
		size_t cap = need;
		char *tmp;

		kroundup32(cap);
		if (cap < need) cap = need; /* kroundup32() wrapped around */
		if ((tmp = (char*)realloc(s->s, cap)) == NULL)
			return EOF;
		/* commit pointer and capacity together, only after success */
		s->s = tmp;
		s->m = cap;
	}
	for (i = l - 1; i >= 0; --i) s->s[s->l++] = buf[i];
	s->s[s->l] = 0;
	return 0;
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Returns 's' split by delimiter, with *n being the number of components;
|
||||||
|
* NULL on failue.
|
||||||
|
*/
|
||||||
|
static inline int *ksplit(kstring_t *s, int delimiter, int *n)
|
||||||
|
{
|
||||||
|
int max = 0, *offsets = 0;
|
||||||
|
*n = ksplit_core(s->s, delimiter, &max, &offsets);
|
||||||
|
return offsets;
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
@ -0,0 +1,633 @@
|
||||||
|
/* The MIT License
|
||||||
|
|
||||||
|
Copyright (c) 2011 by Attractive Chaos <attractor@live.co.uk>
|
||||||
|
|
||||||
|
Permission is hereby granted, free of charge, to any person obtaining
|
||||||
|
a copy of this software and associated documentation files (the
|
||||||
|
"Software"), to deal in the Software without restriction, including
|
||||||
|
without limitation the rights to use, copy, modify, merge, publish,
|
||||||
|
distribute, sublicense, and/or sell copies of the Software, and to
|
||||||
|
permit persons to whom the Software is furnished to do so, subject to
|
||||||
|
the following conditions:
|
||||||
|
|
||||||
|
The above copyright notice and this permission notice shall be
|
||||||
|
included in all copies or substantial portions of the Software.
|
||||||
|
|
||||||
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||||
|
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||||
|
BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||||
|
ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||||
|
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
|
SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <emmintrin.h>
|
||||||
|
#include "ksw.h"
|
||||||
|
|
||||||
|
/* Branch-prediction hints: with GNU-compatible compilers LIKELY(x) and
 * UNLIKELY(x) wrap __builtin_expect() to mark x as expected true / expected
 * false; with other compilers they expand to plain x. */
#ifdef __GNUC__
|
||||||
|
#define LIKELY(x) __builtin_expect((x),1)
|
||||||
|
#define UNLIKELY(x) __builtin_expect((x),0)
|
||||||
|
#else
|
||||||
|
#define LIKELY(x) (x)
|
||||||
|
#define UNLIKELY(x) (x)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* Default result record; ksw_u8() copies it into its result before aligning.
 * The meaning of the individual fields is declared with kswr_t in ksw.h (not
 * visible here). */
const kswr_t g_defr = { 0, -1, -1, -1, -1, -1, -1 };
|
||||||
|
|
||||||
|
/* Query profile built by ksw_qinit(). The structure and all of its vectors
 * live in one malloc()'ed block. */
struct _kswq_t {
|
||||||
|
/* qlen: query length; slen: number of SSE vectors per row, i.e. qlen divided
 * by the number of scores per vector, rounded up. */
int qlen, slen;
|
||||||
|
/* shift: 256 minus the smallest matrix score (stored as uint8_t);
 * mdiff: difference between the largest and smallest matrix score;
 * max: largest matrix score; size: bytes per score (1 or 2). */
uint8_t shift, mdiff, max, size;
|
||||||
|
/* qp: 16-byte aligned score profile, slen vectors per alphabet symbol;
 * H0, H1, E, Hmax: work arrays of slen vectors each, laid out after qp. */
__m128i *qp, *H0, *H1, *E, *Hmax;
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Initialize the query data structure
|
||||||
|
*
|
||||||
|
* @param size Number of bytes used to store a score; valid valures are 1 or 2
|
||||||
|
* @param qlen Length of the query sequence
|
||||||
|
* @param query Query sequence
|
||||||
|
* @param m Size of the alphabet
|
||||||
|
* @param mat Scoring matrix in a one-dimension array
|
||||||
|
*
|
||||||
|
* @return Query data structure
|
||||||
|
*/
|
||||||
|
kswq_t *ksw_qinit(int size, int qlen, const uint8_t *query, int m, const int8_t *mat)
|
||||||
|
{
|
||||||
|
kswq_t *q;
|
||||||
|
int slen, a, tmp, p;
|
||||||
|
|
||||||
|
size = size > 1? 2 : 1;
|
||||||
|
p = 8 * (3 - size); // # values per __m128i
|
||||||
|
slen = (qlen + p - 1) / p; // segmented length
|
||||||
|
q = (kswq_t*)malloc(sizeof(kswq_t) + 256 + 16 * slen * (m + 4)); // a single block of memory
|
||||||
|
q->qp = (__m128i*)(((size_t)q + sizeof(kswq_t) + 15) >> 4 << 4); // align memory
|
||||||
|
q->H0 = q->qp + slen * m;
|
||||||
|
q->H1 = q->H0 + slen;
|
||||||
|
q->E = q->H1 + slen;
|
||||||
|
q->Hmax = q->E + slen;
|
||||||
|
q->slen = slen; q->qlen = qlen; q->size = size;
|
||||||
|
// compute shift
|
||||||
|
tmp = m * m;
|
||||||
|
for (a = 0, q->shift = 127, q->mdiff = 0; a < tmp; ++a) { // find the minimum and maximum score
|
||||||
|
if (mat[a] < (int8_t)q->shift) q->shift = mat[a];
|
||||||
|
if (mat[a] > (int8_t)q->mdiff) q->mdiff = mat[a];
|
||||||
|
}
|
||||||
|
q->max = q->mdiff;
|
||||||
|
q->shift = 256 - q->shift; // NB: q->shift is uint8_t
|
||||||
|
q->mdiff += q->shift; // this is the difference between the min and max scores
|
||||||
|
// An example: p=8, qlen=19, slen=3 and segmentation:
|
||||||
|
// {{0,3,6,9,12,15,18,-1},{1,4,7,10,13,16,-1,-1},{2,5,8,11,14,17,-1,-1}}
|
||||||
|
if (size == 1) {
|
||||||
|
int8_t *t = (int8_t*)q->qp;
|
||||||
|
for (a = 0; a < m; ++a) {
|
||||||
|
int i, k, nlen = slen * p;
|
||||||
|
const int8_t *ma = mat + a * m;
|
||||||
|
for (i = 0; i < slen; ++i)
|
||||||
|
for (k = i; k < nlen; k += slen) // p iterations
|
||||||
|
*t++ = (k >= qlen? 0 : ma[query[k]]) + q->shift;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
int16_t *t = (int16_t*)q->qp;
|
||||||
|
for (a = 0; a < m; ++a) {
|
||||||
|
int i, k, nlen = slen * p;
|
||||||
|
const int8_t *ma = mat + a * m;
|
||||||
|
for (i = 0; i < slen; ++i)
|
||||||
|
for (k = i; k < nlen; k += slen) // p iterations
|
||||||
|
*t++ = (k >= qlen? 0 : ma[query[k]]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return q;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Striped Smith-Waterman kernel working on sixteen unsigned 8-bit lanes (SSE2).
 *
 * @param q      query profile; this function reads qp, slen, shift and max and
 *               uses H0, H1, E and Hmax as scratch rows
 * @param tlen   number of residues in target
 * @param target target sequence; target[i] selects a block of slen profile vectors
 * @param _gapo  gap open penalty
 * @param _gape  gap extension penalty
 * @param xtra   KSW_X* flags; the low 16 bits hold the KSW_XSUBO / KSW_XSTOP threshold
 *
 * @return a copy of g_defr with score and te filled in; score is 255 when the
 *         byte range saturated, and in that case qe, score2 and te2 are left
 *         at their g_defr values.
 *
 * NOTE(review): the result of realloc() for the sub-optimal hit list is not
 * checked; an allocation failure is dereferenced.
 */
kswr_t ksw_u8(kswq_t *q, int tlen, const uint8_t *target, int _gapo, int _gape, int xtra) // the first gap costs -(_o+_e)
{
	int slen, i, m_b, n_b, te = -1, gmax = 0, minsc, endsc;
	uint64_t *b;
	__m128i zero, gapoe, gape, shift, *H0, *H1, *E, *Hmax;
	kswr_t r;

	// horizontal maximum of sixteen unsigned bytes
#define __max_16(ret, xx) do { \
		(xx) = _mm_max_epu8((xx), _mm_srli_si128((xx), 8)); \
		(xx) = _mm_max_epu8((xx), _mm_srli_si128((xx), 4)); \
		(xx) = _mm_max_epu8((xx), _mm_srli_si128((xx), 2)); \
		(xx) = _mm_max_epu8((xx), _mm_srli_si128((xx), 1)); \
		(ret) = _mm_extract_epi16((xx), 0) & 0x00ff; \
	} while (0)

	// initialization
	r = g_defr;
	minsc = (xtra&KSW_XSUBO)? xtra&0xffff : 0x10000;
	endsc = (xtra&KSW_XSTOP)? xtra&0xffff : 0x10000;
	m_b = n_b = 0; b = 0;
	zero = _mm_set1_epi32(0);
	gapoe = _mm_set1_epi8(_gapo + _gape);
	gape = _mm_set1_epi8(_gape);
	shift = _mm_set1_epi8(q->shift);
	H0 = q->H0; H1 = q->H1; E = q->E; Hmax = q->Hmax;
	slen = q->slen;
	for (i = 0; i < slen; ++i) {
		_mm_store_si128(E + i, zero);
		_mm_store_si128(H0 + i, zero);
		_mm_store_si128(Hmax + i, zero);
	}
	// the core loop
	for (i = 0; i < tlen; ++i) {
		int j, k, cmp, imax;
		__m128i e, h, f = zero, max = zero, *S = q->qp + target[i] * slen; // S is the 1st score vector
		h = _mm_load_si128(H0 + slen - 1); // last segment of the previous row
		h = _mm_slli_si128(h, 1); // h=H(i-1,-1); << instead of >> because x64 is little-endian
		for (j = 0; LIKELY(j < slen); ++j) {
			/* SW cells are computed in the following order:
			 *   H(i,j)   = max{H(i-1,j-1)+S(i,j), E(i,j), F(i,j)}
			 *   E(i+1,j) = max{H(i,j)-q, E(i,j)-r}
			 *   F(i,j+1) = max{H(i,j)-q, F(i,j)-r}
			 */
			// compute H'(i,j); note that at the beginning, h=H'(i-1,j-1)
			h = _mm_adds_epu8(h, _mm_load_si128(S + j));
			h = _mm_subs_epu8(h, shift); // h=H'(i-1,j-1)+S(i,j)
			e = _mm_load_si128(E + j); // e=E'(i,j)
			h = _mm_max_epu8(h, e);
			h = _mm_max_epu8(h, f); // h=H'(i,j)
			max = _mm_max_epu8(max, h); // set max
			_mm_store_si128(H1 + j, h); // save to H'(i,j)
			// now compute E'(i+1,j)
			h = _mm_subs_epu8(h, gapoe); // h=H'(i,j)-gapo
			e = _mm_subs_epu8(e, gape); // e=E'(i,j)-gape
			e = _mm_max_epu8(e, h); // e=E'(i+1,j)
			_mm_store_si128(E + j, e); // save to E'(i+1,j)
			// now compute F'(i,j+1)
			f = _mm_subs_epu8(f, gape);
			f = _mm_max_epu8(f, h);
			// get H'(i-1,j) and prepare for the next j
			h = _mm_load_si128(H0 + j); // h=H'(i-1,j)
		}
		// NB: we do not need to set E(i,j) as we disallow adjacent insertion and then deletion
		for (k = 0; LIKELY(k < 16); ++k) { // this block mimics SWPS3; NB: H(i,j) updated in the lazy-F loop cannot exceed max
			f = _mm_slli_si128(f, 1);
			for (j = 0; LIKELY(j < slen); ++j) {
				h = _mm_load_si128(H1 + j);
				h = _mm_max_epu8(h, f); // h=H'(i,j)
				_mm_store_si128(H1 + j, h);
				h = _mm_subs_epu8(h, gapoe);
				f = _mm_subs_epu8(f, gape);
				cmp = _mm_movemask_epi8(_mm_cmpeq_epi8(_mm_subs_epu8(f, h), zero));
				if (UNLIKELY(cmp == 0xffff)) goto end_loop16; // f can no longer raise any lane
			}
		}
end_loop16:
		//int k;for (k=0;k<16;++k)printf("%d ", ((uint8_t*)&max)[k]);printf("\n");
		__max_16(imax, max); // imax is the maximum number in max
		if (imax >= minsc) { // write the b array; this condition adds branching unfortunately
			if (n_b == 0 || (int32_t)b[n_b-1] + 1 != i) { // then append
				if (n_b == m_b) {
					m_b = m_b? m_b<<1 : 8;
					b = (uint64_t*)realloc(b, 8 * m_b); // NOTE(review): result is not checked
				}
				b[n_b++] = (uint64_t)imax<<32 | i; // high word: score; low word: target position
			} else if ((int)(b[n_b-1]>>32) < imax) b[n_b-1] = (uint64_t)imax<<32 | i; // modify the last
		}
		if (imax > gmax) {
			gmax = imax; te = i; // te is the end position on the target
			for (j = 0; LIKELY(j < slen); ++j) // keep the H1 vector
				_mm_store_si128(Hmax + j, _mm_load_si128(H1 + j));
			if (gmax + q->shift >= 255 || gmax >= endsc) break;
		}
		S = H1; H1 = H0; H0 = S; // swap H0 and H1
	}
	r.score = gmax + q->shift < 255? gmax : 255;
	r.te = te;
	if (r.score != 255) { // get a->qe, the end of query match; find the 2nd best score
		int max = -1, low, high, qlen = slen * 16;
		uint8_t *t = (uint8_t*)Hmax;
		for (i = 0; i < qlen; ++i, ++t) // undo the striped layout to recover the query index
			if ((int)*t > max) max = *t, r.qe = i / 16 + i % 16 * slen;
		//printf("%d,%d\n", max, gmax);
		if (b) {
			// q->max is zero when the matrix has no positive entry; do not divide by it
			i = q->max > 0? (r.score + q->max - 1) / q->max : 0;
			low = te - i; high = te + i;
			for (i = 0; i < n_b; ++i) {
				int e = (int32_t)b[i];
				if ((e < low || e > high) && (int)(b[i]>>32) > r.score2)
					r.score2 = b[i]>>32, r.te2 = e;
			}
		}
	}
	free(b);
	return r;
}
|
||||||
|
|
||||||
|
/**
 * Striped Smith-Waterman kernel working on eight 16-bit lanes (SSE2).
 *
 * @param q      query profile; this function reads qp, slen and max and uses
 *               H0, H1, E and Hmax as scratch rows
 * @param tlen   number of residues in target
 * @param target target sequence; target[i] selects a block of slen profile vectors
 * @param _gapo  gap open penalty
 * @param _gape  gap extension penalty
 * @param xtra   KSW_X* flags; the low 16 bits hold the KSW_XSUBO / KSW_XSTOP threshold
 *
 * @return a copy of g_defr with score, te and qe filled in, plus score2/te2
 *         when sub-optimal hits were recorded.
 *
 * NOTE(review): the result of realloc() for the sub-optimal hit list is not
 * checked; an allocation failure is dereferenced.
 */
kswr_t ksw_i16(kswq_t *q, int tlen, const uint8_t *target, int _gapo, int _gape, int xtra) // the first gap costs -(_o+_e)
{
	int slen, i, m_b, n_b, te = -1, gmax = 0, minsc, endsc;
	uint64_t *b;
	__m128i zero, gapoe, gape, *H0, *H1, *E, *Hmax;
	kswr_t r;

	// horizontal maximum of eight signed 16-bit values
#define __max_8(ret, xx) do { \
		(xx) = _mm_max_epi16((xx), _mm_srli_si128((xx), 8)); \
		(xx) = _mm_max_epi16((xx), _mm_srli_si128((xx), 4)); \
		(xx) = _mm_max_epi16((xx), _mm_srli_si128((xx), 2)); \
		(ret) = _mm_extract_epi16((xx), 0); \
	} while (0)

	// initialization
	r = g_defr;
	minsc = (xtra&KSW_XSUBO)? xtra&0xffff : 0x10000;
	endsc = (xtra&KSW_XSTOP)? xtra&0xffff : 0x10000;
	m_b = n_b = 0; b = 0;
	zero = _mm_set1_epi32(0);
	gapoe = _mm_set1_epi16(_gapo + _gape);
	gape = _mm_set1_epi16(_gape);
	H0 = q->H0; H1 = q->H1; E = q->E; Hmax = q->Hmax;
	slen = q->slen;
	for (i = 0; i < slen; ++i) {
		_mm_store_si128(E + i, zero);
		_mm_store_si128(H0 + i, zero);
		_mm_store_si128(Hmax + i, zero);
	}
	// the core loop
	for (i = 0; i < tlen; ++i) {
		int j, k, imax;
		__m128i e, h, f = zero, max = zero, *S = q->qp + target[i] * slen; // S is the 1st score vector
		h = _mm_load_si128(H0 + slen - 1); // last segment of the previous row
		h = _mm_slli_si128(h, 2); // shift by one 16-bit lane
		for (j = 0; LIKELY(j < slen); ++j) {
			h = _mm_adds_epi16(h, *S++); // H(i-1,j-1)+S(i,j)
			e = _mm_load_si128(E + j);
			h = _mm_max_epi16(h, e);
			h = _mm_max_epi16(h, f); // h=H(i,j)
			max = _mm_max_epi16(max, h);
			_mm_store_si128(H1 + j, h);
			h = _mm_subs_epu16(h, gapoe);
			e = _mm_subs_epu16(e, gape);
			e = _mm_max_epi16(e, h); // e=E(i+1,j)
			_mm_store_si128(E + j, e);
			f = _mm_subs_epu16(f, gape);
			f = _mm_max_epi16(f, h); // f=F(i,j+1)
			h = _mm_load_si128(H0 + j); // h=H(i-1,j) for the next j
		}
		for (k = 0; LIKELY(k < 16); ++k) { // lazy-F loop
			f = _mm_slli_si128(f, 2);
			for (j = 0; LIKELY(j < slen); ++j) {
				h = _mm_load_si128(H1 + j);
				h = _mm_max_epi16(h, f);
				_mm_store_si128(H1 + j, h);
				h = _mm_subs_epu16(h, gapoe);
				f = _mm_subs_epu16(f, gape);
				if(UNLIKELY(!_mm_movemask_epi8(_mm_cmpgt_epi16(f, h)))) goto end_loop8; // f can no longer raise any lane
			}
		}
end_loop8:
		__max_8(imax, max);
		if (imax >= minsc) {
			if (n_b == 0 || (int32_t)b[n_b-1] + 1 != i) { // then append
				if (n_b == m_b) {
					m_b = m_b? m_b<<1 : 8;
					b = (uint64_t*)realloc(b, 8 * m_b); // NOTE(review): result is not checked
				}
				b[n_b++] = (uint64_t)imax<<32 | i; // high word: score; low word: target position
			} else if ((int)(b[n_b-1]>>32) < imax) b[n_b-1] = (uint64_t)imax<<32 | i; // modify the last
		}
		if (imax > gmax) {
			gmax = imax; te = i;
			for (j = 0; LIKELY(j < slen); ++j) // keep the H1 vector
				_mm_store_si128(Hmax + j, _mm_load_si128(H1 + j));
			if (gmax >= endsc) break;
		}
		S = H1; H1 = H0; H0 = S; // swap H0 and H1
	}
	r.score = gmax; r.te = te;
	{
		int max = -1, low, high, qlen = slen * 8;
		uint16_t *t = (uint16_t*)Hmax;
		for (i = 0, r.qe = -1; i < qlen; ++i, ++t) // undo the striped layout to recover the query index
			if ((int)*t > max) max = *t, r.qe = i / 8 + i % 8 * slen;
		if (b) {
			// q->max is zero when the matrix has no positive entry; do not divide by it
			i = q->max > 0? (r.score + q->max - 1) / q->max : 0;
			low = te - i; high = te + i;
			for (i = 0; i < n_b; ++i) {
				int e = (int32_t)b[i];
				if ((e < low || e > high) && (int)(b[i]>>32) > r.score2)
					r.score2 = b[i]>>32, r.te2 = e;
			}
		}
	}
	free(b);
	return r;
}
|
||||||
|
|
||||||
|
/* Reverse the first l bytes of s in place; a non-positive l leaves s untouched. */
static void revseq(int l, uint8_t *s)
{
	int lo, hi;
	for (lo = 0, hi = l - 1; lo < hi; ++lo, --hi) {
		uint8_t tmp = s[lo];
		s[lo] = s[hi];
		s[hi] = tmp;
	}
}
|
||||||
|
|
||||||
|
/**
 * Local alignment driver: runs the 8-bit or 16-bit kernel and, when
 * KSW_XSTART is set, a second pass on the reversed sequences to locate the
 * start of the alignment.
 *
 * query and target are reversed in place during the second pass and restored
 * before returning, which is why they are not const.
 *
 * @param qry  optional cache for the query profile. If qry is non-NULL and
 *             *qry is NULL, the profile built here is stored in *qry and kept;
 *             if *qry is non-NULL it is reused; if qry is NULL the profile is
 *             freed before returning.
 * @return the forward-pass result; tb and qb are filled in only when the
 *         reverse pass reproduces the same score.
 */
kswr_t ksw_align(int qlen, uint8_t *query, int tlen, uint8_t *target, int m, const int8_t *mat, int gapo, int gape, int xtra, kswq_t **qry)
{
	int size;
	kswq_t *q;
	kswr_t r, rr;
	kswr_t (*func)(kswq_t*, int, const uint8_t*, int, int, int);

	q = (qry && *qry)? *qry : ksw_qinit((xtra&KSW_XBYTE)? 1 : 2, qlen, query, m, mat);
	if (qry && *qry == 0) *qry = q;
	func = q->size == 2? ksw_i16 : ksw_u8;
	size = q->size; // remember the cell width: q may be freed below
	r = func(q, tlen, target, gapo, gape, xtra);
	if (qry == 0) free(q);
	if ((xtra&KSW_XSTART) == 0 || ((xtra&KSW_XSUBO) && r.score < (xtra&0xffff))) return r;
	// ksw_u8() does not set qe when the byte score saturated; without a query
	// end there is nothing to reverse, so skip the start-finding pass.
	if (r.qe < 0) return r;
	revseq(r.qe + 1, query); revseq(r.te + 1, target); // +1 because qe/te points to the exact end, not the position after the end
	q = ksw_qinit(size, r.qe + 1, query, m, mat);
	rr = func(q, tlen, target, gapo, gape, KSW_XSTOP | r.score); // stop as soon as the forward score is reached
	revseq(r.qe + 1, query); revseq(r.te + 1, target); // restore the caller's buffers
	free(q);
	if (r.score == rr.score)
		r.tb = r.te - rr.te, r.qb = r.qe - rr.qe;
	return r;
}
|
||||||
|
|
||||||
|
/********************
|
||||||
|
*** SW extension ***
|
||||||
|
********************/
|
||||||
|
|
||||||
|
/* One DP column: h holds H(i-1,j-1) and e holds E(i,j) while row i is processed. */
typedef struct {
	int32_t h, e;
} eh_t;

/* Fallback so this section still builds where the branch-hint macro is not defined. */
#ifndef LIKELY
#define LIKELY(x) (x)
#endif

/**
 * Banded extension alignment starting from an initial score h0.
 *
 * @param qlen   query length
 * @param query  query sequence; each value indexes a column of mat
 * @param tlen   target length
 * @param target target sequence; each value indexes a row of mat
 * @param m      number of residue types
 * @param mat    m*m scoring matrix
 * @param gapo   gap open penalty
 * @param gape   gap extension penalty
 * @param w      band width; reduced when the scores cannot pay for that many gaps
 * @param h0     score before the extension; negative values are treated as 0
 * @param _qle   if non-NULL, receives the query length covered by the best extension
 * @param _tle   if non-NULL, receives the target length covered by the best extension
 * @return the best score reached, never below h0
 *
 * NOTE(review): the results of malloc()/calloc() are not checked.
 */
int ksw_extend(int qlen, const uint8_t *query, int tlen, const uint8_t *target, int m, const int8_t *mat, int gapo, int gape, int w, int h0, int *_qle, int *_tle)
{
	eh_t *eh; // score array
	int8_t *qp; // query profile
	int i, j, k, gapoe = gapo + gape, beg, end, max, max_i, max_j, max_gap;
	if (h0 < 0) h0 = 0;
	// allocate memory
	qp = (int8_t*)malloc((size_t)qlen * m);
	eh = (eh_t*)calloc((size_t)qlen + 1, sizeof(eh_t));
	// generate the query profile
	for (k = i = 0; k < m; ++k) {
		const int8_t *p = &mat[k * m];
		for (j = 0; j < qlen; ++j) qp[i++] = p[query[j]];
	}
	// fill the first row
	eh[0].h = h0; eh[1].h = h0 > gapoe? h0 - gapoe : 0;
	for (j = 2; j <= qlen && eh[j-1].h > gape; ++j)
		eh[j].h = eh[j-1].h - gape;
	// adjust $w if it is too large
	k = m * m;
	for (i = 0, max = 0; i < k; ++i) // get the max score
		max = max > mat[i]? max : mat[i];
	if (gape != 0) { // a zero extension penalty would divide by zero; keep the caller's band then
		max_gap = (int)((double)(qlen * max - gapo) / gape + 1.);
		max_gap = max_gap > 1? max_gap : 1;
		w = w < max_gap? w : max_gap;
	}
	// DP loop
	max = h0, max_i = max_j = -1;
	beg = 0, end = qlen;
	for (i = 0; LIKELY(i < tlen); ++i) {
		int f = 0, h1, row_max = 0, mj = -1;
		int8_t *q = &qp[target[i] * qlen];
		// compute the first column
		h1 = h0 - (gapo + gape * (i + 1));
		if (h1 < 0) h1 = 0;
		// apply the band and the constraint (if provided)
		if (beg < i - w) beg = i - w;
		if (end > i + w + 1) end = i + w + 1;
		if (end > qlen) end = qlen;
		for (j = beg; LIKELY(j < end); ++j) {
			// At the beginning of the loop: eh[j] = { H(i-1,j-1), E(i,j) }, f = F(i,j) and h1 = H(i,j-1)
			// Similar to SSE2-SW, cells are computed in the following order:
			//   H(i,j)   = max{H(i-1,j-1)+S(i,j), E(i,j), F(i,j)}
			//   E(i+1,j) = max{H(i,j)-gapo, E(i,j)} - gape
			//   F(i,j+1) = max{H(i,j)-gapo, F(i,j)} - gape
			eh_t *p = &eh[j];
			int h = p->h, e = p->e; // get H(i-1,j-1) and E(i-1,j)
			p->h = h1;          // set H(i,j-1) for the next row
			h += q[j];
			h = h > e? h : e;
			h = h > f? h : f;
			h1 = h;             // save H(i,j) to h1 for the next column
			mj = row_max > h? mj : j;
			row_max = row_max > h? row_max : h; // row_max is stored at eh[mj+1]
			h -= gapoe;
			h = h > 0? h : 0;
			e -= gape;
			e = e > h? e : h;   // computed E(i+1,j)
			p->e = e;           // save E(i+1,j) for the next row
			f -= gape;
			f = f > h? f : h;   // computed F(i,j+1)
		}
		eh[end].h = h1; eh[end].e = 0;
		if (row_max == 0) break; // nothing positive left in this row
		if (row_max > max) max = row_max, max_i = i, max_j = mj;
		// update beg and end for the next round
		for (j = mj; j >= beg && eh[j].h; --j);
		beg = j + 1;
		for (j = mj + 2; j <= end && eh[j].h; ++j);
		end = j;
		//beg = 0; end = qlen; // uncomment this line for debugging
	}
	free(eh); free(qp);
	if (_qle) *_qle = max_j + 1;
	if (_tle) *_tle = max_i + 1;
	return max;
}
|
||||||
|
|
||||||
|
/********************
|
||||||
|
* Global alignment *
|
||||||
|
********************/
|
||||||
|
|
||||||
|
#define MINUS_INF -0x40000000
|
||||||
|
|
||||||
|
/**
 * Append len operations of type op to a CIGAR array, merging with the last
 * entry when it has the same op. Entries are packed as len<<4|op.
 *
 * @param n_cigar number of entries in use (updated)
 * @param m_cigar allocated capacity in entries (updated; grows 4, 8, 16, ...)
 * @param cigar   current array, may be NULL when *m_cigar is 0
 * @return the (possibly reallocated) array; callers must use the returned pointer
 *
 * NOTE(review): the result of realloc() is not checked.
 */
static inline uint32_t *push_cigar(int *n_cigar, int *m_cigar, uint32_t *cigar, int op, int len)
{
	if (*n_cigar == 0 || (uint32_t)op != (cigar[(*n_cigar) - 1]&0xf)) {
		if (*n_cigar == *m_cigar) {
			*m_cigar = *m_cigar? (*m_cigar)<<1 : 4;
			cigar = (uint32_t*)realloc(cigar, (size_t)(*m_cigar) * sizeof(uint32_t));
		}
		cigar[(*n_cigar)++] = (uint32_t)len<<4 | (uint32_t)op;
	} else cigar[(*n_cigar)-1] += (uint32_t)len<<4; // same op as the last entry: extend its length
	return cigar;
}
|
||||||
|
|
||||||
|
int ksw_global(int qlen, const uint8_t *query, int tlen, const uint8_t *target, int m, const int8_t *mat, int gapo, int gape, int w, int *n_cigar_, uint32_t **cigar_)
|
||||||
|
{
|
||||||
|
eh_t *eh;
|
||||||
|
int8_t *qp; // query profile
|
||||||
|
int i, j, k, gapoe = gapo + gape, score, n_col;
|
||||||
|
uint8_t *z; // backtrack matrix; in each cell: f<<4|e<<2|h; in principle, we can halve the memory, but backtrack will be a little more complex
|
||||||
|
if (n_cigar_) *n_cigar_ = 0;
|
||||||
|
// allocate memory
|
||||||
|
n_col = qlen < 2*w+1? qlen : 2*w+1; // maximum #columns of the backtrack matrix
|
||||||
|
z = malloc(n_col * tlen);
|
||||||
|
qp = malloc(qlen * m);
|
||||||
|
eh = calloc(qlen + 1, 8);
|
||||||
|
// generate the query profile
|
||||||
|
for (k = i = 0; k < m; ++k) {
|
||||||
|
const int8_t *p = &mat[k * m];
|
||||||
|
for (j = 0; j < qlen; ++j) qp[i++] = p[query[j]];
|
||||||
|
}
|
||||||
|
// fill the first row
|
||||||
|
eh[0].h = 0; eh[0].e = MINUS_INF;
|
||||||
|
for (j = 1; j <= qlen && j <= w; ++j)
|
||||||
|
eh[j].h = -(gapo + gape * j), eh[j].e = MINUS_INF;
|
||||||
|
for (; j <= qlen; ++j) eh[j].h = eh[j].e = MINUS_INF; // everything is -inf outside the band
|
||||||
|
// DP loop
|
||||||
|
for (i = 0; LIKELY(i < tlen); ++i) { // target sequence is in the outer loop
|
||||||
|
int32_t f = MINUS_INF, h1, beg, end;
|
||||||
|
int8_t *q = &qp[target[i] * qlen];
|
||||||
|
uint8_t *zi = &z[i * n_col];
|
||||||
|
beg = i > w? i - w : 0;
|
||||||
|
end = i + w + 1 < qlen? i + w + 1 : qlen; // only loop through [beg,end) of the query sequence
|
||||||
|
h1 = beg == 0? -(gapo + gape * (i + 1)) : MINUS_INF;
|
||||||
|
for (j = beg; LIKELY(j < end); ++j) {
|
||||||
|
// This loop is organized in a similar way to ksw_extend() and ksw_sse2(), except:
|
||||||
|
// 1) not checking h>0; 2) recording direction for backtracking
|
||||||
|
eh_t *p = &eh[j];
|
||||||
|
int32_t h = p->h, e = p->e;
|
||||||
|
uint8_t d; // direction
|
||||||
|
p->h = h1;
|
||||||
|
h += q[j];
|
||||||
|
d = h > e? 0 : 1;
|
||||||
|
h = h > e? h : e;
|
||||||
|
d = h > f? d : 2;
|
||||||
|
h = h > f? h : f;
|
||||||
|
h1 = h;
|
||||||
|
h -= gapoe;
|
||||||
|
e -= gape;
|
||||||
|
d |= e > h? 1<<2 : 0;
|
||||||
|
e = e > h? e : h;
|
||||||
|
p->e = e;
|
||||||
|
f -= gape;
|
||||||
|
d |= f > h? 2<<4 : 0; // if we want to halve the memory, use one bit only, instead of two
|
||||||
|
f = f > h? f : h;
|
||||||
|
zi[j - beg] = d; // z[i,j] keeps h for the current cell and e/f for the next cell
|
||||||
|
}
|
||||||
|
eh[end].h = h1; eh[end].e = MINUS_INF;
|
||||||
|
}
|
||||||
|
score = eh[qlen].h;
|
||||||
|
if (n_cigar_ && cigar_) { // backtrack
|
||||||
|
int n_cigar = 0, m_cigar = 0, which = 0;
|
||||||
|
uint32_t *cigar = 0, tmp;
|
||||||
|
i = tlen - 1; k = (i + w + 1 < qlen? i + w + 1 : qlen) - 1; // (i,k) points to the last cell
|
||||||
|
while (i >= 0 && k >= 0) {
|
||||||
|
which = z[i * n_col + (k - (i > w? i - w : 0))] >> (which<<1) & 3;
|
||||||
|
if (which == 0) cigar = push_cigar(&n_cigar, &m_cigar, cigar, 0, 1), --i, --k;
|
||||||
|
else if (which == 1) cigar = push_cigar(&n_cigar, &m_cigar, cigar, 2, 1), --i;
|
||||||
|
else cigar = push_cigar(&n_cigar, &m_cigar, cigar, 1, 1), --k;
|
||||||
|
}
|
||||||
|
if (i >= 0) cigar = push_cigar(&n_cigar, &m_cigar, cigar, 2, i + 1);
|
||||||
|
if (k >= 0) cigar = push_cigar(&n_cigar, &m_cigar, cigar, 1, k + 1);
|
||||||
|
for (i = 0; i < n_cigar>>1; ++i) // reverse CIGAR
|
||||||
|
tmp = cigar[i], cigar[i] = cigar[n_cigar-1-i], cigar[n_cigar-1-i] = tmp;
|
||||||
|
*n_cigar_ = n_cigar, *cigar_ = cigar;
|
||||||
|
}
|
||||||
|
free(eh); free(qp); free(z);
|
||||||
|
return score;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*******************************************
|
||||||
|
* Main function (not compiled by default) *
|
||||||
|
*******************************************/
|
||||||
|
|
||||||
|
#ifdef _KSW_MAIN
|
||||||
|
|
||||||
|
#include <unistd.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <zlib.h>
|
||||||
|
#include "kseq.h"
|
||||||
|
KSEQ_INIT(gzFile, gzread)
|
||||||
|
|
||||||
|
/* Maps a byte to a nucleotide code: A/a -> 0, C/c -> 1, G/g -> 2, T/t -> 3, anything else -> 4. */
unsigned char seq_nt4_table[256] = {
	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,
	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,
	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,
	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,
	4, 0, 4, 1,  4, 4, 4, 2,  4, 4, 4, 4,  4, 4, 4, 4,
	4, 4, 4, 4,  3, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,
	4, 0, 4, 1,  4, 4, 4, 2,  4, 4, 4, 4,  4, 4, 4, 4,
	4, 4, 4, 4,  3, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,
	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,
	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,
	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,
	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,
	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,
	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,
	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,
	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4
};
|
||||||
|
|
||||||
|
int main(int argc, char *argv[])
|
||||||
|
{
|
||||||
|
int c, sa = 1, sb = 3, i, j, k, forward_only = 0, max_rseq = 0;
|
||||||
|
int8_t mat[25];
|
||||||
|
int gapo = 5, gape = 2, minsc = 0, xtra = KSW_XSTART;
|
||||||
|
uint8_t *rseq = 0;
|
||||||
|
gzFile fpt, fpq;
|
||||||
|
kseq_t *kst, *ksq;
|
||||||
|
|
||||||
|
// parse command line
|
||||||
|
while ((c = getopt(argc, argv, "a:b:q:r:ft:1")) >= 0) {
|
||||||
|
switch (c) {
|
||||||
|
case 'a': sa = atoi(optarg); break;
|
||||||
|
case 'b': sb = atoi(optarg); break;
|
||||||
|
case 'q': gapo = atoi(optarg); break;
|
||||||
|
case 'r': gape = atoi(optarg); break;
|
||||||
|
case 't': minsc = atoi(optarg); break;
|
||||||
|
case 'f': forward_only = 1; break;
|
||||||
|
case '1': xtra |= KSW_XBYTE; break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (optind + 2 > argc) {
|
||||||
|
fprintf(stderr, "Usage: ksw [-1] [-f] [-a%d] [-b%d] [-q%d] [-r%d] [-t%d] <target.fa> <query.fa>\n", sa, sb, gapo, gape, minsc);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
if (minsc > 0xffff) minsc = 0xffff;
|
||||||
|
xtra |= KSW_XSUBO | minsc;
|
||||||
|
// initialize scoring matrix
|
||||||
|
for (i = k = 0; i < 4; ++i) {
|
||||||
|
for (j = 0; j < 4; ++j)
|
||||||
|
mat[k++] = i == j? sa : -sb;
|
||||||
|
mat[k++] = 0; // ambiguous base
|
||||||
|
}
|
||||||
|
for (j = 0; j < 5; ++j) mat[k++] = 0;
|
||||||
|
// open file
|
||||||
|
fpt = gzopen(argv[optind], "r"); kst = kseq_init(fpt);
|
||||||
|
fpq = gzopen(argv[optind+1], "r"); ksq = kseq_init(fpq);
|
||||||
|
// all-pair alignment
|
||||||
|
while (kseq_read(ksq) > 0) {
|
||||||
|
kswq_t *q[2] = {0, 0};
|
||||||
|
kswr_t r;
|
||||||
|
for (i = 0; i < (int)ksq->seq.l; ++i) ksq->seq.s[i] = seq_nt4_table[(int)ksq->seq.s[i]];
|
||||||
|
if (!forward_only) { // reverse
|
||||||
|
if ((int)ksq->seq.m > max_rseq) {
|
||||||
|
max_rseq = ksq->seq.m;
|
||||||
|
rseq = (uint8_t*)realloc(rseq, max_rseq);
|
||||||
|
}
|
||||||
|
for (i = 0, j = ksq->seq.l - 1; i < (int)ksq->seq.l; ++i, --j)
|
||||||
|
rseq[j] = ksq->seq.s[i] == 4? 4 : 3 - ksq->seq.s[i];
|
||||||
|
}
|
||||||
|
gzrewind(fpt); kseq_rewind(kst);
|
||||||
|
while (kseq_read(kst) > 0) {
|
||||||
|
for (i = 0; i < (int)kst->seq.l; ++i) kst->seq.s[i] = seq_nt4_table[(int)kst->seq.s[i]];
|
||||||
|
r = ksw_align(ksq->seq.l, (uint8_t*)ksq->seq.s, kst->seq.l, (uint8_t*)kst->seq.s, 5, mat, gapo, gape, xtra, &q[0]);
|
||||||
|
if (r.score >= minsc)
|
||||||
|
printf("%s\t%d\t%d\t%s\t%d\t%d\t%d\t%d\t%d\n", kst->name.s, r.tb, r.te+1, ksq->name.s, r.qb, r.qe+1, r.score, r.score2, r.te2);
|
||||||
|
if (rseq) {
|
||||||
|
r = ksw_align(ksq->seq.l, rseq, kst->seq.l, (uint8_t*)kst->seq.s, 5, mat, gapo, gape, xtra, &q[1]);
|
||||||
|
if (r.score >= minsc)
|
||||||
|
printf("%s\t%d\t%d\t%s\t%d\t%d\t%d\t%d\t%d\n", kst->name.s, r.tb, r.te+1, ksq->name.s, (int)ksq->seq.l - r.qb, (int)ksq->seq.l - 1 - r.qe, r.score, r.score2, r.te2);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
free(q[0]); free(q[1]);
|
||||||
|
}
|
||||||
|
free(rseq);
|
||||||
|
kseq_destroy(kst); gzclose(fpt);
|
||||||
|
kseq_destroy(ksq); gzclose(fpq);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
@ -0,0 +1,72 @@
|
||||||
|
#ifndef __AC_KSW_H
#define __AC_KSW_H

#include <stdint.h>

/* Flags for the xtra argument of ksw_align(); the low 16 bits carry a score threshold. */
#define KSW_XBYTE  0x10000
#define KSW_XSTOP  0x20000
#define KSW_XSUBO  0x40000
#define KSW_XSTART 0x80000

struct _kswq_t;
typedef struct _kswq_t kswq_t;

typedef struct {
	int score; // best score
	int te, qe; // target end and query end
	int score2, te2; // second best score and ending position on the target
	int tb, qb; // target start and query start
} kswr_t;

#ifdef __cplusplus
extern "C" {
#endif

	/**
	 * Aligning two sequences
	 *
	 * @param qlen    length of the query sequence (typically <tlen)
	 * @param query   query sequence with 0 <= query[i] < m
	 * @param tlen    length of the target sequence
	 * @param target  target sequence
	 * @param m       number of residue types
	 * @param mat     m*m scoring matrix in a one-dimensional array
	 * @param gapo    gap open penalty; a gap of length l cost "-(gapo+l*gape)"
	 * @param gape    gap extension penalty
	 * @param xtra    extra information (see below)
	 * @param qry     query profile (see below)
	 *
	 * @return        alignment information in a struct; unset values to -1
	 *
	 * When xtra==0, ksw_align() uses a signed two-byte integer to store a
	 * score and only finds the best score and the end positions. The 2nd best
	 * score or the start positions are not attempted. The default behavior can
	 * be tuned by setting KSW_X* flags:
	 *
	 *   KSW_XBYTE:  use an unsigned byte to store a score. If overflow occurs,
	 *               kswr_t::score will be set to 255
	 *
	 *   KSW_XSUBO:  track the 2nd best score and the ending position on the
	 *               target if the 2nd best is higher than (xtra&0xffff)
	 *
	 *   KSW_XSTOP:  stop if the maximum score is above (xtra&0xffff)
	 *
	 *   KSW_XSTART: find the start positions
	 *
	 * When *qry==NULL, ksw_align() will compute and allocate the query profile
	 * and when the function returns, *qry will point to the profile, which can
	 * be deallocated simply by free(). If one query is aligned against multiple
	 * target sequences, *qry should be set to NULL during the first call and
	 * freed after the last call. Note that qry can equal 0. In this case, the
	 * query profile will be deallocated in ksw_align().
	 */
	kswr_t ksw_align(int qlen, uint8_t *query, int tlen, uint8_t *target, int m, const int8_t *mat, int gapo, int gape, int xtra, kswq_t **qry);

	int ksw_extend(int qlen, const uint8_t *query, int tlen, const uint8_t *target, int m, const int8_t *mat, int gapo, int gape, int w, int h0, int *_qle, int *_tle);
	int ksw_global(int qlen, const uint8_t *query, int tlen, const uint8_t *target, int m, const int8_t *mat, int gapo, int gape, int w, int *_n_cigar, uint32_t **_cigar);

#ifdef __cplusplus
}
#endif

#endif
|
||||||
|
|
@ -0,0 +1,256 @@
|
||||||
|
#include <pthread.h>
#include <stdlib.h>
#include <limits.h>
#include <stdint.h>
|
||||||
|
|
||||||
|
/************
|
||||||
|
* kt_for() *
|
||||||
|
************/
|
||||||
|
|
||||||
|
struct kt_for_t;

/* Per-thread cursor: i is the next index this worker will claim. */
typedef struct {
	struct kt_for_t *t;
	long i;
} ktf_worker_t;

/* Shared state of one kt_for() call. */
typedef struct kt_for_t {
	int n_threads;
	long n;
	ktf_worker_t *w;
	void (*func)(void*,long,int);
	void *data;
} kt_for_t;

/* Claim the next index from the worker that is furthest behind; returns -1 when nothing is left. */
static inline long steal_work(kt_for_t *t)
{
	int i, min_i = -1;
	long k, min = LONG_MAX;
	for (i = 0; i < t->n_threads; ++i)
		if (min > t->w[i].i) min = t->w[i].i, min_i = i;
	if (min_i < 0) return -1; // no worker slot qualified; never index w[-1]
	k = __sync_fetch_and_add(&t->w[min_i].i, t->n_threads);
	return k >= t->n? -1 : k;
}

/* Thread body: process the indices i, i+n_threads, ... of this worker, then steal from the others. */
static void *ktf_worker(void *data)
{
	ktf_worker_t *w = (ktf_worker_t*)data;
	long i;
	for (;;) {
		i = __sync_fetch_and_add(&w->i, w->t->n_threads);
		if (i >= w->t->n) break;
		w->t->func(w->t->data, i, w - w->t->w);
	}
	while ((i = steal_work(w->t)) >= 0)
		w->t->func(w->t->data, i, w - w->t->w);
	pthread_exit(0);
}

/**
 * Call func(data, i, tid) once for every i in [0, n), spread over n_threads threads.
 *
 * @param n_threads number of threads; values <= 1 run everything in the caller
 * @param func      callback; tid is the slot of the worker running it, in [0, n_threads)
 * @param data      opaque pointer handed to func
 * @param n         number of indices
 *
 * If a thread cannot be created, its share is picked up by the running
 * threads through work stealing; if no thread (or the bookkeeping memory)
 * can be obtained, the loop runs serially in the caller with tid 0.
 */
void kt_for(int n_threads, void (*func)(void*,long,int), void *data, long n)
{
	int n_started = 0;
	if (n_threads > 1) {
		int i;
		kt_for_t t;
		pthread_t *tid;
		t.func = func, t.data = data, t.n_threads = n_threads, t.n = n;
		// heap rather than alloca(): n_threads is caller-controlled
		t.w = (ktf_worker_t*)malloc(n_threads * sizeof(ktf_worker_t));
		tid = (pthread_t*)malloc(n_threads * sizeof(pthread_t));
		if (t.w && tid) {
			for (i = 0; i < n_threads; ++i)
				t.w[i].t = &t, t.w[i].i = i;
			for (i = 0; i < n_threads; ++i)
				if (pthread_create(&tid[n_started], 0, ktf_worker, &t.w[i]) == 0) ++n_started;
			for (i = 0; i < n_started; ++i) pthread_join(tid[i], 0); // join only what was started
		}
		free(tid); free(t.w);
	}
	if (n_started == 0) { // single-threaded request, or nothing could be started
		long j;
		for (j = 0; j < n; ++j) func(data, j, 0);
	}
}
|
||||||
|
|
||||||
|
/***************************
|
||||||
|
* kt_for with thread pool *
|
||||||
|
***************************/
|
||||||
|
|
||||||
|
struct kt_forpool_t;

/* Per-thread state: i is the next index to claim; action is 0 = idle, 1 = run a job, -1 = exit. */
typedef struct {
	struct kt_forpool_t *t;
	long i;
	int action;
} kto_worker_t;

/* A pool of worker threads reused across kt_forpool() calls. */
typedef struct kt_forpool_t {
	int n_threads, n_pending; // n_pending: workers that have not yet gone idle
	long n;
	pthread_t *tid;
	kto_worker_t *w;
	void (*func)(void*,long,int);
	void *data;
	pthread_mutex_t mutex;
	pthread_cond_t cv_m, cv_s; // cv_m wakes the master, cv_s wakes the workers
} kt_forpool_t;

/* Claim the next index from the worker that is furthest behind; returns -1 when nothing is left. */
static inline long kt_fp_steal_work(kt_forpool_t *t)
{
	int i, min_i = -1;
	long k, min = LONG_MAX;
	for (i = 0; i < t->n_threads; ++i)
		if (min > t->w[i].i) min = t->w[i].i, min_i = i;
	if (min_i < 0) return -1; // no worker slot qualified; never index w[-1]
	k = __sync_fetch_and_add(&t->w[min_i].i, t->n_threads);
	return k >= t->n? -1 : k;
}

/* Thread body: report idle, sleep until given an action, run the job, repeat until told to exit. */
static void *kt_fp_worker(void *data)
{
	kto_worker_t *w = (kto_worker_t*)data;
	kt_forpool_t *fp = w->t;
	for (;;) {
		long i;
		int action;
		pthread_mutex_lock(&fp->mutex);
		if (--fp->n_pending == 0)
			pthread_cond_signal(&fp->cv_m); // last worker to go idle wakes the master
		w->action = 0;
		while (w->action == 0) pthread_cond_wait(&fp->cv_s, &fp->mutex);
		action = w->action;
		pthread_mutex_unlock(&fp->mutex);
		if (action < 0) break;
		for (;;) { // process jobs allocated to this worker
			i = __sync_fetch_and_add(&w->i, fp->n_threads);
			if (i >= fp->n) break;
			fp->func(fp->data, i, w - fp->w);
		}
		while ((i = kt_fp_steal_work(fp)) >= 0) // steal jobs allocated to other workers
			fp->func(fp->data, i, w - fp->w);
	}
	pthread_exit(0);
}

/**
 * Create a pool and wait until every worker is idle.
 *
 * @param n_threads requested number of workers; negative values are treated as 0
 * @return an opaque pool handle, or NULL if memory could not be allocated.
 *         If only some threads could be started, the pool is sized to the
 *         number actually running.
 */
void *kt_forpool_init(int n_threads)
{
	kt_forpool_t *fp;
	int i, n_started = 0;
	if (n_threads < 0) n_threads = 0; // a negative count would leave n_pending non-zero forever
	fp = (kt_forpool_t*)calloc(1, sizeof(kt_forpool_t));
	if (fp == 0) return 0;
	fp->n_threads = fp->n_pending = n_threads;
	fp->tid = (pthread_t*)calloc(fp->n_threads, sizeof(pthread_t));
	fp->w = (kto_worker_t*)calloc(fp->n_threads, sizeof(kto_worker_t));
	if (n_threads > 0 && (fp->tid == 0 || fp->w == 0)) {
		free(fp->w); free(fp->tid); free(fp);
		return 0;
	}
	for (i = 0; i < fp->n_threads; ++i) fp->w[i].t = fp;
	pthread_mutex_init(&fp->mutex, 0);
	pthread_cond_init(&fp->cv_m, 0);
	pthread_cond_init(&fp->cv_s, 0);
	for (i = 0; i < n_threads; ++i) {
		if (pthread_create(&fp->tid[n_started], 0, kt_fp_worker, &fp->w[n_started]) != 0) break;
		++n_started;
	}
	pthread_mutex_lock(&fp->mutex);
	// threads that never started will never report idle: take them out of the count
	fp->n_pending -= n_threads - n_started;
	fp->n_threads = n_started;
	while (fp->n_pending) pthread_cond_wait(&fp->cv_m, &fp->mutex);
	pthread_mutex_unlock(&fp->mutex);
	return fp;
}

/* Tell every worker to exit, join them and release the pool. A NULL handle is ignored. */
void kt_forpool_destroy(void *_fp)
{
	kt_forpool_t *fp = (kt_forpool_t*)_fp;
	int i;
	if (fp == 0) return;
	pthread_mutex_lock(&fp->mutex); // action is read by the workers under this mutex
	for (i = 0; i < fp->n_threads; ++i) fp->w[i].action = -1;
	pthread_cond_broadcast(&fp->cv_s);
	pthread_mutex_unlock(&fp->mutex);
	for (i = 0; i < fp->n_threads; ++i) pthread_join(fp->tid[i], 0);
	pthread_cond_destroy(&fp->cv_s);
	pthread_cond_destroy(&fp->cv_m);
	pthread_mutex_destroy(&fp->mutex);
	free(fp->w); free(fp->tid); free(fp);
}

/**
 * Call func(data, i, tid) once for every i in [0, n) using the pool's workers,
 * returning when all of them are idle again. With a NULL pool, or a pool of
 * at most one thread, the loop runs serially in the caller with tid 0.
 */
void kt_forpool(void *_fp, void (*func)(void*,long,int), void *data, long n)
{
	kt_forpool_t *fp = (kt_forpool_t*)_fp;
	long i;
	if (fp && fp->n_threads > 1) {
		pthread_mutex_lock(&fp->mutex);
		// publish the job under the mutex the workers use to read action
		fp->n = n, fp->func = func, fp->data = data, fp->n_pending = fp->n_threads;
		for (i = 0; i < fp->n_threads; ++i) fp->w[i].i = i, fp->w[i].action = 1;
		pthread_cond_broadcast(&fp->cv_s);
		while (fp->n_pending) pthread_cond_wait(&fp->cv_m, &fp->mutex);
		pthread_mutex_unlock(&fp->mutex);
	} else for (i = 0; i < n; ++i) func(data, i, 0);
}
|
||||||
|
|
||||||
|
/*****************
|
||||||
|
* kt_pipeline() *
|
||||||
|
*****************/
|
||||||
|
|
||||||
|
struct ktp_t; /* the pipeline type is completed further down */

/* State of one pipeline worker thread. */
typedef struct {
	struct ktp_t *pl; /* pipeline this worker belongs to */
	int64_t index;    /* ticket of the item being processed; taken from the pipeline counter */
	int step;         /* step to run next; equals n_steps once the worker is finished */
	void *data;       /* value returned by the previous step, passed to the next one */
} ktp_worker_t;
|
||||||
|
|
||||||
|
typedef struct ktp_t {
|
||||||
|
void *shared;
|
||||||
|
void *(*func)(void*, int, void*);
|
||||||
|
int64_t index;
|
||||||
|
int n_workers, n_steps;
|
||||||
|
ktp_worker_t *workers;
|
||||||
|
pthread_mutex_t mutex;
|
||||||
|
pthread_cond_t cv;
|
||||||
|
} ktp_t;
|
||||||
|
|
||||||
|
static void *ktp_worker(void *data)
|
||||||
|
{
|
||||||
|
ktp_worker_t *w = (ktp_worker_t*)data;
|
||||||
|
ktp_t *p = w->pl;
|
||||||
|
while (w->step < p->n_steps) {
|
||||||
|
// test whether we can kick off the job with this worker
|
||||||
|
pthread_mutex_lock(&p->mutex);
|
||||||
|
for (;;) {
|
||||||
|
int i;
|
||||||
|
// test whether another worker is doing the same step
|
||||||
|
for (i = 0; i < p->n_workers; ++i) {
|
||||||
|
if (w == &p->workers[i]) continue; // ignore itself
|
||||||
|
if (p->workers[i].step <= w->step && p->workers[i].index < w->index)
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if (i == p->n_workers) break; // no workers with smaller indices are doing w->step or the previous steps
|
||||||
|
pthread_cond_wait(&p->cv, &p->mutex);
|
||||||
|
}
|
||||||
|
pthread_mutex_unlock(&p->mutex);
|
||||||
|
|
||||||
|
// working on w->step
|
||||||
|
w->data = p->func(p->shared, w->step, w->step? w->data : 0); // for the first step, input is NULL
|
||||||
|
|
||||||
|
// update step and let other workers know
|
||||||
|
pthread_mutex_lock(&p->mutex);
|
||||||
|
w->step = w->step == p->n_steps - 1 || w->data? (w->step + 1) % p->n_steps : p->n_steps;
|
||||||
|
if (w->step == 0) w->index = p->index++;
|
||||||
|
pthread_cond_broadcast(&p->cv);
|
||||||
|
pthread_mutex_unlock(&p->mutex);
|
||||||
|
}
|
||||||
|
pthread_exit(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
void kt_pipeline(int n_threads, void *(*func)(void*, int, void*), void *shared_data, int n_steps)
|
||||||
|
{
|
||||||
|
ktp_t aux;
|
||||||
|
pthread_t *tid;
|
||||||
|
int i;
|
||||||
|
|
||||||
|
if (n_threads < 1) n_threads = 1;
|
||||||
|
aux.n_workers = n_threads;
|
||||||
|
aux.n_steps = n_steps;
|
||||||
|
aux.func = func;
|
||||||
|
aux.shared = shared_data;
|
||||||
|
aux.index = 0;
|
||||||
|
pthread_mutex_init(&aux.mutex, 0);
|
||||||
|
pthread_cond_init(&aux.cv, 0);
|
||||||
|
|
||||||
|
aux.workers = (ktp_worker_t*)alloca(n_threads * sizeof(ktp_worker_t));
|
||||||
|
for (i = 0; i < n_threads; ++i) {
|
||||||
|
ktp_worker_t *w = &aux.workers[i];
|
||||||
|
w->step = 0; w->pl = &aux; w->data = 0;
|
||||||
|
w->index = aux.index++;
|
||||||
|
}
|
||||||
|
|
||||||
|
tid = (pthread_t*)alloca(n_threads * sizeof(pthread_t));
|
||||||
|
for (i = 0; i < n_threads; ++i) pthread_create(&tid[i], 0, ktp_worker, &aux.workers[i]);
|
||||||
|
for (i = 0; i < n_threads; ++i) pthread_join(tid[i], 0);
|
||||||
|
|
||||||
|
pthread_mutex_destroy(&aux.mutex);
|
||||||
|
pthread_cond_destroy(&aux.cv);
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,19 @@
|
||||||
|
#ifndef KTHREAD_H
|
||||||
|
#define KTHREAD_H
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
void kt_for(int n_threads, void (*func)(void*,long,int), void *data, long n);
|
||||||
|
void kt_pipeline(int n_threads, void *(*func)(void*, int, void*), void *shared_data, int n_steps);
|
||||||
|
|
||||||
|
void *kt_forpool_init(int n_threads);
|
||||||
|
void kt_forpool_destroy(void *_fp);
|
||||||
|
void kt_forpool(void *_fp, void (*func)(void*,long,int), void *data, long n);
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
@ -0,0 +1,583 @@
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <fcntl.h>
|
||||||
|
#include <ctype.h>
|
||||||
|
#include <assert.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <unistd.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <curl/curl.h>
|
||||||
|
#include "kurl.h"
|
||||||
|
|
||||||
|
/**********************
|
||||||
|
*** Core kurl APIs ***
|
||||||
|
**********************/
|
||||||
|
|
||||||
|
#define KU_DEF_BUFLEN 0x8000
|
||||||
|
#define KU_MAX_SKIP (KU_DEF_BUFLEN<<1) // if seek step is smaller than this, skip
|
||||||
|
|
||||||
|
#define kurl_isfile(u) ((u)->fd >= 0)
|
||||||
|
|
||||||
|
#ifndef kroundup32
|
||||||
|
#define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x))
|
||||||
|
#endif
|
||||||
|
|
||||||
|
struct kurl_t {
|
||||||
|
CURLM *multi; // cURL multi handler
|
||||||
|
CURL *curl; // cURL easy handle
|
||||||
|
uint8_t *buf; // buffer
|
||||||
|
off_t off0; // offset of the first byte in the buffer; the actual file offset equals off0 + p_buf
|
||||||
|
int fd; // file descriptor for a normal file; <0 for a remote file
|
||||||
|
int m_buf; // max buffer size; for a remote file, CURL_MAX_WRITE_SIZE*2 is recommended
|
||||||
|
int l_buf; // length of the buffer; l_buf == 0 iff the input read entirely; l_buf <= m_buf
|
||||||
|
int p_buf; // file position in the buffer; p_buf <= l_buf
|
||||||
|
int done_reading; // true if we can read nothing from the file; buffer may not be empty even if done_reading is set
|
||||||
|
int err; // error code
|
||||||
|
struct curl_slist *hdr;
|
||||||
|
};
|
||||||
|
|
||||||
|
/* Heap-allocated strings produced by s3_parse() for one S3 request. */
typedef struct {
	char *url;  /* https:// URL of the object */
	char *date; /* complete "Date: ..." header line */
	char *auth; /* complete "Authorization: AWS ..." header line */
} s3aux_t;
|
||||||
|
|
||||||
|
int kurl_init(void) // required for SSL and win32 socket; NOT thread safe
|
||||||
|
{
|
||||||
|
return curl_global_init(CURL_GLOBAL_DEFAULT);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Release the global libcurl state set up by kurl_init(). */
void kurl_destroy(void)
{
	curl_global_cleanup();
}
|
||||||
|
|
||||||
|
/*
 * Position the underlying source at ku->off0 and empty the buffer.
 *
 * Local file: if do_seek is non-zero, lseek() to off0.
 * Remote file: detach the easy handle, set the resume offset to off0 and
 * re-attach it so that the next transfer starts there.
 *
 * Returns 0 on success and -1 if the seek or a libcurl call fails; the
 * buffer is only emptied on success.
 */
static int prepare(kurl_t *ku, int do_seek)
{
	if (kurl_isfile(ku)) {
		if (do_seek && lseek(ku->fd, ku->off0, SEEK_SET) != ku->off0)
			return -1;
	} else { // FIXME: for S3, we need to re-authorize
		// The handle may not be attached yet (first call from kurl_open()),
		// so the result of the removal is deliberately not treated as an error.
		curl_multi_remove_handle(ku->multi, ku->curl);
		// off_t may be wider than long: use the curl_off_t variant of the option
		// instead of pushing an off_t through a vararg that expects a long.
		if (curl_easy_setopt(ku->curl, CURLOPT_RESUME_FROM_LARGE, (curl_off_t)ku->off0) != CURLE_OK)
			return -1;
		if (curl_multi_add_handle(ku->multi, ku->curl) != CURLM_OK)
			return -1;
	}
	ku->p_buf = ku->l_buf = 0; // empty the buffer
	return 0;
}
|
||||||
|
|
||||||
|
static size_t write_cb(char *ptr, size_t size, size_t nmemb, void *data) // callback required by cURL
|
||||||
|
{
|
||||||
|
kurl_t *ku = (kurl_t*)data;
|
||||||
|
ssize_t nbytes = size * nmemb;
|
||||||
|
if (nbytes + ku->l_buf > ku->m_buf)
|
||||||
|
return CURL_WRITEFUNC_PAUSE;
|
||||||
|
memcpy(ku->buf + ku->l_buf, ptr, nbytes);
|
||||||
|
ku->l_buf += nbytes;
|
||||||
|
return nbytes;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Refill ku->buf from the underlying source.
 *
 * Must only be called when the buffer has been consumed completely
 * (p_buf == l_buf). off0 is advanced by the old buffer length, the buffer is
 * emptied and then filled from the file descriptor or the cURL transfer.
 * done_reading is set when the source delivered less than a full buffer.
 *
 * Returns the number of bytes now in the buffer (0 at end of input).
 */
static int fill_buffer(kurl_t *ku)
{
	assert(ku->p_buf == ku->l_buf); // buffer is always used up when fill_buffer() is called; otherwise a bug
	ku->off0 += ku->l_buf;
	ku->p_buf = ku->l_buf = 0;
	if (ku->done_reading) return 0;
	if (kurl_isfile(ku)) {
		// The following block is equivalent to "ku->l_buf = read(ku->fd, ku->buf, ku->m_buf)" on Mac.
		// On Linux, the man page does not specify whether read() guarantees to read ku->m_buf bytes
		// even if ->fd references a normal file with sufficient remaining bytes.
		while (ku->l_buf < ku->m_buf) {
			int l;
			l = read(ku->fd, ku->buf + ku->l_buf, ku->m_buf - ku->l_buf);
			// 0 is end of file, a negative value is a read error; in both cases stop.
			// Adding a negative count to l_buf would corrupt the buffer length.
			if (l <= 0) break;
			ku->l_buf += l;
		}
		if (ku->l_buf < ku->m_buf) ku->done_reading = 1;
	} else {
		int n_running, rc;
		fd_set fdr, fdw, fde;
		do {
			int maxfd = -1;
			long curl_to = -1;
			struct timeval to;
			// the following is adapted from docs/examples/fopen.c
			to.tv_sec = 10, to.tv_usec = 0; // 10 seconds
			curl_multi_timeout(ku->multi, &curl_to);
			if (curl_to >= 0) {
				to.tv_sec = curl_to / 1000;
				if (to.tv_sec > 1) to.tv_sec = 1;
				else to.tv_usec = (curl_to % 1000) * 1000;
			}
			FD_ZERO(&fdr); FD_ZERO(&fdw); FD_ZERO(&fde);
			curl_multi_fdset(ku->multi, &fdr, &fdw, &fde, &maxfd); // FIXME: check return code
			if (maxfd >= 0 && (rc = select(maxfd+1, &fdr, &fdw, &fde, &to)) < 0) break;
			if (maxfd < 0) { // check curl_multi_fdset.3 about why we wait for 100ms here
				struct timespec req, rem;
				req.tv_sec = 0; req.tv_nsec = 100000000; // this is 100ms
				nanosleep(&req, &rem);
			}
			curl_easy_pause(ku->curl, CURLPAUSE_CONT); // resume a transfer paused by write_cb()
			rc = curl_multi_perform(ku->multi, &n_running); // FIXME: check return code
		} while (n_running && ku->l_buf < ku->m_buf - CURL_MAX_WRITE_SIZE);
		if (ku->l_buf < ku->m_buf - CURL_MAX_WRITE_SIZE) ku->done_reading = 1;
	}
	return ku->l_buf;
}
|
||||||
|
|
||||||
|
/*
 * Close a handle returned by kurl_open()/kurl_dopen() and free its memory.
 * A local file has its descriptor closed; a remote file has its cURL handles
 * and header list released. NULL is accepted. Always returns 0.
 */
int kurl_close(kurl_t *ku)
{
	if (ku != 0) {
		if (kurl_isfile(ku)) {
			close(ku->fd);
		} else {
			curl_multi_remove_handle(ku->multi, ku->curl);
			curl_easy_cleanup(ku->curl);
			curl_multi_cleanup(ku->multi);
			if (ku->hdr) curl_slist_free_all(ku->hdr);
		}
		free(ku->buf);
		free(ku);
	}
	return 0;
}
|
||||||
|
|
||||||
|
/*
 * Open a local file or a remote URL for reading.
 *
 * url: a path, or "<scheme>://..." where <scheme> is purely alphanumeric.
 *      "s3://bucket/object" is signed with the credentials from opt.
 * opt: optional S3 credentials (may be NULL).
 *
 * Returns a handle to be released with kurl_close(), or NULL if the file
 * cannot be opened, memory or a cURL handle cannot be allocated, the S3
 * credentials are unavailable, or no data can be read.
 */
kurl_t *kurl_open(const char *url, kurl_opt_t *opt)
{
	extern s3aux_t s3_parse(const char *url, const char *_id, const char *_secret, const char *fn);
	const char *p, *q;
	kurl_t *ku;
	int fd = -1, is_file = 1, failed = 0;

	if (url == 0) return 0;
	p = strstr(url, "://");
	if (p) {
		for (q = url; q != p; ++q)
			if (!isalnum((unsigned char)*q)) break; // plain char may be negative: undefined for isalnum()
		if (q == p) is_file = 0; // everything before "://" is alphanumeric: treat as a URL
	}
	if (is_file && (fd = open(url, O_RDONLY)) < 0) return 0;

	ku = (kurl_t*)calloc(1, sizeof(kurl_t));
	if (ku == 0) {
		if (fd >= 0) close(fd); // do not leak the descriptor opened above
		return 0;
	}
	ku->fd = is_file? fd : -1;
	if (!kurl_isfile(ku)) {
		ku->multi = curl_multi_init();
		ku->curl  = curl_easy_init();
		if (ku->multi == 0 || ku->curl == 0) {
			if (ku->curl) curl_easy_cleanup(ku->curl);
			if (ku->multi) curl_multi_cleanup(ku->multi);
			free(ku);
			return 0;
		}
		if (strstr(url, "s3://") == url) {
			s3aux_t a;
			a = s3_parse(url, (opt? opt->s3keyid : 0), (opt? opt->s3secretkey : 0), (opt? opt->s3key_fn : 0));
			if (a.url == 0 || a.date == 0 || a.auth == 0) {
				free(a.date); free(a.auth); free(a.url); // release whatever was built
				kurl_close(ku);
				return 0;
			}
			ku->hdr = curl_slist_append(ku->hdr, a.date);
			ku->hdr = curl_slist_append(ku->hdr, a.auth);
			curl_easy_setopt(ku->curl, CURLOPT_URL, a.url);
			curl_easy_setopt(ku->curl, CURLOPT_HTTPHEADER, ku->hdr);
			free(a.date); free(a.auth); free(a.url);
		} else curl_easy_setopt(ku->curl, CURLOPT_URL, url);
		curl_easy_setopt(ku->curl, CURLOPT_WRITEDATA, ku);
		curl_easy_setopt(ku->curl, CURLOPT_VERBOSE, 0L);
		curl_easy_setopt(ku->curl, CURLOPT_NOSIGNAL, 1L);
		curl_easy_setopt(ku->curl, CURLOPT_WRITEFUNCTION, write_cb);
		// NOTE(review): TLS peer and host verification are switched off here, which
		// also applies to S3 requests carrying an Authorization header. Kept as is
		// because callers may rely on it; consider making it configurable.
		curl_easy_setopt(ku->curl, CURLOPT_SSL_VERIFYPEER, 0L);
		curl_easy_setopt(ku->curl, CURLOPT_SSL_VERIFYHOST, 0L);
		curl_easy_setopt(ku->curl, CURLOPT_FOLLOWLOCATION, 1L);
	}
	ku->m_buf = KU_DEF_BUFLEN;
	if (!kurl_isfile(ku) && ku->m_buf < CURL_MAX_WRITE_SIZE * 2)
		ku->m_buf = CURL_MAX_WRITE_SIZE * 2; // for remote files, the buffer set to 2*CURL_MAX_WRITE_SIZE
	ku->buf = (uint8_t*)calloc(ku->m_buf, 1);
	if (ku->buf == 0) {
		kurl_close(ku);
		return 0;
	}
	if (kurl_isfile(ku)) failed = (fill_buffer(ku) <= 0);
	else failed = (prepare(ku, 0) < 0 || fill_buffer(ku) <= 0);
	if (failed) {
		kurl_close(ku);
		return 0;
	}
	return ku;
}
|
||||||
|
|
||||||
|
kurl_t *kurl_dopen(int fd)
|
||||||
|
{
|
||||||
|
kurl_t *ku;
|
||||||
|
ku = (kurl_t*)calloc(1, sizeof(kurl_t));
|
||||||
|
ku->fd = fd;
|
||||||
|
ku->m_buf = KU_DEF_BUFLEN;
|
||||||
|
ku->buf = (uint8_t*)calloc(ku->m_buf, 1);
|
||||||
|
if (prepare(ku, 0) < 0 || fill_buffer(ku) <= 0) {
|
||||||
|
kurl_close(ku);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
return ku;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Change the buffer capacity to len rounded up to a power of two.
 *
 * The request is ignored when len is not positive, smaller than the amount
 * of buffered data, larger than 2^30 (rounding up would overflow an int),
 * or -- for remote files -- smaller than 2*CURL_MAX_WRITE_SIZE. It is also
 * ignored if the reallocation fails; the old buffer then stays valid.
 *
 * Returns the buffer capacity in effect after the call.
 */
int kurl_buflen(kurl_t *ku, int len)
{
	uint8_t *tmp;
	int m;
	if (len <= 0 || len < ku->l_buf) return ku->m_buf;
	if (len > 0x40000000) return ku->m_buf;
	if (!kurl_isfile(ku) && len < CURL_MAX_WRITE_SIZE * 2) return ku->m_buf;
	m = len;
	kroundup32(m);
	// Keep the old pointer until realloc() has succeeded: overwriting ku->buf
	// with NULL would leak the buffer and crash the next read.
	tmp = (uint8_t*)realloc(ku->buf, m);
	if (tmp == 0) return ku->m_buf;
	ku->buf = tmp;
	ku->m_buf = m;
	return ku->m_buf;
}
|
||||||
|
|
||||||
|
/*
 * Read up to nbytes bytes into buf, refilling the internal buffer as needed.
 * If buf is NULL the bytes are skipped instead of copied.
 * Returns the number of bytes consumed; 0 when the input is already exhausted.
 */
ssize_t kurl_read(kurl_t *ku, void *buf, size_t nbytes)
{
	uint8_t *dst = (uint8_t*)buf;
	ssize_t todo = nbytes; // bytes still to deliver
	if (ku->l_buf == 0) return 0; // end-of-file
	while (todo) {
		const int avail = ku->l_buf - ku->p_buf; // unread bytes in the buffer
		if (avail >= todo) { // the buffer can satisfy the rest of the request
			if (dst) memcpy(dst + (nbytes - todo), ku->buf + ku->p_buf, todo);
			ku->p_buf += todo;
			todo = 0;
		} else { // drain the buffer, then fetch more
			if (dst && avail > 0)
				memcpy(dst + (nbytes - todo), ku->buf + ku->p_buf, avail);
			todo -= avail;
			ku->p_buf = ku->l_buf;
			if (fill_buffer(ku) <= 0) break; // nothing more to read
		}
	}
	return nbytes - todo;
}
|
||||||
|
|
||||||
|
/*
 * Move the read position.
 *
 * whence: SEEK_SET, SEEK_CUR, or -- for local files only -- SEEK_END.
 *
 * Returns the new absolute offset, or -1 on failure with ku->err set to
 * KURL_INV_WHENCE (unsupported whence) or KURL_SEEK_OUT (negative or
 * unreachable offset). A NULL handle also yields -1.
 *
 * Targets inside the current buffer only move p_buf; short forward jumps
 * (at most KU_MAX_SKIP bytes) are read through; everything else repositions
 * the source with prepare() and refills the buffer.
 *
 * FIXME: sometimes when seek() fails, read() will fail as well.
 */
off_t kurl_seek(kurl_t *ku, off_t offset, int whence)
{
	off_t new_off = -1, cur_off;
	int failed = 0, seek_end = 0;
	if (ku == 0) return -1;
	cur_off = ku->off0 + ku->p_buf;
	if (whence == SEEK_SET) new_off = offset;
	// plain assignment: "+=" on the -1 initial value landed one byte too early
	else if (whence == SEEK_CUR) new_off = cur_off + offset;
	else if (whence == SEEK_END && kurl_isfile(ku)) new_off = lseek(ku->fd, offset, SEEK_END), seek_end = 1;
	else { // not supported whence
		ku->err = KURL_INV_WHENCE;
		return -1;
	}
	if (new_off < 0) { // negative absolute offset
		ku->err = KURL_SEEK_OUT;
		return -1;
	}
	if (!seek_end && new_off >= cur_off && new_off - cur_off + ku->p_buf < ku->l_buf) {
		// target is inside the buffered data: just move the cursor
		ku->p_buf += new_off - cur_off;
		return ku->off0 + ku->p_buf;
	}
	if (seek_end || new_off < cur_off || new_off - cur_off > KU_MAX_SKIP) { // if jump is large, do actual seek
		ku->off0 = new_off;
		ku->done_reading = 0;
		if (prepare(ku, 1) < 0 || fill_buffer(ku) <= 0) failed = 1;
	} else { // if jump is small, read through
		off_t r;
		r = kurl_read(ku, 0, new_off - cur_off);
		if (r + cur_off != new_off) failed = 1; // out of range
	}
	if (failed) ku->err = KURL_SEEK_OUT, ku->l_buf = ku->p_buf = 0, new_off = -1;
	return new_off;
}
|
||||||
|
|
||||||
|
off_t kurl_tell(const kurl_t *ku)
|
||||||
|
{
|
||||||
|
if (ku == 0) return -1;
|
||||||
|
return ku->off0 + ku->p_buf;
|
||||||
|
}
|
||||||
|
|
||||||
|
int kurl_eof(const kurl_t *ku)
|
||||||
|
{
|
||||||
|
if (ku == 0) return 1;
|
||||||
|
return (ku->l_buf == 0); // unless file end, buffer should never be empty
|
||||||
|
}
|
||||||
|
|
||||||
|
int kurl_fileno(const kurl_t *ku)
|
||||||
|
{
|
||||||
|
if (ku == 0) return -1;
|
||||||
|
return ku->fd;
|
||||||
|
}
|
||||||
|
|
||||||
|
int kurl_error(const kurl_t *ku)
|
||||||
|
{
|
||||||
|
if (ku == 0) return KURL_NULL;
|
||||||
|
return ku->err;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*****************
|
||||||
|
*** HMAC-SHA1 ***
|
||||||
|
*****************/
|
||||||
|
|
||||||
|
/* This code is public-domain - it is based on libcrypt placed in the public domain by Wei Dai and other contributors. */
|
||||||
|
|
||||||
|
#define HASH_LENGTH 20
#define BLOCK_LENGTH 64

/* Working state of one SHA-1 / HMAC-SHA1 computation. */
typedef struct sha1nfo {
	union { uint8_t b[BLOCK_LENGTH]; uint32_t w[BLOCK_LENGTH/4]; } buf; /* current 64-byte block */
	uint8_t bufOffset;                 /* number of bytes already stored in buf */
	union { uint8_t b[HASH_LENGTH]; uint32_t w[HASH_LENGTH/4]; } state; /* chaining value / final digest */
	uint32_t byteCount;                /* message bytes hashed so far */
	uint8_t keyBuffer[BLOCK_LENGTH];   /* zero-padded HMAC key */
	uint8_t innerHash[HASH_LENGTH];    /* digest of the inner HMAC pass */
} sha1nfo;

/* Reset s to the SHA-1 initial state. */
void sha1_init(sha1nfo *s)
{
	static const uint8_t initial_state[HASH_LENGTH] = {
		0x01,0x23,0x45,0x67, 0x89,0xab,0xcd,0xef, 0xfe,0xdc,0xba,0x98,
		0x76,0x54,0x32,0x10, 0xf0,0xe1,0xd2,0xc3
	};
	memcpy(s->state.b, initial_state, HASH_LENGTH);
	s->bufOffset = 0;
	s->byteCount = 0;
}

#define rol32(value, bits) (((value) << (bits)) | ((value) >> (32 - (bits))))

/* Run the 80 SHA-1 rounds over the buffered block and fold the result into the state. */
static void sha1_hashBlock(sha1nfo *s)
{
	uint32_t a = s->state.w[0], b = s->state.w[1], c = s->state.w[2];
	uint32_t d = s->state.w[3], e = s->state.w[4];
	uint32_t round;

	for (round = 0; round < 80; ++round) {
		uint32_t *slot = &s->buf.w[round & 15]; /* message schedule kept in a 16-word ring */
		uint32_t f, k, tmp;
		if (round >= 16) {
			tmp = s->buf.w[(round + 13) & 15] ^ s->buf.w[(round + 8) & 15]
				^ s->buf.w[(round + 2) & 15] ^ *slot;
			*slot = rol32(tmp, 1);
		}
		switch (round / 20) { /* round function and constant change every 20 rounds */
		case 0:  k = 0x5a827999; f = d ^ (b & (c ^ d)); break;
		case 1:  k = 0x6ed9eba1; f = b ^ c ^ d; break;
		case 2:  k = 0x8f1bbcdc; f = (b & c) | (d & (b | c)); break;
		default: k = 0xca62c1d6; f = b ^ c ^ d; break;
		}
		tmp = k + f + rol32(a, 5) + e + *slot;
		e = d; d = c; c = rol32(b, 30); b = a; a = tmp;
	}
	s->state.w[0] += a;
	s->state.w[1] += b;
	s->state.w[2] += c;
	s->state.w[3] += d;
	s->state.w[4] += e;
}

/* Store one byte in the block buffer (index XOR 3) and hash the block once it is full. */
static inline void sha1_add(sha1nfo *s, uint8_t data)
{
	s->buf.b[s->bufOffset ^ 3] = data;
	s->bufOffset++;
	if (s->bufOffset == BLOCK_LENGTH) {
		sha1_hashBlock(s);
		s->bufOffset = 0;
	}
}

/* Hash one message byte. */
void sha1_write1(sha1nfo *s, uint8_t data)
{
	s->byteCount++;
	sha1_add(s, data);
}

/* Hash len message bytes starting at data. */
void sha1_write(sha1nfo *s, const char *data, size_t len)
{
	size_t k;
	for (k = 0; k < len; ++k) sha1_write1(s, (uint8_t)data[k]);
}

/*
 * Append the SHA-1 padding and length, and return the 20-byte digest.
 * The returned pointer refers to storage inside s.
 */
const uint8_t *sha1_final(sha1nfo *s)
{
	int i;

	sha1_add(s, 0x80);
	while (s->bufOffset != 56) sha1_add(s, 0);
	/* 64-bit big-endian bit count; byteCount is 32 bits, so the top three bytes are zero */
	for (i = 0; i < 3; ++i) sha1_add(s, 0);
	sha1_add(s, (uint8_t)(s->byteCount >> 29));
	sha1_add(s, (uint8_t)(s->byteCount >> 21));
	sha1_add(s, (uint8_t)(s->byteCount >> 13));
	sha1_add(s, (uint8_t)(s->byteCount >> 5));
	sha1_add(s, (uint8_t)(s->byteCount << 3));
	/* reverse the byte order of every state word */
	for (i = 0; i < 5; ++i) {
		const uint32_t v = s->state.w[i];
		s->state.w[i] = (v << 24) | ((v << 8) & 0x00ff0000) | ((v >> 8) & 0x0000ff00) | (v >> 24);
	}
	return s->state.b;
}

#define HMAC_IPAD 0x36
#define HMAC_OPAD 0x5c

/*
 * Start an HMAC-SHA1 computation with the given key. Keys longer than one
 * block are replaced by their SHA-1 digest.
 */
void sha1_init_hmac(sha1nfo *s, const uint8_t* key, int l_key)
{
	int k;

	memset(s->keyBuffer, 0, BLOCK_LENGTH);
	if (l_key <= BLOCK_LENGTH) {
		memcpy(s->keyBuffer, key, l_key);
	} else {
		sha1_init(s);
		sha1_write(s, (const char*)key, l_key);
		memcpy(s->keyBuffer, sha1_final(s), HASH_LENGTH);
	}
	/* inner pass starts with the key XOR ipad */
	sha1_init(s);
	for (k = 0; k < BLOCK_LENGTH; ++k)
		sha1_write1(s, s->keyBuffer[k] ^ HMAC_IPAD);
}

/*
 * Finish an HMAC-SHA1 computation and return the 20-byte MAC.
 * The returned pointer refers to storage inside s.
 */
const uint8_t *sha1_final_hmac(sha1nfo *s)
{
	int k;

	memcpy(s->innerHash, sha1_final(s), HASH_LENGTH);
	/* outer pass: H((key XOR opad) || inner digest) */
	sha1_init(s);
	for (k = 0; k < BLOCK_LENGTH; ++k) sha1_write1(s, s->keyBuffer[k] ^ HMAC_OPAD);
	for (k = 0; k < HASH_LENGTH; ++k) sha1_write1(s, s->innerHash[k]);
	return sha1_final(s);
}
|
||||||
|
|
||||||
|
/*******************
|
||||||
|
*** S3 protocol ***
|
||||||
|
*******************/
|
||||||
|
|
||||||
|
#include <time.h>
|
||||||
|
#include <ctype.h>
|
||||||
|
|
||||||
|
static void s3_sign(const char *key, const char *data, char out[29])
|
||||||
|
{
|
||||||
|
const char *b64tab = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
|
||||||
|
const uint8_t *digest;
|
||||||
|
int i, j, rest;
|
||||||
|
sha1nfo s;
|
||||||
|
sha1_init_hmac(&s, (uint8_t*)key, strlen(key));
|
||||||
|
sha1_write(&s, data, strlen(data));
|
||||||
|
digest = sha1_final_hmac(&s);
|
||||||
|
for (j = i = 0, rest = 8; i < 20; ++j) { // base64 encoding
|
||||||
|
if (rest <= 6) {
|
||||||
|
int next = i < 19? digest[i+1] : 0;
|
||||||
|
out[j] = b64tab[(int)(digest[i] << (6-rest) & 0x3f) | next >> (rest+2)], ++i, rest += 2;
|
||||||
|
} else out[j] = b64tab[(int)digest[i] >> (rest-6) & 0x3f], rest -= 6;
|
||||||
|
}
|
||||||
|
out[j++] = '='; out[j] = 0; // SHA1 digest always has 160 bits, or 20 bytes. We need one '=' at the end.
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Read an AWS key id and secret from a two-line text file.
 *
 * fn: path of the file; if NULL, "$HOME/.awssecret" is used.
 *
 * The first line is the key id, the second the secret. Only the first 127
 * bytes of the file are considered, so a longer second line is truncated.
 *
 * Returns a malloc()ed buffer holding "id\0secret\0" (the secret starts at
 * strlen(id) + 1), or NULL if HOME is unset, the file cannot be opened, it
 * contains no newline, or memory cannot be allocated. The caller frees it.
 */
static char *s3_read_awssecret(const char *fn)
{
	char *p, buf[128], *path = 0;
	FILE *fp;
	int l;

	if (fn == 0) {
		const char *home = getenv("HOME");
		if (home == 0) return 0;
		path = (char*)malloc(strlen(home) + 12); // "/.awssecret" plus NUL
		if (path == 0) return 0;
		strcat(strcpy(path, home), "/.awssecret");
	}
	fp = fopen(path? path : fn, "r");
	free(path); // only set when the default path was built here
	if (fp == 0) return 0;
	l = fread(buf, 1, 127, fp);
	fclose(fp);
	buf[l] = 0;
	for (p = buf; *p != 0 && *p != '\n'; ++p);
	if (*p == 0) return 0; // a single line: no secret present
	*p = 0; // terminate the key id
	for (++p; *p != 0 && *p != '\n'; ++p);
	*p = 0; // terminate the secret
	l = p - buf + 1;
	p = (char*)malloc(l);
	if (p == 0) return 0;
	memcpy(p, buf, l);
	return p;
}
|
||||||
|
|
||||||
|
/* Growable string: l = length, m = allocated size, s = NUL-terminated data. */
typedef struct { int l, m; char *s; } kstring_t;

#ifndef kroundup32
#define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x))
#endif

/*
 * Append the first l bytes of p to s, growing the buffer to the next power
 * of two when needed, and keep s NUL-terminated.
 * Returns l on success, or -1 if the buffer cannot be grown (s is then left
 * unchanged).
 */
static inline int kputsn(const char *p, int l, kstring_t *s)
{
	if (s->l + l + 1 >= s->m) {
		char *tmp;
		int m = s->l + l + 2;
		kroundup32(m);
		// realloc() into a temporary so the existing string survives a failure
		tmp = (char*)realloc(s->s, m);
		if (tmp == 0) return -1;
		s->s = tmp;
		s->m = m;
	}
	memcpy(s->s + s->l, p, l);
	s->l += l;
	s->s[s->l] = 0;
	return l;
}
|
||||||
|
|
||||||
|
s3aux_t s3_parse(const char *url, const char *_id, const char *_secret, const char *fn_secret)
|
||||||
|
{
|
||||||
|
const char *id, *secret, *bucket, *obj;
|
||||||
|
char *id_secret = 0, date[64], sig[29];
|
||||||
|
time_t t;
|
||||||
|
struct tm tmt;
|
||||||
|
s3aux_t a = {0,0};
|
||||||
|
kstring_t str = {0,0,0};
|
||||||
|
// parse URL
|
||||||
|
if (strstr(url, "s3://") != url) return a;
|
||||||
|
bucket = url + 5;
|
||||||
|
for (obj = bucket; *obj && *obj != '/'; ++obj);
|
||||||
|
if (*obj == 0) return a; // no object
|
||||||
|
// acquire AWS credential and time
|
||||||
|
if (_id == 0 || _secret == 0) {
|
||||||
|
id_secret = s3_read_awssecret(fn_secret);
|
||||||
|
if (id_secret == 0) return a; // fail to read the AWS credential
|
||||||
|
id = id_secret;
|
||||||
|
secret = id_secret + strlen(id) + 1;
|
||||||
|
} else id = _id, secret = _secret;
|
||||||
|
// compose URL for curl
|
||||||
|
kputsn("https://", 8, &str);
|
||||||
|
kputsn(bucket, obj - bucket, &str);
|
||||||
|
kputsn(".s3.amazonaws.com", 17, &str);
|
||||||
|
kputsn(obj, strlen(obj), &str);
|
||||||
|
a.url = str.s;
|
||||||
|
// compose the Date line
|
||||||
|
str.l = str.m = 0; str.s = 0;
|
||||||
|
t = time(0);
|
||||||
|
strftime(date, 64, "%a, %d %b %Y %H:%M:%S +0000", gmtime_r(&t, &tmt));
|
||||||
|
kputsn("Date: ", 6, &str);
|
||||||
|
kputsn(date, strlen(date), &str);
|
||||||
|
a.date = str.s;
|
||||||
|
// compose the string to sign and sign it
|
||||||
|
str.l = str.m = 0; str.s = 0;
|
||||||
|
kputsn("GET\n\n\n", 6, &str);
|
||||||
|
kputsn(date, strlen(date), &str);
|
||||||
|
kputsn("\n", 1, &str);
|
||||||
|
kputsn(bucket-1, strlen(bucket-1), &str);
|
||||||
|
s3_sign(secret, str.s, sig);
|
||||||
|
// compose the Authorization line
|
||||||
|
str.l = 0;
|
||||||
|
kputsn("Authorization: AWS ", 19, &str);
|
||||||
|
kputsn(id, strlen(id), &str);
|
||||||
|
kputsn(":", 1, &str);
|
||||||
|
kputsn(sig, strlen(sig), &str);
|
||||||
|
a.auth = str.s;
|
||||||
|
// printf("curl -H '%s' -H '%s' %s\n", a.date, a.auth, a.url);
|
||||||
|
return a;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*********************
|
||||||
|
*** Main function ***
|
||||||
|
*********************/
|
||||||
|
|
||||||
|
#ifdef KURL_MAIN
|
||||||
|
int main(int argc, char *argv[])
|
||||||
|
{
|
||||||
|
kurl_t *f;
|
||||||
|
int c, l, l_buf = 0x10000;
|
||||||
|
off_t start = 0, rest = -1;
|
||||||
|
uint8_t *buf;
|
||||||
|
char *p;
|
||||||
|
kurl_opt_t opt;
|
||||||
|
|
||||||
|
memset(&opt, 0, sizeof(kurl_opt_t));
|
||||||
|
while ((c = getopt(argc, argv, "c:l:a:")) >= 0) {
|
||||||
|
if (c == 'c') start = strtol(optarg, &p, 0);
|
||||||
|
else if (c == 'l') rest = strtol(optarg, &p, 0);
|
||||||
|
else if (c == 'a') opt.s3key_fn = optarg;
|
||||||
|
}
|
||||||
|
if (optind == argc) {
|
||||||
|
fprintf(stderr, "Usage: kurl [-c start] [-l length] <url>\n");
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
kurl_init();
|
||||||
|
f = kurl_open(argv[optind], &opt);
|
||||||
|
if (f == 0) {
|
||||||
|
fprintf(stderr, "ERROR: fail to open URL\n");
|
||||||
|
return 2;
|
||||||
|
}
|
||||||
|
if (start > 0) {
|
||||||
|
if (kurl_seek(f, start, SEEK_SET) < 0) {
|
||||||
|
kurl_close(f);
|
||||||
|
fprintf(stderr, "ERROR: fail to seek\n");
|
||||||
|
return 3;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
buf = (uint8_t*)calloc(l_buf, 1);
|
||||||
|
while (rest != 0) {
|
||||||
|
int to_read = rest > 0 && rest < l_buf? rest : l_buf;
|
||||||
|
l = kurl_read(f, buf, to_read);
|
||||||
|
if (l == 0) break;
|
||||||
|
fwrite(buf, 1, l, stdout);
|
||||||
|
rest -= l;
|
||||||
|
}
|
||||||
|
free(buf);
|
||||||
|
kurl_close(f);
|
||||||
|
kurl_destroy();
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
@ -0,0 +1,57 @@
|
||||||
|
#ifndef KURL_H
|
||||||
|
#define KURL_H
|
||||||
|
|
||||||
|
#include <sys/types.h>
|
||||||
|
|
||||||
|
#define KURL_NULL 1
|
||||||
|
#define KURL_INV_WHENCE 2
|
||||||
|
#define KURL_SEEK_OUT 3
|
||||||
|
#define KURL_NO_AUTH 4
|
||||||
|
|
||||||
|
struct kurl_t;
|
||||||
|
typedef struct kurl_t kurl_t;
|
||||||
|
|
||||||
|
/* Optional settings for kurl_open(); zero the struct for the defaults. */
typedef struct {
	const char *s3keyid;     /* AWS access key id, or NULL */
	const char *s3secretkey; /* AWS secret key, or NULL */
	const char *s3key_fn;    /* file to read the credentials from when either key above is NULL */
} kurl_opt_t;
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
int kurl_init(void);
|
||||||
|
void kurl_destroy(void);
|
||||||
|
|
||||||
|
kurl_t *kurl_open(const char *url, kurl_opt_t *opt);
|
||||||
|
kurl_t *kurl_dopen(int fd);
|
||||||
|
int kurl_close(kurl_t *ku);
|
||||||
|
ssize_t kurl_read(kurl_t *ku, void *buf, size_t nbytes);
|
||||||
|
off_t kurl_seek(kurl_t *ku, off_t offset, int whence);
|
||||||
|
int kurl_buflen(kurl_t *ku, int len);
|
||||||
|
|
||||||
|
off_t kurl_tell(const kurl_t *ku);
|
||||||
|
int kurl_eof(const kurl_t *ku);
|
||||||
|
int kurl_fileno(const kurl_t *ku);
|
||||||
|
int kurl_error(const kurl_t *ku);
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef KNETFILE_H
|
||||||
|
#define KNETFILE_H
|
||||||
|
typedef kurl_t knetFile;
|
||||||
|
#define knet_open(fn, mode) kurl_open(fn, 0)
|
||||||
|
#define knet_dopen(fd, mode) kurl_dopen(fd)
|
||||||
|
#define knet_close(fp) kurl_close(fp)
|
||||||
|
#define knet_read(fp, buf, len) kurl_read(fp, buf, len)
|
||||||
|
#define knet_seek(fp, off, whence) kurl_seek(fp, off, whence)
|
||||||
|
#define knet_tell(fp) kurl_tell(fp)
|
||||||
|
#define knet_fileno(fp) kurl_fileno(fp)
|
||||||
|
#define knet_win32_init() kurl_init()
|
||||||
|
#define knet_win32_destroy() kurl_destroy()
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
@ -0,0 +1,90 @@
|
||||||
|
/* The MIT License
|
||||||
|
|
||||||
|
Copyright (c) 2008, by Attractive Chaos <attractor@live.co.uk>
|
||||||
|
|
||||||
|
Permission is hereby granted, free of charge, to any person obtaining
|
||||||
|
a copy of this software and associated documentation files (the
|
||||||
|
"Software"), to deal in the Software without restriction, including
|
||||||
|
without limitation the rights to use, copy, modify, merge, publish,
|
||||||
|
distribute, sublicense, and/or sell copies of the Software, and to
|
||||||
|
permit persons to whom the Software is furnished to do so, subject to
|
||||||
|
the following conditions:
|
||||||
|
|
||||||
|
The above copyright notice and this permission notice shall be
|
||||||
|
included in all copies or substantial portions of the Software.
|
||||||
|
|
||||||
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||||
|
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||||
|
BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||||
|
ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||||
|
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
|
SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
An example:
|
||||||
|
|
||||||
|
#include "kvec.h"
|
||||||
|
int main() {
|
||||||
|
kvec_t(int) array;
|
||||||
|
kv_init(array);
|
||||||
|
kv_push(int, array, 10); // append
|
||||||
|
kv_a(int, array, 20) = 5; // dynamic
|
||||||
|
kv_A(array, 20) = 4; // static
|
||||||
|
kv_destroy(array);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
2008-09-22 (0.1.0):
|
||||||
|
|
||||||
|
* The initial version.
|
||||||
|
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef AC_KVEC_H
|
||||||
|
#define AC_KVEC_H
|
||||||
|
|
||||||
|
#include <stdlib.h>
|
||||||
|
|
||||||
|
#define kv_roundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x))
|
||||||
|
|
||||||
|
#define kvec_t(type) struct { size_t n, m; type *a; }
|
||||||
|
#define kv_init(v) ((v).n = (v).m = 0, (v).a = 0)
|
||||||
|
#define kv_destroy(v) free((v).a)
|
||||||
|
#define kv_A(v, i) ((v).a[(i)])
|
||||||
|
#define kv_pop(v) ((v).a[--(v).n])
|
||||||
|
#define kv_size(v) ((v).n)
|
||||||
|
#define kv_max(v) ((v).m)
|
||||||
|
|
||||||
|
#define kv_resize(type, v, s) ((v).m = (s), (v).a = (type*)realloc((v).a, sizeof(type) * (v).m))
|
||||||
|
|
||||||
|
#define kv_copy(type, v1, v0) do { \
|
||||||
|
if ((v1).m < (v0).n) kv_resize(type, v1, (v0).n); \
|
||||||
|
(v1).n = (v0).n; \
|
||||||
|
memcpy((v1).a, (v0).a, sizeof(type) * (v0).n); \
|
||||||
|
} while (0) \
|
||||||
|
|
||||||
|
#define kv_push(type, v, x) do { \
|
||||||
|
if ((v).n == (v).m) { \
|
||||||
|
(v).m = (v).m? (v).m<<1 : 2; \
|
||||||
|
(v).a = (type*)realloc((v).a, sizeof(type) * (v).m); \
|
||||||
|
} \
|
||||||
|
(v).a[(v).n++] = (x); \
|
||||||
|
} while (0)
|
||||||
|
|
||||||
|
#define kv_pushp(type, v) (((v).n == (v).m)? \
|
||||||
|
((v).m = ((v).m? (v).m<<1 : 2), \
|
||||||
|
(v).a = (type*)realloc((v).a, sizeof(type) * (v).m), 0) \
|
||||||
|
: 0), ((v).a + ((v).n++))
|
||||||
|
|
||||||
|
#define kv_a(type, v, i) (((v).m <= (size_t)(i)? \
|
||||||
|
((v).m = (v).n = (i) + 1, kv_roundup32((v).m), \
|
||||||
|
(v).a = (type*)realloc((v).a, sizeof(type) * (v).m), 0) \
|
||||||
|
: (v).n <= (size_t)(i)? (v).n = (i) + 1 \
|
||||||
|
: 0), (v).a[(i)])
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
@ -0,0 +1,149 @@
|
||||||
|
-- bioinformatics routines
|
||||||
|
|
||||||
|
-- Description: read a fasta/fastq file
-- fp is any object with a :lines() method (e.g. an open file handle).
-- Returns an iterator; each call yields `name, seq` for a fasta record or
-- `name, seq, qual` for a fastq record, and nil once the input is exhausted.
local function readseq(fp)
	-- `last` caches a header (or "+") line that was read while finishing the previous record
	local finished, last = false, nil;
	return function()
		if finished then return nil end
		if (last == nil) then -- the first record or a record following a fastq
			for l in fp:lines() do
				if l:byte(1) == 62 or l:byte(1) == 64 then -- ">" || "@"
					last = l;
					break;
				end
			end
			if last == nil then
				finished = true;
				return nil;
			end
		end
		local tmp = last:find("%s");
		-- sequence name: header text after the marker, up to the first whitespace.
		-- Declared local so that it does not leak into the global environment.
		local name = (tmp and last:sub(2, tmp-1)) or last:sub(2);
		local seqs = {};
		local c; -- the first character of the last line
		last = nil;
		for l in fp:lines() do -- read sequence
			c = l:byte(1);
			if c == 62 or c == 64 or c == 43 then -- ">" || "@" || "+"
				last = l;
				break;
			end
			table.insert(seqs, l);
		end
		if last == nil then finished = true end -- end of file
		if c ~= 43 then return name, table.concat(seqs) end -- a fasta record
		local seq, len = table.concat(seqs), 0; -- prepare to parse quality
		seqs = {};
		for l in fp:lines() do -- read quality
			table.insert(seqs, l);
			len = len + #l;
			-- quality is delimited by length, since a quality line may itself start with "@"
			if len >= #seq then
				last = nil;
				return name, seq, table.concat(seqs);
			end
		end
		-- truncated quality string: report the record without quality and stop
		finished = true;
		return name, seq;
	end
end
|
||||||
|
|
||||||
|
-- extract subsequence from a fasta file indexed by samtools faidx
-- fn is the FASTA path; the index is read from fn .. ".fai".
-- Returns nil if either file cannot be opened; otherwise returns a function
-- f(name, beg_, end_) yielding the bases in [beg_, end_) (0-based) of sequence
-- `name`, or nil when the name is unknown. Calling f(nil) closes the file.
local function faidxsub(fn)
	local fpidx = io.open(fn .. ".fai");
	if fpidx == nil then
		io.stderr:write("[faidxsub] fail to open the FASTA index file.\n");
		return nil
	end
	local idx = {};
	for l in fpidx:lines() do
		local name, len, offset, line_blen, line_len = l:match("(%S+)%s(%d+)%s(%d+)%s(%d+)%s(%d+)");
		if name then
			-- {length, file offset of first base, bases per line, bytes per line}, all as numbers
			idx[name] = {tonumber(len), tonumber(offset), tonumber(line_blen), tonumber(line_len)};
		end
	end
	fpidx:close();
	local fp = io.open(fn);
	if fp == nil then
		io.stderr:write("[faidxsub] fail to open the FASTA file.\n");
		return nil
	end
	return function(name, beg_, end_) -- 0-based coordinate
		if name == nil then fp:close(); return nil; end
		local a = idx[name];
		if a == nil then return nil end
		beg_ = beg_ or 0;
		end_ = end_ or a[1];
		end_ = (end_ <= a[1] and end_) or a[1]; -- clip to the sequence length
		-- fb/fe: index of the line holding beg_/end_; qb/qe: column within that line
		local fb, fe = math.floor(beg_ / a[3]), math.floor(end_ / a[3]);
		local qb, qe = beg_ - fb * a[3], end_ - fe * a[3];
		fp:seek("set", a[2] + fb * a[4] + qb);
		local raw = fp:read((fe - fb) * a[4] + (qe - qb));
		if raw == nil then return nil end -- nothing left to read at that offset
		local s = raw:gsub("%s", ""); -- drop the line terminators
		return s;
	end
end
|
||||||
|
|
||||||
|
--Description: Index a list of intervals and test if a given interval overlaps with the list
--Example: lua -lbio -e 'a={{100,201},{200,300},{400,600}};f=bio.intvovlp(a);print(f(600,700))'
--[[
  By default, we keep for each tiling 8192 window the interval overlapping the
  window while having the smallest start position. This method may not work
  well when most intervals are small but few intervals span a long distance.

  intv is sorted and merged IN PLACE. Intervals are {start, end} pairs treated
  as half-open; the returned closure f(_beg, _end) yields true or false.
  Interval ends are expected to be non-negative (-1 is used as "no interval").
]]--
local function intvovlp(intv, bits)
	bits = bits or 13 -- the default bin size is 8192 = 1<<13
	table.sort(intv, function(a,b) return a[1] < b[1] end) -- sort by the start
	-- merge intervals; the step speeds up testing, but can be skipped
	local b, e, k = -1, -1, 1
	for i = 1, #intv do
		if e < intv[i][1] then
			if e >= 0 then
				intv[k] = {b, e}
				k = k + 1
			end
			b, e = intv[i][1], intv[i][2]
		elseif e < intv[i][2] then
			-- only extend: an interval contained in the current one must not shrink it
			e = intv[i][2]
		end
	end
	if e >= 0 then intv[k] = {b, e} end
	while #intv > k do table.remove(intv) end -- truncate the interval list
	-- build the index for the list of intervals
	local idx, size, max = {}, 2 ^ bits, 0
	for i = 1, #intv do
		b = math.modf(intv[i][1] / size)
		e = math.modf(intv[i][2] / size)
		if b == e then idx[b] = idx[b] or i
		else for j = b, e do idx[j] = idx[j] or i end end
		max = (max > e and max) or e
	end
	-- return a function (closure)
	return function(_beg, _end)
		local x = math.modf(_beg / size)
		if x > max then return false end -- the query starts after the last indexed bin
		local off = idx[x]; -- the start bin
		if off == nil then -- the following is not the best in efficiency
			for i = x - 1, 0, -1 do -- find the closest preceding bin with a value
				if idx[i] ~= nil then off = idx[i]; break; end
			end
			-- no populated bin at or before x: every interval starts after _beg,
			-- but the query may still reach one, so scan from the first interval
			off = off or 1
		end
		for i = off, #intv do -- start from off and search for overlaps
			if intv[i][1] >= _end then return false
			elseif intv[i][2] > _beg then return true end
		end
		return false
	end
end
|
||||||
|
|
||||||
|
-- Public module table (a global): the routines defined above plus nucleotide lookup tables.
bio = {
	readseq = readseq,
	faidxsub = faidxsub,
	intvovlp = intvovlp,

	-- byte value (0..255) -> 4-bit nucleotide code (A=1, C=2, G=4, T=8); 15 for every other byte
	nt16 = {
		[0]=15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
		15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
		15, 1,14, 2, 13,15,15, 4, 11,15,15,12, 15, 3,15,15, 15,15, 5, 6,  8,15, 7, 9,  0,10,15,15, 15,15,15,15,
		15, 1,14, 2, 13,15,15, 4, 11,15,15,12, 15, 3,15,15, 15,15, 5, 6,  8,15, 7, 9,  0,10,15,15, 15,15,15,15,
		15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
		15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
		15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
		15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15
	},

	-- 4-bit code -> a count (equal to the number of set bits, except that code 0 maps to 4)
	ntcnt = { [0]=4, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4 },

	-- 4-bit code -> code of the complement (A<->T, C<->G)
	ntcomp = { [0]=0, 8, 4, 12, 2, 10, 9, 14, 1, 6, 5, 13, 3, 11, 7, 15 },

	-- 4-bit code -> letter: the character at position code+1
	ntrev = 'XACMGRSVTWYHKDBN'
}
|
||||||
|
|
@ -0,0 +1,677 @@
|
||||||
|
--[[
|
||||||
|
The MIT License
|
||||||
|
|
||||||
|
Copyright (c) 2011, Attractive Chaos <attractor@live.co.uk>
|
||||||
|
|
||||||
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
of this software and associated documentation files (the "Software"), to deal
|
||||||
|
in the Software without restriction, including without limitation the rights
|
||||||
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
copies of the Software, and to permit persons to whom the Software is
|
||||||
|
furnished to do so, subject to the following conditions:
|
||||||
|
|
||||||
|
The above copyright notice and this permission notice shall be included in
|
||||||
|
all copies or substantial portions of the Software.
|
||||||
|
|
||||||
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
|
SOFTWARE.
|
||||||
|
]]--
|
||||||
|
|
||||||
|
--[[
|
||||||
|
This is a Lua library, more exactly a collection of Lua snippets, covering
|
||||||
|
utilities (e.g. getopt), string operations (e.g. split), statistics (e.g.
|
||||||
|
Fisher's exact test), special functions (e.g. logarithm gamma) and matrix
|
||||||
|
operations (e.g. Gauss-Jordan elimination). The routines are designed to be
|
||||||
|
as independent as possible, such that one can copy-paste relevant pieces of
|
||||||
|
code without worrying about additional library dependencies.
|
||||||
|
|
||||||
|
If you use routines from this library, please include the licensing
|
||||||
|
information above where appropriate.
|
||||||
|
]]--
|
||||||
|
|
||||||
|
--[[
|
||||||
|
Library functions and dependencies. "a>b" means "a is required by b"; "b<a"
|
||||||
|
means "b depends on a".
|
||||||
|
|
||||||
|
os.getopt()
|
||||||
|
string:split()
|
||||||
|
io.xopen()
|
||||||
|
table.ksmall()
|
||||||
|
table.shuffle()
|
||||||
|
math.lgamma() >math.lbinom() >math.igamma()
|
||||||
|
math.igamma() <math.lgamma() >matrix.chi2()
|
||||||
|
math.erfc()
|
||||||
|
math.lbinom() <math.lgamma() >math.fisher_exact()
|
||||||
|
math.bernstein_poly() <math.lbinom()
|
||||||
|
math.fisher_exact() <math.lbinom()
|
||||||
|
math.jackknife()
|
||||||
|
math.pearson()
|
||||||
|
math.spearman()
|
||||||
|
math.fmin()
|
||||||
|
matrix
|
||||||
|
matrix.add()
|
||||||
|
matrix.T() >matrix.mul()
|
||||||
|
matrix.mul() <matrix.T()
|
||||||
|
matrix.tostring()
|
||||||
|
matrix.chi2() <math.igamma()
|
||||||
|
matrix.solve()
|
||||||
|
]]--
|
||||||
|
|
||||||
|
-- Description: getopt() translated from the BSD getopt(); compatible with the default Unix getopt()
--[[ Example:
	for o, a in os.getopt(arg, 'a:b') do
		print(o, a)
	end
]]--
-- args: array of command-line words; words that have been consumed are removed
--       from it in place, so the remaining entries are the non-option arguments.
-- ostr: option letters; a letter followed by ':' takes an argument.
-- Returns an iterator yielding `option, argument`. It yields '?' for an unknown
-- option or a missing argument (':' instead when ostr starts with ':'), and nil
-- at the first non-option word or after "--".
function os.getopt(args, ostr)
	local arg, place = nil, 0; -- place: index of the next character to scan in args[1]; 0 = start a new word
	return function ()
		if place == 0 then -- update scanning pointer
			place = 1
			if #args == 0 or args[1]:sub(1, 1) ~= '-' then place = 0; return nil end
			if #args[1] >= 2 then
				place = place + 1
				if args[1]:sub(2, 2) == '-' then -- found "--"
					place = 0
					table.remove(args, 1);
					return nil;
				end
			end
		end
		local optopt = args[1]:sub(place, place);
		place = place + 1;
		-- plain (non-pattern) search: an option character such as '.' or '%'
		-- must match itself only, not act as a Lua pattern
		local oli = ostr:find(optopt, 1, true);
		if optopt == ':' or oli == nil then -- unknown option
			if optopt == '-' then return nil end
			if place > #args[1] then
				table.remove(args, 1);
				place = 0;
			end
			return '?';
		end
		oli = oli + 1;
		if ostr:sub(oli, oli) ~= ':' then -- do not need argument
			arg = nil;
			if place > #args[1] then
				table.remove(args, 1);
				place = 0;
			end
		else -- need an argument
			if place <= #args[1] then  -- no white space
				arg = args[1]:sub(place);
			else
				table.remove(args, 1);
				if #args == 0 then -- an option requiring argument is the last one
					place = 0;
					if ostr:sub(1, 1) == ':' then return ':' end
					return '?';
				else arg = args[1] end
			end
			table.remove(args, 1);
			place = 0;
		end
		return optopt, arg;
	end
end
|
||||||
|
|
||||||
|
-- Description: string split
-- sep is a Lua pattern (default "%s+"). When n is given, splitting stops after
-- n fields and the unsplit remainder is appended as one more field. A separator
-- at the very end of the string does not produce a trailing empty field.
function string:split(sep, n)
	local fields, pos = {}, 1
	local pattern = sep or "%s+"
	while true do
		local first, last = self:find(pattern, pos)
		if first == nil then
			-- no further separator: the rest of the string is the final field
			fields[#fields + 1] = self:sub(pos)
			break
		end
		fields[#fields + 1] = self:sub(pos, first - 1)
		pos = last + 1
		if n and #fields == n then
			fields[#fields + 1] = self:sub(pos)
			break
		end
		if pos > #self then break end
	end
	return fields
end
|
||||||
|
|
||||||
|
-- Description: smart file open
-- fn == nil       -> io.stdin
-- fn == '-'       -> io.stdin when mode is 'r', io.stdout otherwise
-- fn ends in .gz  -> pipe through gzip  (decompress when mode is 'r', compress otherwise)
-- fn ends in .bz2 -> pipe through bzip2 (decompress when mode is 'r', compress otherwise)
-- anything else   -> io.open(fn, mode)
-- mode defaults to 'r'.
-- NOTE(review): fn is concatenated into a shell command without quoting; do not
-- pass untrusted file names for the .gz/.bz2 cases.
function io.xopen(fn, mode)
	mode = mode or 'r';
	if fn == nil then return io.stdin end
	if fn == '-' then
		if mode == 'r' then return io.stdin end
		return io.stdout
	end
	local tool; -- external (de)compressor selected by the file extension
	if fn:sub(-3) == '.gz' then tool = 'gzip'
	elseif fn:sub(-4) == '.bz2' then tool = 'bzip2' end
	if tool == nil then return io.open(fn, mode) end
	-- Explicit branches: a failed read must never fall through to the
	-- truncating "tool > fn" command.
	if mode == 'r' then return io.popen(tool .. ' -dc ' .. fn, 'r') end
	return io.popen(tool .. ' > ' .. fn, 'w')
end
|
||||||
|
|
||||||
|
-- Description: find the k-th smallest element in an array (Ref. http://ndevilla.free.fr/median/)
-- arr is partially reordered in place; the return value is arr[k] after the reordering.
function table.ksmall(arr, k)
	local function swap(i, j) arr[i], arr[j] = arr[j], arr[i] end

	local lo, hi = 1, #arr
	while true do
		if hi <= lo then return arr[k] end
		if hi == lo + 1 then -- two elements left: order them and finish
			if arr[hi] < arr[lo] then swap(hi, lo) end
			return arr[k]
		end
		-- arrange so that arr[mid] <= arr[lo] <= arr[hi], then park the middle
		-- element next to lo; arr[lo] is the pivot for the scan below
		local mid = math.floor((hi + lo) / 2)
		if arr[hi] < arr[mid] then swap(mid, hi) end
		if arr[hi] < arr[lo] then swap(lo, hi) end
		if arr[lo] < arr[mid] then swap(lo, mid) end
		swap(mid, lo + 1)
		local up, down = lo + 1, hi
		while true do
			repeat up = up + 1 until arr[up] >= arr[lo]
			repeat down = down - 1 until arr[lo] >= arr[down]
			if down < up then break end
			swap(up, down)
		end
		swap(lo, down) -- move the pivot to index `down`
		-- keep only the side that contains index k
		if down <= k then lo = up end
		if down >= k then hi = down - 1 end
	end
end
|
||||||
|
|
||||||
|
-- Description: shuffle/permute an array in place
-- Walks from the last index down to 1, swapping each element with one at a
-- random index in [1, i] drawn from math.random.
function table.shuffle(a)
	local i = #a
	while i >= 1 do
		local j = math.random(i)
		local held = a[i]
		a[i] = a[j]
		a[j] = held
		i = i - 1
	end
end
|
||||||
|
|
||||||
|
--
|
||||||
|
-- Mathematics
|
||||||
|
--
|
||||||
|
|
||||||
|
-- Description: log gamma function
-- Required by: math.lbinom()
-- Reference: AS245, 2nd algorithm, http://lib.stat.cmu.edu/apstat/245
function math.lgamma(z)
	-- rational series; the terms are accumulated left to right, from the
	-- (z+7) term down to the z term, followed by the constant
	local series = 0.1659470187408462e-06 / (z+7)
		+ 0.9934937113930748e-05 / (z+6)
		- 0.1385710331296526     / (z+5)
		+ 12.50734324009056      / (z+4)
		- 176.6150291498386      / (z+3)
		+ 771.3234287757674      / (z+2)
		- 1259.139216722289      / (z+1)
		+ 676.5203681218835      / z
		+ 0.9999999999995183
	return math.log(series) - 5.58106146679532777 - z + (z-0.5) * math.log(z+6.5)
end
|
||||||
|
|
||||||
|
-- Description: regularized incomplete gamma function
-- Dependent on: math.lgamma()
--[[
  Formulas are taken from Wiki, with additional input from Numerical
  Recipes in C (for modified Lentz's algorithm) and AS245
  (http://lib.stat.cmu.edu/apstat/245).

  A good online calculator is available at:

    http://www.danielsoper.com/statcalc/calc23.aspx

  It calculates upper incomplete gamma function, which equals
  math.igamma(s,z,true)*math.exp(math.lgamma(s))
]]--
-- With `complement` set the upper (complementary) function is returned,
-- otherwise the lower one.
function math.igamma(s, z, complement)

	-- lower regularized function by series expansion (at most 100 terms)
	local function lower_series(s, z)
		local sum, term = 1, 1
		for k = 1, 100 do
			term = term * z / (s + k)
			sum = sum + term
			if term / sum < 1e-14 then break end
		end
		return math.exp(s * math.log(z) - z - math.lgamma(s + 1.) + math.log(sum))
	end

	-- upper regularized function by continued fraction (at most 100 iterations)
	local function upper_cfrac(s, z)
		local TINY = 1e-290
		local f = 1. + z - s
		local C, D = f, 0.
		-- Modified Lentz's algorithm for computing continued fraction. See Numerical Recipes in C, 2nd edition, section 5.2
		for j = 1, 100 do
			local a, b = j * (s - j), j*2 + 1 + z - s
			D = b + a * D
			if D < TINY then D = TINY end
			C = b + a / C
			if C < TINY then C = TINY end
			D = 1. / D
			local d = C * D
			f = f * d
			if math.abs(d - 1) < 1e-14 then break end
		end
		return math.exp(s * math.log(z) - z - math.lgamma(s) - math.log(f))
	end

	-- the series is used when z <= 1 or z < s, the continued fraction otherwise
	local use_series = (z <= 1 or z < s)
	if complement then
		if use_series then return 1 - lower_series(s, z) end
		return upper_cfrac(s, z)
	end
	if use_series then return lower_series(s, z) end
	return 1 - upper_cfrac(s, z)
end
|
||||||
|
|
||||||
|
math.M_SQRT2   = 1.41421356237309504880 -- sqrt(2)
math.M_SQRT1_2 = 0.70710678118654752440 -- 1/sqrt(2)

-- Description: complement error function erfc(x): \Phi(x) = 0.5 * erfc(-x/M_SQRT2)
function math.erfc(x)
	-- evaluate a polynomial by Horner's rule, highest-order coefficient first
	local function horner(coef, z)
		local acc = coef[1]
		for i = 2, #coef do acc = acc * z + coef[i] end
		return acc
	end

	local z = math.abs(x) * math.M_SQRT2
	if z > 37 then -- beyond this the tail is treated as exactly 0
		if x > 0 then return 0 end
		return 2
	end
	local expntl = math.exp(-0.5 * z * z)
	local p
	if z < 10. / math.M_SQRT2 then -- for small z: ratio of two polynomials
		local numer = horner({.03526249659989109, .7003830644436881, 6.37396220353165, 33.912866078383,
			112.0792914978709, 221.2135961699311, 220.2068679123761}, z)
		local denom = horner({.08838834764831844, 1.755667163182642, 16.06417757920695, 86.78073220294608,
			296.5642487796737, 637.3336333788311, 793.8265125199484, 440.4137358247522}, z)
		p = expntl * numer / denom
	else -- for large z: continued fraction
		p = expntl / 2.506628274631001 / (z + 1. / (z + 2. / (z + 3. / (z + 4. / (z + .65)))))
	end
	if x > 0 then return 2 * p end
	return 2 * (1 - p)
end
|
||||||
|
|
||||||
|
-- Description: log binomial coefficient
-- Dependent on: math.lgamma()
-- Required by: math.fisher_exact()
-- With both arguments, returns log C(n, m). With m omitted, returns a table
-- t indexed 0..n where t[i] = log C(n, i).
function math.lbinom(n, m)
	if m ~= nil then
		return math.lgamma(n+1) - math.lgamma(m+1) - math.lgamma(n-m+1)
	end
	local tbl = {}
	tbl[0], tbl[n] = 0, 0 -- log C(n,0) = log C(n,n) = 0
	local lg_n1 = math.lgamma(n+1)
	for i = 1, n-1 do
		tbl[i] = lg_n1 - math.lgamma(i+1) - math.lgamma(n-i+1)
	end
	return tbl
end
|
||||||
|
|
||||||
|
-- Description: Bernstein polynomials (mainly for Bezier curves)
-- Dependent on: math.lbinom()
-- Note: to compute derivative: let beta_new[i]=beta[i+1]-beta[i]
-- beta holds the #beta coefficients; the result is a function of t in [0, 1].
function math.bernstein_poly(beta)
	local degree = #beta - 1
	local logc = math.lbinom(degree) -- log binomial coefficients
	return function (t)
		assert(t >= 0 and t <= 1);
		-- the end points are returned directly; log(0) is avoided
		if t == 0 then return beta[1] end
		if t == 1 then return beta[degree+1] end
		local log_t, log_u = math.log(t), math.log(1-t)
		local total = 0
		for i = 0, degree do
			total = total + beta[i+1] * math.exp(logc[i] + i * log_t + (degree-i) * log_u)
		end
		return total
	end
end
|
||||||
|
|
||||||
|
-- Description: Fisher's exact test
-- Dependent on: math.lbinom()
-- Return: left-, right- and two-tail P-values
--[[
  Fisher's exact test for 2x2 contingency tables:

    n11  n12  | n1_
    n21  n22  | n2_
   -----------+----
    n_1  n_2  | n

  Reference: http://www.langsrud.com/fisher.htm
]]--
function math.fisher_exact(n11, n12, n21, n22)
	local aux; -- keep the states of n* for acceleration

	-- Description: hypergeometric function
	local function hypergeo(n11, n1_, n_1, n)
		return math.exp(math.lbinom(n1_, n11) + math.lbinom(n-n1_, n_1-n11) - math.lbinom(n, n_1));
	end

	-- Description: incremental hypergeometric function
	-- Note: aux = {n11, n1_, n_1, n, p}
	-- Called with non-zero margins it (re)initialises aux; called with all
	-- margins 0 it only moves n11, updating p incrementally when n11 changed by 1.
	local function hypergeo_inc(n11, n1_, n_1, n)
		if n1_ ~= 0 or n_1 ~= 0 or n ~= 0 then
			aux = {n11, n1_, n_1, n, 1};
		else -- then only n11 is changed
			-- `_` is local so the discarded integer part does not leak into the globals
			local _, mod = math.modf(n11 / 11);
			-- when n11 is a multiple of 11 the incremental update is skipped and p is recomputed below
			if mod ~= 0 and n11 + aux[4] - aux[2] - aux[3] ~= 0 then
				if n11 == aux[1] + 1 then -- increase by 1
					aux[5] = aux[5] * (aux[2] - aux[1]) / n11 * (aux[3] - aux[1]) / (n11 + aux[4] - aux[2] - aux[3]);
					aux[1] = n11;
					return aux[5];
				end
				if n11 == aux[1] - 1 then -- decrease by 1
					aux[5] = aux[5] * aux[1] / (aux[2] - n11) * (aux[1] + aux[4] - aux[2] - aux[3]) / (aux[3] - n11);
					aux[1] = n11;
					return aux[5];
				end
			end
			aux[1] = n11;
		end
		aux[5] = hypergeo(aux[1], aux[2], aux[3], aux[4]);
		return aux[5];
	end

	-- Description: computing the P-value by Fisher's exact test
	local max, min, left, right, n1_, n_1, n, two, p, q, i, j;
	n1_, n_1, n = n11 + n12, n11 + n21, n11 + n12 + n21 + n22;
	max = (n_1 < n1_ and n_1) or n1_; -- max n11, for the right tail
	min = n1_ + n_1 - n;
	if min < 0 then min = 0 end -- min n11, for the left tail
	two, left, right = 1, 1, 1;
	-- no need to do test; all three P-values are 1 (the first return value is unchanged)
	if min == max then return left, right, two end
	q = hypergeo_inc(n11, n1_, n_1, n); -- the probability of the current table
	-- left tail
	i, left, p = min + 1, 0, hypergeo_inc(min, 0, 0, 0);
	while p < 0.99999999 * q do
		left, p, i = left + p, hypergeo_inc(i, 0, 0, 0), i + 1;
	end
	i = i - 1;
	if p < 1.00000001 * q then left = left + p;
	else i = i - 1 end
	-- right tail
	j, right, p = max - 1, 0, hypergeo_inc(max, 0, 0, 0);
	while p < 0.99999999 * q do
		right, p, j = right + p, hypergeo_inc(j, 0, 0, 0), j - 1;
	end
	j = j + 1;
	if p < 1.00000001 * q then right = right + p;
	else j = j + 1 end
	-- two-tail
	two = left + right;
	if two > 1 then two = 1 end
	-- adjust left and right
	if math.abs(i - n11) < math.abs(j - n11) then right = 1 - left + q;
	else left = 1 - right + q end
	return left, right, two;
end
|
||||||
|
|
||||||
|
-- Description: Delete-m Jackknife
--[[
  Given g groups of values with a statistics estimated from m[i] samples in
  i-th group being t[i], compute the mean and the variance. t0 below is the
  estimate from all samples. Reference:

  Busing et al. (1999) Delete-m Jackknife for unequal m. Statistics and Computing, 9:3-8.
]]--
-- Returns: mean, variance.
function math.jackknife(g, m, t, t0)
	-- total number of samples
	local total = 0
	for j = 1, g do total = total + m[j] end

	if t0 == nil then -- When t0 is absent, estimate it in a naive way
		local weighted = 0
		for j = 1, g do weighted = weighted + m[j] * t[j] end
		t0 = weighted / total
	end

	local ratio, acc = {}, 0
	for j = 1, g do
		ratio[j] = total / m[j]
		acc = acc + (1 - m[j] / total) * t[j]
	end
	local mean = g * t0 - acc -- Eq. (8)

	local var = 0
	for j = 1, g do
		local hj = ratio[j]
		local x = hj * t0 - (hj - 1) * t[j] - mean
		var = var + 1 / (hj - 1) * x * x
	end
	return mean, var / g
end
|
||||||
|
|
||||||
|
-- Description: Pearson correlation coefficient
-- Input: a is an n*2 table
function math.pearson(a)
	local count = #a
	-- first pass: the means of both columns
	local sx, sy = 0, 0
	for _, pt in pairs(a) do
		sx = sx + pt[1]
		sy = sy + pt[2]
	end
	local mx, my = sx / count, sy / count
	-- second pass: sums of squares and cross products of the deviations
	local sxx, syy, sxy = 0, 0, 0
	for _, pt in pairs(a) do
		local dx, dy = pt[1] - mx, pt[2] - my
		sxy = sxy + dx * dy
		sxx = sxx + dx * dx
		syy = syy + dy * dy
	end
	return sxy / math.sqrt(sxx) / math.sqrt(syy)
end
|
||||||
|
|
||||||
|
-- Description: Spearman correlation coefficient
-- Input: a is an n*2 table. Side effects: a is re-sorted in place and every
-- row gets a field `r` holding its (doubled) rank in each column.
function math.spearman(a)
	local function tie_term(t) -- auxiliary function
		if t == 1 then return 0 end
		return (t*t - 1) * t / 12
	end

	for _, row in pairs(a) do row.r = {} end
	local T, S = {}, {}
	-- compute the rank
	for col = 1, 2 do
		table.sort(a, function(u,v) return u[col]<v[col] end)
		local run = 1 -- length of the current run of equal values
		T[col] = 0
		for i = 2, #a + 1 do
			if i <= #a and a[i-1][col] == a[i][col] then
				run = run + 1
			else
				-- twice the average rank of the run ending at i-1 (halved when the ranks are compared below)
				local rank = (i-1) * 2 - run + 1
				for j = i - run, i - 1 do a[j].r[col] = rank end
				if run > 1 then
					T[col] = T[col] + tie_term(run)
					run = 1
				end
			end
		end
		S[col] = tie_term(#a) - T[col]
	end
	-- compute the coefficient
	local sum = 0
	for _, row in pairs(a) do -- TODO: use nested loops to reduce loss of precision
		local half_diff = (row.r[1] - row.r[2]) / 2
		sum = sum + half_diff * half_diff
	end
	return (S[1] + S[2] - sum) / 2 / math.sqrt(S[1] * S[2])
end
|
||||||
|
|
||||||
|
-- Description: Hooke-Jeeves derivative-free optimization
-- func(x, data) is the objective; x is the starting point and is updated in
-- place; r is the step-shrinking ratio (default 0.5); eps the radius at which
-- the search stops (default 1e-7); max_calls caps the number of func()
-- evaluations (default 50000).
-- Returns: the last evaluated objective value and the number of func() calls.
function math.fmin(func, x, data, r, eps, max_calls)
	local n, n_calls = #x, 0;
	r = r or 0.5;
	eps = eps or 1e-7;
	max_calls = max_calls or 50000

	-- Exploratory move around x1 with per-coordinate steps dx (both modified in
	-- place); fx1 is the objective at the incoming x1. Returns the best value found.
	-- Declared local so the helper does not leak into the global environment.
	local function fmin_aux(x1, data, fx1, dx) -- auxiliary function
		for k = 1, n do
			x1[k] = x1[k] + dx[k];
			local ftmp = func(x1, data); n_calls = n_calls + 1;
			if ftmp < fx1 then fx1 = ftmp;
			else -- search the opposite direction
				dx[k] = -dx[k];
				x1[k] = x1[k] + dx[k] + dx[k];
				ftmp = func(x1, data); n_calls = n_calls + 1;
				if ftmp < fx1 then fx1 = ftmp
				else x1[k] = x1[k] - dx[k] end -- back to the original x[k]
			end
		end
		return fx1; -- here: fx1=f(n,x1)
	end

	local dx, x1 = {}, {};
	for k = 1, n do -- initial directions, based on MGJ
		dx[k] = math.abs(x[k]) * r;
		if dx[k] == 0 then dx[k] = r end;
	end
	local radius = r;
	local fx1, fx;
	fx = func(x, data); fx1 = fx; n_calls = n_calls + 1;
	while true do
		for i = 1, n do x1[i] = x[i] end; -- x1 = x
		fx1 = fmin_aux(x1, data, fx, dx);
		while fx1 < fx do
			-- pattern move: accept x1 as the new base and extrapolate along x1 - x
			for k = 1, n do
				local t = x[k];
				dx[k] = (x1[k] > x[k] and math.abs(dx[k])) or -math.abs(dx[k]);
				x[k] = x1[k];
				x1[k] = x1[k] + x1[k] - t;
			end
			fx = fx1;
			if n_calls >= max_calls then break end
			fx1 = func(x1, data); n_calls = n_calls + 1;
			fx1 = fmin_aux(x1, data, fx1, dx);
			if fx1 >= fx then break end
			local kk = n;
			for k = 1, n do
				if math.abs(x1[k] - x[k]) > .5 * math.abs(dx[k]) then
					kk = k;
					break;
				end
			end
			if kk == n then break end
		end
		if radius >= eps then
			if n_calls >= max_calls then break end
			radius = radius * r;
			for k = 1, n do dx[k] = dx[k] * r end
		else break end
	end
	return fx1, n_calls;
end
|
||||||
|
|
||||||
|
--
|
||||||
|
-- Matrix
|
||||||
|
--
|
||||||
|
|
||||||
|
-- Global table holding the matrix routines; a matrix is an array of row arrays.
matrix = {}

-- Description: matrix transpose
-- Required by: matrix.mul()
-- Returns a new table; the input is not modified.
function matrix.T(a)
	local rows, cols = #a, #a[1]
	local out = {}
	for c = 1, cols do
		local line = {}
		for r = 1, rows do line[r] = a[r][c] end
		out[c] = line
	end
	return out
end
|
||||||
|
|
||||||
|
-- Description: matrix add
-- Both matrices must have the same shape (asserted). Returns a new matrix.
function matrix.add(a, b)
	assert(#a == #b and #a[1] == #b[1]);
	local rows, cols = #a, #a[1]
	local out = {}
	for i = 1, rows do
		local row_a, row_b, row_out = a[i], b[i], {}
		for j = 1, cols do row_out[j] = row_a[j] + row_b[j] end
		out[i] = row_out
	end
	return out
end
|
||||||
|
|
||||||
|
-- Description: matrix mul
-- Dependent on: matrix.T()
-- Note: much slower without transpose
-- The column count of a must equal the row count of b (asserted). Returns a new matrix.
function matrix.mul(a, b)
	assert(#a[1] == #b);
	local rows, inner, cols = #a, #a[1], #b[1]
	local bt = matrix.T(b) -- transpose for efficiency: rows of bt are the columns of b
	local out = {}
	for i = 1, rows do
		local row_out = {}
		out[i] = row_out
		for j = 1, cols do
			local row_a, col_b = a[i], bt[j]
			local dot = 0
			for k = 1, inner do dot = dot + row_a[k] * col_b[k] end
			row_out[j] = dot
		end
	end
	return out
end
|
||||||
|
|
||||||
|
-- Description: matrix print
-- Returns the matrix as text: cells separated by tabs, rows by newlines.
function matrix.tostring(a)
	local lines = {}
	for i, row in ipairs({}) do lines[i] = row end -- (no-op; keeps `lines` empty before the fill below)
	for i = 1, #a do
		lines[i] = table.concat(a[i], "\t")
	end
	return table.concat(lines, "\n")
end
|
||||||
|
|
||||||
|
-- Description: chi^2 test for contingency tables
-- Dependent on: math.igamma()
-- Returns: the chi^2 statistic, the P-value, and a flag that is false only
-- when a 2x2 table has a zero marginal (in which case 0, 1, false is returned).
function matrix.chi2(a)
	if #a == 2 and #a[1] == 2 then -- 2x2 table
		local n11, n12, n21, n22 = a[1][1], a[1][2], a[2][1], a[2][2]
		-- product of the four marginal sums
		local margins = (n11 + n12) * (n21 + n22) * (n11 + n21) * (n12 + n22)
		if margins == 0 then return 0, 1, false end
		local det = n11 * n22 - n12 * n21
		local stat = (n11 + n12 + n21 + n22) * det * det / margins
		return stat, math.igamma(.5, .5 * stat, true), true
	end

	-- generic table
	local nrow, ncol = #a, #a[1]
	local row_sum, col_sum = {}, {}
	for i = 1, nrow do row_sum[i] = 0 end
	for j = 1, ncol do col_sum[j] = 0 end
	for i = 1, nrow do -- compute column sum and row sum
		for j = 1, ncol do
			local cell = a[i][j]
			col_sum[j] = col_sum[j] + cell
			row_sum[i] = row_sum[i] + cell
		end
	end
	local total = 0
	for i = 1, nrow do total = total + row_sum[i] end
	local stat = 0
	for i = 1, nrow do -- compute the chi^2 statistics
		for j = 1, ncol do
			local expected = row_sum[i] * col_sum[j] / total
			stat = stat + (a[i][j] - expected) * (a[i][j] - expected) / expected
		end
	end
	-- degrees of freedom = (nrow-1) * (ncol-1)
	return stat, math.igamma(.5 * (nrow-1) * (ncol-1), .5 * stat, true), true
end
|
||||||
|
|
||||||
|
-- Description: Gauss-Jordan elimination (solving equations; computing inverse)
|
||||||
|
-- Note: on return, a[n][n] is the inverse; b[n][m] is the solution
|
||||||
|
-- Reference: Section 2.1, Numerical Recipes in C, 2nd edition
|
||||||
|
-- a: square n x n matrix (list of rows); overwritten with its inverse.
-- b: optional n x m matrix of right-hand sides; overwritten with the solutions.
-- Returns 0 on success, or -2 / -3 when the matrix is singular (in which case
-- a and b are left partially modified).
function matrix.solve(a, b)
	assert(#a == #a[1]);
	local n, m = #a, (b and #b[1]) or 0;
	local xc, xr, ipiv = {}, {}, {};
	local ic, ir;

	for j = 1, n do ipiv[j] = 0 end
	for i = 1, n do
		-- full pivoting: pick the largest element among rows/columns that
		-- have not yet been used as a pivot
		local big = 0;
		for j = 1, n do
			local aj = a[j];
			if ipiv[j] ~= 1 then
				for k = 1, n do
					if ipiv[k] == 0 then
						if math.abs(aj[k]) >= big then
							big = math.abs(aj[k]);
							ir, ic = j, k;
						end
					elseif ipiv[k] > 1 then return -2 end -- singular matrix
				end
			end
		end
		ipiv[ic] = ipiv[ic] + 1;
		-- move the pivot onto the diagonal by exchanging rows ir and ic
		if ir ~= ic then
			for l = 1, n do a[ir][l], a[ic][l] = a[ic][l], a[ir][l] end
			if b then
				for l = 1, m do b[ir][l], b[ic][l] = b[ic][l], b[ir][l] end
			end
		end
		xr[i], xc[i] = ir, ic;
		if a[ic][ic] == 0 then return -3 end -- singular matrix
		local pivinv = 1 / a[ic][ic];
		a[ic][ic] = 1;
		for l = 1, n do a[ic][l] = a[ic][l] * pivinv end
		if b then
			-- b has m columns, not n: scale every right-hand side and only those
			for l = 1, m do b[ic][l] = b[ic][l] * pivinv end
		end
		-- eliminate column ic from every other row
		for ll = 1, n do
			if ll ~= ic then
				local tmp = a[ll][ic];
				a[ll][ic] = 0;
				local all, aic = a[ll], a[ic];
				for l = 1, n do all[l] = all[l] - aic[l] * tmp end
				if b then
					local bll, bic = b[ll], b[ic];
					for l = 1, m do bll[l] = bll[l] - bic[l] * tmp end
				end
			end
		end
	end
	-- undo the row exchanges by swapping columns in reverse order
	for l = n, 1, -1 do
		if xr[l] ~= xc[l] then
			for k = 1, n do a[k][xr[l]], a[k][xc[l]] = a[k][xc[l]], a[k][xr[l]] end
		end
	end
	return 0;
end
|
||||||
|
|
@ -0,0 +1,72 @@
|
||||||
|
# Build rules for the test and benchmark programs in this directory.
# Headers and shared sources are taken from the parent directory (-I..).
CC=gcc
CXX=g++
CFLAGS=-g -Wall -O2 -I..
CXXFLAGS=$(CFLAGS)

# Programs built by `make all`.
PROGS=kbtree_test khash_keith khash_keith2 khash_test klist_test kseq_test kseq_bench \
	kseq_bench2 ksort_test ksort_test-stl kvec_test kmin_test kstring_bench kstring_bench2 kstring_test \
	kavl_test kavl-lite_test kthread_test2

# Programs that have a rule below but are only built on request
# (kthread_test additionally requires OpenMP support in the compiler).
EXTRA_PROGS=kthread_test ketopt_test

# `all` and `clean` are commands, not files.
.PHONY: all clean

all:$(PROGS)

clean:
	rm -fr $(PROGS) $(EXTRA_PROGS) *.dSYM a.out *.o

kavl_test:kavl_test.c ../kavl.h
	$(CC) $(CFLAGS) -o $@ kavl_test.c

kavl-lite_test:kavl-lite_test.c ../kavl-lite.h
	$(CC) $(CFLAGS) -o $@ kavl-lite_test.c

kbtree_test:kbtree_test.c ../kbtree.h
	$(CC) $(CFLAGS) -o $@ kbtree_test.c

khash_keith:khash_keith.c ../khash.h
	$(CC) $(CFLAGS) -o $@ khash_keith.c

khash_keith2:khash_keith2.c ../khash.h
	$(CC) $(CFLAGS) -o $@ khash_keith2.c

khash_test:khash_test.c ../khash.h
	$(CC) $(CFLAGS) -o $@ khash_test.c

klist_test:klist_test.c ../klist.h
	$(CC) $(CFLAGS) -o $@ klist_test.c

kseq_test:kseq_test.c ../kseq.h
	$(CC) $(CFLAGS) -o $@ kseq_test.c -lz

kseq_bench:kseq_bench.c ../kseq.h
	$(CC) $(CFLAGS) -o $@ kseq_bench.c -lz

kseq_bench2:kseq_bench2.c ../kseq.h
	$(CC) $(CFLAGS) -o $@ kseq_bench2.c -lz

ksort_test:ksort_test.c ../ksort.h
	$(CC) $(CFLAGS) -o $@ ksort_test.c

ksort_test-stl:ksort_test.cc ../ksort.h
	$(CXX) $(CXXFLAGS) -o $@ ksort_test.cc

kvec_test:kvec_test.cc ../kvec.h
	$(CXX) $(CXXFLAGS) -o $@ kvec_test.cc

# kmath.c is assumed to call libm functions, hence -lm.
kmin_test:kmin_test.c ../kmath.h ../kmath.c
	$(CC) $(CFLAGS) -o $@ kmin_test.c ../kmath.c -lm

kstring_bench:kstring_bench.c ../kstring.h ../kstring.c
	$(CC) $(CFLAGS) -o $@ kstring_bench.c ../kstring.c

kstring_bench2:kstring_bench2.c ../kstring.h ../kstring.c
	$(CC) $(CFLAGS) -o $@ kstring_bench2.c ../kstring.c

kstring_test:kstring_test.c ../kstring.h ../kstring.c
	$(CC) $(CFLAGS) -o $@ kstring_test.c ../kstring.c

# kthread.c is assumed to use POSIX threads, hence -lpthread.
kthread_test:kthread_test.c ../kthread.c
	$(CC) $(CFLAGS) -fopenmp -o $@ kthread_test.c ../kthread.c -lpthread

kthread_test2:kthread_test2.c ../kthread.c
	$(CC) $(CFLAGS) -o $@ kthread_test2.c ../kthread.c -lpthread

ketopt_test:ketopt_test.c ../ketopt.h
	$(CC) $(CFLAGS) -o $@ ketopt_test.c
|
||||||
|
|
@ -0,0 +1,60 @@
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include "kavl-lite.h"
|
||||||
|
|
||||||
|
#define CALLOC(type, num) ((type*)calloc(num, sizeof(type)))
|
||||||
|
|
||||||
|
/* Tree element used by this test: `key` holds the character code that main()
 * inserts and later prints; `head` is the per-node member declared through
 * the KAVLL_HEAD macro and named in the KAVLL_INIT instantiation below. */
struct my_node {
|
||||||
|
int key;
|
||||||
|
KAVLL_HEAD(struct my_node) head;
|
||||||
|
};
|
||||||
|
|
||||||
|
#define my_cmp(p, q) (((p)->key > (q)->key) - ((p)->key < (q)->key))
|
||||||
|
KAVLL_INIT(my, struct my_node, head, my_cmp)
|
||||||
|
|
||||||
|
/*
 * Randomly permute the first n characters of a[] in place.
 * Working from the end of the array towards the front, each position is
 * exchanged with an index obtained by scaling drand48() to the number of
 * positions not yet fixed. Does nothing when n <= 1.
 */
void shuffle(int n, char a[])
{
	int remaining;
	for (remaining = n; remaining > 1; --remaining) {
		const int last = remaining - 1;
		const int pick = (int)(drand48() * remaining);
		const char held = a[pick];
		a[pick] = a[last];
		a[last] = held;
	}
}
|
||||||
|
|
||||||
|
int main(void)
|
||||||
|
{
|
||||||
|
char buf[256];
|
||||||
|
int i, n;
|
||||||
|
struct my_node *root = 0;
|
||||||
|
struct my_node *p, *q, t;
|
||||||
|
my_itr_t itr;
|
||||||
|
|
||||||
|
for (i = 33, n = 0; i <= 126; ++i)
|
||||||
|
if (i != '(' && i != ')' && i != '.' && i != ';')
|
||||||
|
buf[n++] = i;
|
||||||
|
shuffle(n, buf);
|
||||||
|
for (i = 0; i < n; ++i) {
|
||||||
|
p = CALLOC(struct my_node, 1);
|
||||||
|
p->key = buf[i];
|
||||||
|
q = my_insert(&root, p);
|
||||||
|
if (p != q) free(p);
|
||||||
|
}
|
||||||
|
shuffle(n, buf);
|
||||||
|
for (i = 0; i < n/2; ++i) {
|
||||||
|
t.key = buf[i];
|
||||||
|
q = my_erase(&root, &t);
|
||||||
|
if (q) free(q);
|
||||||
|
}
|
||||||
|
|
||||||
|
my_itr_first(root, &itr);
|
||||||
|
do {
|
||||||
|
const struct my_node *r = kavll_at(&itr);
|
||||||
|
putchar(r->key);
|
||||||
|
free((void*)r);
|
||||||
|
} while (my_itr_next(&itr));
|
||||||
|
putchar('\n');
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,104 @@
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include "kavl.h"
|
||||||
|
|
||||||
|
#define CALLOC(type, num) ((type*)calloc(num, sizeof(type)))
|
||||||
|
|
||||||
|
/* Tree element used by this test: `key` holds the character code that main()
 * inserts and later prints; `head` is the per-node member declared through
 * the KAVL_HEAD macro (check() reads its p[], balance and size fields). */
struct my_node {
|
||||||
|
int key;
|
||||||
|
KAVL_HEAD(struct my_node) head;
|
||||||
|
};
|
||||||
|
|
||||||
|
#define my_cmp(p, q) (((p)->key > (q)->key) - ((p)->key < (q)->key))
|
||||||
|
KAVL_INIT(my, struct my_node, head, my_cmp)
|
||||||
|
|
||||||
|
int check(struct my_node *p, int *hh)
|
||||||
|
{
|
||||||
|
int c = 1, h[2] = {0, 0};
|
||||||
|
*hh = 0;
|
||||||
|
if (p) {
|
||||||
|
if (p->head.p[0]) c += check(p->head.p[0], &h[0]);
|
||||||
|
if (p->head.p[1]) c += check(p->head.p[1], &h[1]);
|
||||||
|
*hh = (h[0] > h[1]? h[0] : h[1]) + 1;
|
||||||
|
if (h[1] - h[0] != (int)p->head.balance)
|
||||||
|
fprintf(stderr, "%d - %d != %d at %c\n", h[1], h[0], p->head.balance, p->key);
|
||||||
|
if (c != (int)p->head.size)
|
||||||
|
fprintf(stderr, "%d != %d at %c\n", p->head.size, c, p->key);
|
||||||
|
return c;
|
||||||
|
} else return 0;
|
||||||
|
}
|
||||||
|
/*
|
||||||
|
int print_tree(const struct my_node *p)
|
||||||
|
{
|
||||||
|
int c = 1;
|
||||||
|
if (p == 0) return 0;
|
||||||
|
if (p->head.p[0] || p->head.p[1]) {
|
||||||
|
putchar('(');
|
||||||
|
if (p->head.p[0]) c += print_tree(p->head.p[0]);
|
||||||
|
else putchar('.');
|
||||||
|
putchar(',');
|
||||||
|
if (p->head.p[1]) c += print_tree(p->head.p[1]);
|
||||||
|
else putchar('.');
|
||||||
|
putchar(')');
|
||||||
|
}
|
||||||
|
putchar(p->key);
|
||||||
|
return c;
|
||||||
|
}
|
||||||
|
|
||||||
|
void check_and_print(struct my_node *root)
|
||||||
|
{
|
||||||
|
int h;
|
||||||
|
check(root, &h);
|
||||||
|
print_tree(root);
|
||||||
|
putchar('\n');
|
||||||
|
}
|
||||||
|
*/
|
||||||
|
/*
 * Randomly permute the first n characters of a[] in place.
 * Working from the end of the array towards the front, each position is
 * exchanged with an index obtained by scaling drand48() to the number of
 * positions not yet fixed. Does nothing when n <= 1.
 */
void shuffle(int n, char a[])
{
	int remaining;
	for (remaining = n; remaining > 1; --remaining) {
		const int last = remaining - 1;
		const int pick = (int)(drand48() * remaining);
		const char held = a[pick];
		a[pick] = a[last];
		a[last] = held;
	}
}
|
||||||
|
|
||||||
|
int main(void)
|
||||||
|
{
|
||||||
|
char buf[256];
|
||||||
|
int i, n, h;
|
||||||
|
struct my_node *root = 0;
|
||||||
|
struct my_node *p, *q, t;
|
||||||
|
kavl_itr_t(my) itr;
|
||||||
|
unsigned cnt;
|
||||||
|
|
||||||
|
for (i = 33, n = 0; i <= 126; ++i)
|
||||||
|
if (i != '(' && i != ')' && i != '.' && i != ';')
|
||||||
|
buf[n++] = i;
|
||||||
|
shuffle(n, buf);
|
||||||
|
for (i = 0; i < n; ++i) {
|
||||||
|
p = CALLOC(struct my_node, 1);
|
||||||
|
p->key = buf[i];
|
||||||
|
q = kavl_insert(my, &root, p, &cnt);
|
||||||
|
if (p != q) free(p);
|
||||||
|
check(root, &h);
|
||||||
|
}
|
||||||
|
shuffle(n, buf);
|
||||||
|
for (i = 0; i < n/2; ++i) {
|
||||||
|
t.key = buf[i];
|
||||||
|
q = kavl_erase(my, &root, &t, 0);
|
||||||
|
if (q) free(q);
|
||||||
|
check(root, &h);
|
||||||
|
}
|
||||||
|
|
||||||
|
kavl_itr_first(my, root, &itr);
|
||||||
|
do {
|
||||||
|
const struct my_node *r = kavl_at(&itr);
|
||||||
|
putchar(r->key);
|
||||||
|
free((void*)r);
|
||||||
|
} while (kavl_itr_next(my, &itr));
|
||||||
|
putchar('\n');
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,137 @@
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <time.h>
|
||||||
|
#include <emmintrin.h>
|
||||||
|
#include "kbit.h"
|
||||||
|
|
||||||
|
// from bowtie-0.9.8.1
|
||||||
|
/*
 * Count the bits set in a 64-bit word by summing in parallel: first within
 * 2-bit groups, then 4-bit groups, then bytes, and finally across all bytes.
 *
 * Returns a value in 0..64. The final mask has to keep seven bits: the total
 * can be exactly 64 (0x40), which a 6-bit mask (0x3F) would turn into 0 for
 * an all-ones input.
 */
inline static int bt1_pop64(uint64_t x) // the kbi_popcount64() equivalence; similar to popcount_2() in wiki
{
	x -= ((x >> 1) & 0x5555555555555555llu);
	x = (x & 0x3333333333333333llu) + ((x >> 2) & 0x3333333333333333llu);
	x = (x + (x >> 4)) & 0x0F0F0F0F0F0F0F0Fllu;
	/* every byte now holds 0..8; fold all bytes into the lowest one (<= 64) */
	x = x + (x >> 8);
	x = x + (x >> 16);
	x = x + (x >> 32);
	return (int)(x & 0x7F);
}
|
||||||
|
|
||||||
|
/*
 * Count how many of the 32 two-bit fields of dw are equal to c.
 *
 * The high bit of every field is shifted down onto the low-bit position so
 * both bits of a field can be combined with one bitwise operation; the set
 * bits of the combined word are then counted with bt1_pop64(). For c == 0 the
 * word marks the fields that are NOT zero, so the count is taken from 32.
 * Any c outside 0..2 is treated like 3.
 */
inline static int bt1_countInU64(uint64_t dw, int c) // the kbi_DNAcount64() equivalence
{
	const uint64_t high_bits = 0xAAAAAAAAAAAAAAAAllu;
	const uint64_t hi = (dw & high_bits) >> 1; /* high bit of each field, moved to the low slot */
	const uint64_t lo = dw & ~high_bits;       /* low bit of each field */
	uint64_t marked;
	int count;

	if (c == 0) marked = hi | lo;        /* fields with any bit set */
	else if (c == 1) marked = ~hi & lo;  /* 01 */
	else if (c == 2) marked = hi & ~lo;  /* 10 */
	else marked = hi & lo;               /* 11 */

	count = bt1_pop64(marked);
	return c == 0? 32 - count : count;
}
|
||||||
|
|
||||||
|
// from bigmagic
|
||||||
|
static uint32_t sse2_bit_count32(const __m128i* block, const __m128i* block_end)
|
||||||
|
{
|
||||||
|
const unsigned mu1 = 0x55555555;
|
||||||
|
const unsigned mu2 = 0x33333333;
|
||||||
|
const unsigned mu3 = 0x0F0F0F0F;
|
||||||
|
const unsigned mu4 = 0x0000003F;
|
||||||
|
|
||||||
|
uint32_t tcnt[4];
|
||||||
|
|
||||||
|
// Loading masks
|
||||||
|
__m128i m1 = _mm_set_epi32 (mu1, mu1, mu1, mu1);
|
||||||
|
__m128i m2 = _mm_set_epi32 (mu2, mu2, mu2, mu2);
|
||||||
|
__m128i m3 = _mm_set_epi32 (mu3, mu3, mu3, mu3);
|
||||||
|
__m128i m4 = _mm_set_epi32 (mu4, mu4, mu4, mu4);
|
||||||
|
__m128i mcnt;
|
||||||
|
mcnt = _mm_xor_si128(m1, m1); // cnt = 0
|
||||||
|
|
||||||
|
__m128i tmp1, tmp2;
|
||||||
|
do
|
||||||
|
{
|
||||||
|
__m128i b = _mm_load_si128(block);
|
||||||
|
++block;
|
||||||
|
|
||||||
|
// b = (b & 0x55555555) + (b >> 1 & 0x55555555);
|
||||||
|
tmp1 = _mm_srli_epi32(b, 1); // tmp1 = (b >> 1 & 0x55555555)
|
||||||
|
tmp1 = _mm_and_si128(tmp1, m1);
|
||||||
|
tmp2 = _mm_and_si128(b, m1); // tmp2 = (b & 0x55555555)
|
||||||
|
b = _mm_add_epi32(tmp1, tmp2); // b = tmp1 + tmp2
|
||||||
|
|
||||||
|
// b = (b & 0x33333333) + (b >> 2 & 0x33333333);
|
||||||
|
tmp1 = _mm_srli_epi32(b, 2); // (b >> 2 & 0x33333333)
|
||||||
|
tmp1 = _mm_and_si128(tmp1, m2);
|
||||||
|
tmp2 = _mm_and_si128(b, m2); // (b & 0x33333333)
|
||||||
|
b = _mm_add_epi32(tmp1, tmp2); // b = tmp1 + tmp2
|
||||||
|
|
||||||
|
// b = (b + (b >> 4)) & 0x0F0F0F0F;
|
||||||
|
tmp1 = _mm_srli_epi32(b, 4); // tmp1 = b >> 4
|
||||||
|
b = _mm_add_epi32(b, tmp1); // b = b + (b >> 4)
|
||||||
|
b = _mm_and_si128(b, m3); // & 0x0F0F0F0F
|
||||||
|
|
||||||
|
// b = b + (b >> 8);
|
||||||
|
tmp1 = _mm_srli_epi32 (b, 8); // tmp1 = b >> 8
|
||||||
|
b = _mm_add_epi32(b, tmp1); // b = b + (b >> 8)
|
||||||
|
|
||||||
|
// b = (b + (b >> 16)) & 0x0000003F;
|
||||||
|
tmp1 = _mm_srli_epi32 (b, 16); // b >> 16
|
||||||
|
b = _mm_add_epi32(b, tmp1); // b + (b >> 16)
|
||||||
|
b = _mm_and_si128(b, m4); // (b >> 16) & 0x0000003F;
|
||||||
|
|
||||||
|
mcnt = _mm_add_epi32(mcnt, b); // mcnt += b
|
||||||
|
|
||||||
|
} while (block < block_end);
|
||||||
|
|
||||||
|
_mm_store_si128((__m128i*)tcnt, mcnt);
|
||||||
|
|
||||||
|
return tcnt[0] + tcnt[1] + tcnt[2] + tcnt[3];
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Benchmark driver: fills an array of N pseudo-random 64-bit words (seeded
 * with srand48(11)) and times several ways of counting set bits and of
 * counting one 2-bit symbol, printing name, total and CPU seconds to stderr.
 * Returns 1 if the array cannot be allocated, 0 otherwise.
 */
int main(void)
{
	int i, N = 100000000;
	uint64_t *x, cnt;
	clock_t t;
	int c = 1;

	/* N * 8 bytes is a large request; do not dereference a failed allocation. */
	x = (uint64_t*)calloc(N, 8);
	if (x == NULL) {
		fprintf(stderr, "ERROR: failed to allocate %d 64-bit integers\n", N);
		return 1;
	}
	srand48(11);
	for (i = 0; i < N; ++i) {
		/* Two separate statements: the order of two lrand48() calls inside
		 * one expression is unspecified, which would make the data set
		 * depend on the compiler. */
		const uint64_t hi = (uint64_t)lrand48();
		const uint64_t lo = (uint64_t)lrand48();
		x[i] = hi << 32 ^ lo;
	}

	fprintf(stderr, "\n===> Calculate # of 1 in an integer (popcount) <===\n");

	t = clock(); cnt = 0;
	for (i = 0; i < N; ++i) cnt += kbi_popcount64(x[i]);
	fprintf(stderr, "%20s\t%20ld\t%10.6f\n", "kbit", (long)cnt, (double)(clock() - t) / CLOCKS_PER_SEC);

	t = clock(); cnt = 0;
	for (i = 0; i < N; ++i) cnt += bt1_pop64(x[i]);
	fprintf(stderr, "%20s\t%20ld\t%10.6f\n", "wiki-popcount_2", (long)cnt, (double)(clock() - t) / CLOCKS_PER_SEC);

	/* The `ll` variant takes unsigned long long, so all 64 bits are counted
	 * even where `long` is 32 bits wide. */
	t = clock(); cnt = 0;
	for (i = 0; i < N; ++i) cnt += __builtin_popcountll(x[i]);
	fprintf(stderr, "%20s\t%20ld\t%10.6f\n", "__builtin_popcountll", (long)cnt, (double)(clock() - t) / CLOCKS_PER_SEC);

	/* NOTE(review): sse2_bit_count32() uses aligned loads; this relies on
	 * calloc() returning 16-byte aligned memory -- confirm for the target. */
	t = clock(); cnt = 0;
	cnt += sse2_bit_count32((__m128i*)x, (__m128i*)(x+N));
	fprintf(stderr, "%20s\t%20ld\t%10.6f\n", "SSE2-32bit", (long)cnt, (double)(clock() - t) / CLOCKS_PER_SEC);

	fprintf(stderr, "\n===> Count '%c' in 2-bit encoded integers <===\n", "ACGT"[c]);

	t = clock(); cnt = 0;
	for (i = 0; i < N; ++i) cnt += kbi_DNAcount64(x[i], c);
	fprintf(stderr, "%20s\t%20ld\t%10.6f\n", "kbit", (long)cnt, (double)(clock() - t) / CLOCKS_PER_SEC);

	t = clock(); cnt = 0;
	for (i = 0; i < N; ++i) cnt += bt1_countInU64(x[i], c);
	fprintf(stderr, "%20s\t%20ld\t%10.6f\n", "bowtie1", (long)cnt, (double)(clock() - t) / CLOCKS_PER_SEC);

	fprintf(stderr, "\n");
	free(x);
	return 0;
}
|
||||||
|
|
@ -0,0 +1,94 @@
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <assert.h>
|
||||||
|
#include <time.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
|
typedef const char *str_t;
|
||||||
|
|
||||||
|
#include "kbtree.h"
|
||||||
|
KBTREE_INIT(int, uint32_t, kb_generic_cmp)
|
||||||
|
KBTREE_INIT(str, str_t, kb_str_cmp)
|
||||||
|
|
||||||
|
static int data_size = 5000000;
|
||||||
|
static unsigned *int_data;
|
||||||
|
static char **str_data;
|
||||||
|
|
||||||
|
void ht_init_data()
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
char buf[256];
|
||||||
|
printf("--- generating data... ");
|
||||||
|
srand48(11);
|
||||||
|
int_data = (unsigned*)calloc(data_size, sizeof(unsigned));
|
||||||
|
str_data = (char**)calloc(data_size, sizeof(char*));
|
||||||
|
for (i = 0; i < data_size; ++i) {
|
||||||
|
int_data[i] = (unsigned)(data_size * drand48() / 4) * 271828183u;
|
||||||
|
sprintf(buf, "%x", int_data[i]);
|
||||||
|
str_data[i] = strdup(buf);
|
||||||
|
}
|
||||||
|
printf("done!\n");
|
||||||
|
}
|
||||||
|
void ht_destroy_data()
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
for (i = 0; i < data_size; ++i) free(str_data[i]);
|
||||||
|
free(str_data); free(int_data);
|
||||||
|
}
|
||||||
|
|
||||||
|
void ht_khash_int()
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
unsigned *data = int_data;
|
||||||
|
uint32_t *l, *u;
|
||||||
|
kbtree_t(int) *h;
|
||||||
|
|
||||||
|
h = kb_init(int, KB_DEFAULT_SIZE);
|
||||||
|
for (i = 0; i < data_size; ++i) {
|
||||||
|
if (kb_get(int, h, data[i]) == 0) kb_put(int, h, data[i]);
|
||||||
|
else kb_del(int, h, data[i]);
|
||||||
|
}
|
||||||
|
printf("[ht_khash_int] size: %d\n", kb_size(h));
|
||||||
|
if (1) {
|
||||||
|
int cnt = 0;
|
||||||
|
uint32_t x, y;
|
||||||
|
kb_interval(int, h, 2174625464u, &l, &u);
|
||||||
|
printf("interval for 2174625464: (%u, %u)\n", l? *l : 0, u? *u : 0);
|
||||||
|
#define traverse_f(p) { if (cnt == 0) y = *p; ++cnt; }
|
||||||
|
__kb_traverse(uint32_t, h, traverse_f);
|
||||||
|
__kb_get_first(uint32_t, h, x);
|
||||||
|
printf("# of elements from traversal: %d\n", cnt);
|
||||||
|
printf("first element: %d == %d\n", x, y);
|
||||||
|
}
|
||||||
|
__kb_destroy(h);
|
||||||
|
}
|
||||||
|
void ht_khash_str()
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
char **data = str_data;
|
||||||
|
kbtree_t(str) *h;
|
||||||
|
|
||||||
|
h = kb_init(str, KB_DEFAULT_SIZE);
|
||||||
|
for (i = 0; i < data_size; ++i) {
|
||||||
|
if (kb_get(str, h, data[i]) == 0) kb_put(str, h, data[i]);
|
||||||
|
else kb_del(str, h, data[i]);
|
||||||
|
}
|
||||||
|
printf("[ht_khash_int] size: %d\n", kb_size(h));
|
||||||
|
__kb_destroy(h);
|
||||||
|
}
|
||||||
|
void ht_timing(void (*f)(void))
|
||||||
|
{
|
||||||
|
clock_t t = clock();
|
||||||
|
(*f)();
|
||||||
|
printf("[ht_timing] %.3lf sec\n", (double)(clock() - t) / CLOCKS_PER_SEC);
|
||||||
|
}
|
||||||
|
int main(int argc, char *argv[])
|
||||||
|
{
|
||||||
|
if (argc > 1) data_size = atoi(argv[1]);
|
||||||
|
ht_init_data();
|
||||||
|
ht_timing(ht_khash_int);
|
||||||
|
ht_timing(ht_khash_str);
|
||||||
|
ht_destroy_data();
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,89 @@
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <getopt.h>
|
||||||
|
#include "ketopt.h"
|
||||||
|
|
||||||
|
/*
 * Report one parsed option on stderr.
 *   c   - code returned by the parser: 'x', 'y', 301/302/303 for the long
 *         options --foo/--bar/--opt, '?' for an unknown option, ':' for a
 *         missing argument; any other value prints nothing
 *   opt - option character shown in the two error messages (':' when 0)
 *   arg - option argument, may be null
 */
static void test_opt(int c, int opt, const char *arg)
{
	const int shown = opt? opt : ':';

	switch (c) {
	case 'x': fprintf(stderr, "-x\n"); break;
	case 'y': fprintf(stderr, "-y %s\n", arg); break;
	case 301: fprintf(stderr, "--foo\n"); break;
	case 302: fprintf(stderr, "--bar %s\n", arg? arg : "(null)"); break;
	case 303: fprintf(stderr, "--opt %s\n", arg? arg : "(null)"); break;
	case '?': fprintf(stderr, "unknown option -%c\n", shown); break;
	case ':': fprintf(stderr, "missing option argument: -%c\n", shown); break;
	default: break;
	}
}
|
||||||
|
|
||||||
|
/*
 * Print the command line on stderr as
 *   CMD: argv[0] [argv[1] ... argv[ind-1]] argv[ind] ... argv[argc-1]
 * i.e. the arguments before index `ind` are grouped in square brackets (the
 * group is omitted when ind <= 1) and the rest follow, space separated.
 */
static void print_cmd(int argc, char *argv[], int ind)
{
	int i;

	fprintf(stderr, "CMD: %s", argv[0]);
	if (ind > 1) {
		fputs(" [", stderr);
		fputs(argv[1], stderr);
		for (i = 2; i < ind; ++i) {
			fputc(' ', stderr);
			fputs(argv[i], stderr);
		}
		fputc(']', stderr);
	}
	i = ind;
	while (i < argc)
		fprintf(stderr, " %s", argv[i++]);
	fputc('\n', stderr);
}
|
||||||
|
|
||||||
|
static void test_ketopt(int argc, char *argv[])
|
||||||
|
{
|
||||||
|
static ko_longopt_t longopts[] = {
|
||||||
|
{ "foo", ko_no_argument, 301 },
|
||||||
|
{ "bar", ko_required_argument, 302 },
|
||||||
|
{ "opt", ko_optional_argument, 303 },
|
||||||
|
{ NULL, 0, 0 }
|
||||||
|
};
|
||||||
|
ketopt_t opt = KETOPT_INIT;
|
||||||
|
int c;
|
||||||
|
fprintf(stderr, "===> ketopt() <===\n");
|
||||||
|
while ((c = ketopt(&opt, argc, argv, 1, "xy:", longopts)) >= 0)
|
||||||
|
test_opt(c, opt.opt, opt.arg);
|
||||||
|
print_cmd(argc, argv, opt.ind);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Parse the command line with getopt_long() using the same option set as
 * test_ketopt(), report every option through test_opt(), then print the
 * command line split at optind. */
static void test_getopt(int argc, char *argv[])
{
	static struct option long_options[] = {
		{ "foo", no_argument, 0, 301 },
		{ "bar", required_argument, 0, 302 },
		{ "opt", optional_argument, 0, 303 },
		{0, 0, 0, 0}
	};
	int option_index;

	fprintf(stderr, "===> getopt() <===\n");
	for (;;) {
		const int c = getopt_long(argc, argv, ":xy:", long_options, &option_index);
		if (c < 0) break;
		test_opt(c, optopt, optarg);
	}
	print_cmd(argc, argv, optind);
}
|
||||||
|
|
||||||
|
/*
 * With no arguments, print the usage text to stderr and return 1. Otherwise
 * run the same command line through test_ketopt() and test_getopt(); each
 * parser gets its own copy of the argument vector.
 */
int main(int argc, char *argv[])
{
	static const char *const usage[] = {
		"Usage: ketopt_test [options] <argument> [...]\n",
		"Options:\n",
		" -x no argument\n",
		" -y STR required argument\n",
		" --foo no argument\n",
		" --bar=STR required argument\n",
		" --opt[=STR] optional argument\n",
		"\nExamples:\n",
		" ketopt_test -xy1 -x arg1 -y -x -- arg2 -x\n",
		" ketopt_test --foo --bar=1 --bar 2 --opt arg1 --opt=3\n",
		" ketopt_test arg1 -y\n"
	};
	char **argv2;
	int i;

	if (argc == 1) {
		for (i = 0; i < (int)(sizeof(usage) / sizeof(usage[0])); ++i)
			fputs(usage[i], stderr);
		return 1;
	}

	/* snapshot the argument pointers before the first parser runs */
	argv2 = (char**)malloc(sizeof(char*) * argc);
	i = 0;
	while (i < argc) {
		argv2[i] = argv[i];
		++i;
	}
	test_ketopt(argc, argv);
	test_getopt(argc, argv2);
	free(argv2);
	return 0;
}
|
||||||
|
|
@ -0,0 +1,26 @@
|
||||||
|
#include <stdio.h>
|
||||||
|
#include "kgraph.h"
|
||||||
|
|
||||||
|
KHASH_INIT2(e32, extern, uint32_t, int, 1, kh_int_hash_func, kh_int_hash_equal)
|
||||||
|
|
||||||
|
/* Vertex record passed to KGRAPH_INIT below: an integer field plus a pointer
 * to an e32 hash table (uint32_t keys, int values -- see KHASH_INIT2 above).
 * NOTE(review): the member name `_arc` is presumably required by the kgraph
 * macros -- confirm in kgraph.h. */
typedef struct {
|
||||||
|
int i;
|
||||||
|
khash_t(e32) *_arc;
|
||||||
|
} vertex_t;
|
||||||
|
|
||||||
|
KGRAPH_INIT(g, extern, vertex_t, int, e32)
|
||||||
|
KGRAPH_PRINT(g, extern)
|
||||||
|
|
||||||
|
int main()
|
||||||
|
{
|
||||||
|
int *pb, *pe;
|
||||||
|
kgraph_t(g) *g;
|
||||||
|
g = kg_init_g();
|
||||||
|
kg_put_a_g(g, 10, 20, 0, &pb, &pe);
|
||||||
|
kg_put_a_g(g, 20, 30, 0, &pb, &pe);
|
||||||
|
kg_put_a_g(g, 30, 10, 1, &pb, &pe);
|
||||||
|
kg_del_v_g(g, 20);
|
||||||
|
kg_print_g(g);
|
||||||
|
kg_destroy_g(g);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,95 @@
|
||||||
|
/*
|
||||||
|
* This is an optimized version of the following C++ program:
|
||||||
|
*
|
||||||
|
* http://keithlea.com/javabench/src/cpp/hash.cpp
|
||||||
|
*
|
||||||
|
* Keith in his benchmark (http://keithlea.com/javabench/data) showed that the
|
||||||
|
* Java implementation is twice as fast as the C++ version. In fact, this is
|
||||||
|
* only because the C++ implementation is substandard. Most importantly, Keith
|
||||||
|
* is using "sprintf()" to convert an integer to a string, which is known to be
|
||||||
|
* extremely inefficient.
|
||||||
|
*/
|
||||||
|
#include <stdio.h>
|
||||||
|
#include "khash.h"
|
||||||
|
KHASH_MAP_INIT_STR(str, int)
|
||||||
|
|
||||||
|
/*
 * Write the textual representation of c in the given base to ret.
 *
 *   c    - value to convert; negative values get a leading '-'
 *   base - radix, 2..16 (digits above 9 are lower-case a-f); for any other
 *          base an empty string is written
 *   ret  - output buffer; receives a NUL-terminated string. It must be large
 *          enough for the result (at most sizeof(int)*8 + 2 bytes, reached
 *          for the most negative value in base 2).
 *
 * The magnitude is computed in unsigned arithmetic so that the most negative
 * int is converted correctly (negating it as an int overflows). Declared
 * static so that the definition is usable under C99 inline rules as well.
 */
static inline void int2str(int c, int base, char *ret)
{
	static const char tab[] = "0123456789abcdef";
	char buf[sizeof(int) * 8 + 1]; /* worst case: base-2 digits plus the sign */
	unsigned mag, radix;
	int l = 0, y = 0;

	if (base < 2 || base > 16) {
		ret[0] = 0;
		return;
	}
	radix = (unsigned)base;
	mag = c < 0? 0u - (unsigned)c : (unsigned)c;
	do { /* digits are produced least significant first */
		buf[l++] = tab[mag % radix];
		mag /= radix;
	} while (mag > 0);
	if (c < 0) buf[l++] = '-';
	while (l > 0) ret[y++] = buf[--l];
	ret[y] = 0;
}
|
||||||
|
|
||||||
|
#ifndef _USE_STRDUP
|
||||||
|
#define BLOCK_SIZE 0x100000
|
||||||
|
int main(int argc, char *argv[])
|
||||||
|
{
|
||||||
|
char **mem = 0;
|
||||||
|
int i, l, n = 1000000, ret, block_end = 0, curr = 0, c = 0;
|
||||||
|
khash_t(str) *h;
|
||||||
|
h = kh_init(str);
|
||||||
|
if (argc > 1) n = atoi(argv[1]);
|
||||||
|
mem = malloc(sizeof(void*));
|
||||||
|
mem[0] = malloc(BLOCK_SIZE); // memory buffer to avoid memory fragmentation
|
||||||
|
curr = block_end = 0;
|
||||||
|
for (i = 1; i <= n; ++i) {
|
||||||
|
char buf[16];
|
||||||
|
int2str(i, 16, buf);
|
||||||
|
khint_t k = kh_put(str, h, buf, &ret);
|
||||||
|
l = strlen(buf) + 1;
|
||||||
|
if (block_end + l > BLOCK_SIZE) {
|
||||||
|
++curr; block_end = 0;
|
||||||
|
mem = realloc(mem, (curr + 1) * sizeof(void*));
|
||||||
|
mem[curr] = malloc(BLOCK_SIZE);
|
||||||
|
}
|
||||||
|
memcpy(mem[curr] + block_end, buf, l);
|
||||||
|
kh_key(h, k) = mem[curr] + block_end;
|
||||||
|
block_end += l;
|
||||||
|
kh_val(h, k) = i;
|
||||||
|
}
|
||||||
|
for (i = 1; i <= n; ++i) {
|
||||||
|
char buf[16];
|
||||||
|
int2str(i, 10, buf);
|
||||||
|
khint_t k = kh_get(str, h, buf);
|
||||||
|
if (k != kh_end(h)) ++c;
|
||||||
|
}
|
||||||
|
printf("%d\n", c);
|
||||||
|
for (ret = 0; ret <= curr; ++ret) free(mem[ret]);
|
||||||
|
free(mem);
|
||||||
|
kh_destroy(str, h);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
#else // _USE_STRDUP
|
||||||
|
int main(int argc, char *argv[])
|
||||||
|
{
|
||||||
|
int i, l, n = 1000000, ret, c = 0;
|
||||||
|
khash_t(str) *h;
|
||||||
|
khint_t k;
|
||||||
|
h = kh_init(str);
|
||||||
|
if (argc > 1) n = atoi(argv[1]);
|
||||||
|
for (i = 1; i <= n; ++i) {
|
||||||
|
char buf[16];
|
||||||
|
int2str(i, 16, buf);
|
||||||
|
k = kh_put(str, h, strdup(buf), &ret);
|
||||||
|
kh_val(h, k) = i;
|
||||||
|
}
|
||||||
|
for (i = 1; i <= n; ++i) {
|
||||||
|
char buf[16];
|
||||||
|
int2str(i, 10, buf);
|
||||||
|
k = kh_get(str, h, buf);
|
||||||
|
if (k != kh_end(h)) ++c;
|
||||||
|
}
|
||||||
|
for (k = kh_begin(h); k != kh_end(h); ++k) // explicitly freeing memory takes 10-20% CPU time.
|
||||||
|
if (kh_exist(h, k)) free((char*)kh_key(h, k));
|
||||||
|
printf("%d\n", c);
|
||||||
|
kh_destroy(str, h);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
@ -0,0 +1,67 @@
|
||||||
|
/*
|
||||||
|
* This is an optimized version of the following C++ program:
|
||||||
|
*
|
||||||
|
* http://keithlea.com/javabench/src/cpp/hash.cpp
|
||||||
|
*
|
||||||
|
* Keith in his benchmark (http://keithlea.com/javabench/data) showed that the
|
||||||
|
* Java implementation is twice as fast as the C++ version. In fact, this is
|
||||||
|
* only because the C++ implementation is substandard. Most importantly, Keith
|
||||||
|
* is using "sprintf()" to convert an integer to a string, which is known to be
|
||||||
|
* extremely inefficient.
|
||||||
|
*/
|
||||||
|
#include <stdio.h>
|
||||||
|
#include "khash.h"
|
||||||
|
KHASH_MAP_INIT_STR(str, int)
|
||||||
|
|
||||||
|
/*
 * Write the textual representation of c in the given base to ret.
 *
 *   c    - value to convert; negative values get a leading '-'
 *   base - radix, 2..16 (digits above 9 are lower-case a-f); for any other
 *          base an empty string is written
 *   ret  - output buffer; receives a NUL-terminated string. It must be large
 *          enough for the result (at most sizeof(int)*8 + 2 bytes, reached
 *          for the most negative value in base 2).
 *
 * The magnitude is computed in unsigned arithmetic so that the most negative
 * int is converted correctly (negating it as an int overflows). Declared
 * static so that the definition is usable under C99 inline rules as well.
 */
static inline void int2str(int c, int base, char *ret)
{
	static const char tab[] = "0123456789abcdef";
	char buf[sizeof(int) * 8 + 1]; /* worst case: base-2 digits plus the sign */
	unsigned mag, radix;
	int l = 0, y = 0;

	if (base < 2 || base > 16) {
		ret[0] = 0;
		return;
	}
	radix = (unsigned)base;
	mag = c < 0? 0u - (unsigned)c : (unsigned)c;
	do { /* digits are produced least significant first */
		buf[l++] = tab[mag % radix];
		mag /= radix;
	} while (mag > 0);
	if (c < 0) buf[l++] = '-';
	while (l > 0) ret[y++] = buf[--l];
	ret[y] = 0;
}
|
||||||
|
|
||||||
|
int main(int argc, char *argv[])
|
||||||
|
{
|
||||||
|
int i, l, n = 1000, ret;
|
||||||
|
khash_t(str) *h, *h2;
|
||||||
|
khint_t k;
|
||||||
|
h = kh_init(str);
|
||||||
|
h2 = kh_init(str);
|
||||||
|
if (argc > 1) n = atoi(argv[1]);
|
||||||
|
for (i = 0; i < 10000; ++i) {
|
||||||
|
char buf[32];
|
||||||
|
strcpy(buf, "foo_");
|
||||||
|
int2str(i, 10, buf+4);
|
||||||
|
k = kh_put(str, h, strdup(buf), &ret);
|
||||||
|
kh_val(h, k) = i;
|
||||||
|
}
|
||||||
|
for (i = 0; i < n; ++i) {
|
||||||
|
for (k = kh_begin(h); k != kh_end(h); ++k) {
|
||||||
|
if (kh_exist(h, k)) {
|
||||||
|
khint_t k2 = kh_put(str, h2, kh_key(h, k), &ret);
|
||||||
|
if (ret) { // absent
|
||||||
|
kh_key(h2, k2) = strdup(kh_key(h, k));
|
||||||
|
kh_val(h2, k2) = kh_val(h, k);
|
||||||
|
} else kh_val(h2, k2) += kh_val(h, k);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
k = kh_get(str, h, "foo_1"); printf("%d", kh_val(h, k));
|
||||||
|
k = kh_get(str, h, "foo_9999"); printf(" %d", kh_val(h, k));
|
||||||
|
k = kh_get(str, h2, "foo_1"); printf(" %d", kh_val(h2, k));
|
||||||
|
k = kh_get(str, h2, "foo_9999"); printf(" %d\n", kh_val(h2, k));
|
||||||
|
for (k = kh_begin(h); k != kh_end(h); ++k)
|
||||||
|
if (kh_exist(h, k)) free((char*)kh_key(h, k));
|
||||||
|
for (k = kh_begin(h2); k != kh_end(h2); ++k)
|
||||||
|
if (kh_exist(h2, k)) free((char*)kh_key(h2, k));
|
||||||
|
kh_destroy(str, h);
|
||||||
|
kh_destroy(str, h2);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,141 @@
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <assert.h>
|
||||||
|
#include <time.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
|
#include "khash.h"
|
||||||
|
KHASH_SET_INIT_STR(str)
|
||||||
|
KHASH_MAP_INIT_INT(int, unsigned char)
|
||||||
|
|
||||||
|
/* Key record with default layout: an unsigned key plus a one-byte value.
 * Used as the key type of the `iun` hash below, which hashes and compares
 * only the `key` member (see hash_func / hash_eq). */
typedef struct {
|
||||||
|
unsigned key;
|
||||||
|
unsigned char val;
|
||||||
|
} int_unpack_t;
|
||||||
|
|
||||||
|
/* Same members as int_unpack_t but declared with the packed attribute.
 * Used as the key type of the `ipk` hash below, which hashes and compares
 * only the `key` member (see hash_func / hash_eq). */
typedef struct {
|
||||||
|
unsigned key;
|
||||||
|
unsigned char val;
|
||||||
|
} __attribute__ ((__packed__)) int_packed_t;
|
||||||
|
|
||||||
|
#define hash_eq(a, b) ((a).key == (b).key)
|
||||||
|
#define hash_func(a) ((a).key)
|
||||||
|
|
||||||
|
KHASH_INIT(iun, int_unpack_t, char, 0, hash_func, hash_eq)
|
||||||
|
KHASH_INIT(ipk, int_packed_t, char, 0, hash_func, hash_eq)
|
||||||
|
|
||||||
|
static int data_size = 5000000;
|
||||||
|
static unsigned *int_data;
|
||||||
|
static char **str_data;
|
||||||
|
|
||||||
|
void ht_init_data()
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
char buf[256];
|
||||||
|
khint32_t x = 11;
|
||||||
|
printf("--- generating data... ");
|
||||||
|
int_data = (unsigned*)calloc(data_size, sizeof(unsigned));
|
||||||
|
str_data = (char**)calloc(data_size, sizeof(char*));
|
||||||
|
for (i = 0; i < data_size; ++i) {
|
||||||
|
int_data[i] = (unsigned)(data_size * ((double)x / UINT_MAX) / 4) * 271828183u;
|
||||||
|
sprintf(buf, "%x", int_data[i]);
|
||||||
|
str_data[i] = strdup(buf);
|
||||||
|
x = 1664525L * x + 1013904223L;
|
||||||
|
}
|
||||||
|
printf("done!\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
void ht_destroy_data()
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
for (i = 0; i < data_size; ++i) free(str_data[i]);
|
||||||
|
free(str_data); free(int_data);
|
||||||
|
}
|
||||||
|
|
||||||
|
void ht_khash_int()
|
||||||
|
{
|
||||||
|
int i, ret;
|
||||||
|
unsigned *data = int_data;
|
||||||
|
khash_t(int) *h;
|
||||||
|
unsigned k;
|
||||||
|
|
||||||
|
h = kh_init(int);
|
||||||
|
for (i = 0; i < data_size; ++i) {
|
||||||
|
k = kh_put(int, h, data[i], &ret);
|
||||||
|
kh_val(h, k) = i&0xff;
|
||||||
|
if (!ret) kh_del(int, h, k);
|
||||||
|
}
|
||||||
|
printf("[ht_khash_int] size: %u\n", kh_size(h));
|
||||||
|
kh_destroy(int, h);
|
||||||
|
}
|
||||||
|
|
||||||
|
void ht_khash_str()
|
||||||
|
{
|
||||||
|
int i, ret;
|
||||||
|
char **data = str_data;
|
||||||
|
khash_t(str) *h;
|
||||||
|
unsigned k;
|
||||||
|
|
||||||
|
h = kh_init(str);
|
||||||
|
for (i = 0; i < data_size; ++i) {
|
||||||
|
k = kh_put(str, h, data[i], &ret);
|
||||||
|
if (!ret) kh_del(str, h, k);
|
||||||
|
}
|
||||||
|
printf("[ht_khash_int] size: %u\n", kh_size(h));
|
||||||
|
kh_destroy(str, h);
|
||||||
|
}
|
||||||
|
|
||||||
|
void ht_khash_unpack()
|
||||||
|
{
|
||||||
|
int i, ret;
|
||||||
|
unsigned *data = int_data;
|
||||||
|
khash_t(iun) *h;
|
||||||
|
unsigned k;
|
||||||
|
|
||||||
|
h = kh_init(iun);
|
||||||
|
for (i = 0; i < data_size; ++i) {
|
||||||
|
int_unpack_t x;
|
||||||
|
x.key = data[i]; x.val = i&0xff;
|
||||||
|
k = kh_put(iun, h, x, &ret);
|
||||||
|
if (!ret) kh_del(iun, h, k);
|
||||||
|
}
|
||||||
|
printf("[ht_khash_unpack] size: %u (sizeof=%ld)\n", kh_size(h), sizeof(int_unpack_t));
|
||||||
|
kh_destroy(iun, h);
|
||||||
|
}
|
||||||
|
|
||||||
|
void ht_khash_packed()
|
||||||
|
{
|
||||||
|
int i, ret;
|
||||||
|
unsigned *data = int_data;
|
||||||
|
khash_t(ipk) *h;
|
||||||
|
unsigned k;
|
||||||
|
|
||||||
|
h = kh_init(ipk);
|
||||||
|
for (i = 0; i < data_size; ++i) {
|
||||||
|
int_packed_t x;
|
||||||
|
x.key = data[i]; x.val = i&0xff;
|
||||||
|
k = kh_put(ipk, h, x, &ret);
|
||||||
|
if (!ret) kh_del(ipk, h, k);
|
||||||
|
}
|
||||||
|
printf("[ht_khash_packed] size: %u (sizeof=%ld)\n", kh_size(h), sizeof(int_packed_t));
|
||||||
|
kh_destroy(ipk, h);
|
||||||
|
}
|
||||||
|
|
||||||
|
void ht_timing(void (*f)(void))
|
||||||
|
{
|
||||||
|
clock_t t = clock();
|
||||||
|
(*f)();
|
||||||
|
printf("[ht_timing] %.3lf sec\n", (double)(clock() - t) / CLOCKS_PER_SEC);
|
||||||
|
}
|
||||||
|
|
||||||
|
int main(int argc, char *argv[])
|
||||||
|
{
|
||||||
|
if (argc > 1) data_size = atoi(argv[1]);
|
||||||
|
ht_init_data();
|
||||||
|
ht_timing(ht_khash_int);
|
||||||
|
ht_timing(ht_khash_str);
|
||||||
|
ht_timing(ht_khash_unpack);
|
||||||
|
ht_timing(ht_khash_packed);
|
||||||
|
ht_destroy_data();
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,19 @@
|
||||||
|
#include <stdio.h>
|
||||||
|
#include "klist.h"
|
||||||
|
|
||||||
|
#define __int_free(x)
|
||||||
|
KLIST_INIT(32, int, __int_free)
|
||||||
|
|
||||||
|
int main()
|
||||||
|
{
|
||||||
|
klist_t(32) *kl;
|
||||||
|
kliter_t(32) *p;
|
||||||
|
kl = kl_init(32);
|
||||||
|
*kl_pushp(32, kl) = 1;
|
||||||
|
*kl_pushp(32, kl) = 10;
|
||||||
|
kl_shift(32, kl, 0);
|
||||||
|
for (p = kl_begin(kl); p != kl_end(kl); p = kl_next(p))
|
||||||
|
printf("%d\n", kl_val(p));
|
||||||
|
kl_destroy(32, kl);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,48 @@
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <math.h>
|
||||||
|
#include "kmath.h"
|
||||||
|
|
||||||
|
static int n_evals;
|
||||||
|
|
||||||
|
double f_Chebyquad(int n, double *x, void *data)
|
||||||
|
{
|
||||||
|
int i, j;
|
||||||
|
double y[20][20], f;
|
||||||
|
int np, iw;
|
||||||
|
double sum;
|
||||||
|
for (j = 0; j != n; ++j) {
|
||||||
|
y[0][j] = 1.;
|
||||||
|
y[1][j] = 2. * x[j] - 1.;
|
||||||
|
}
|
||||||
|
for (i = 1; i != n; ++i)
|
||||||
|
for (j = 0; j != n; ++j)
|
||||||
|
y[i+1][j] = 2. * y[1][j] * y[i][j] - y[i-1][j];
|
||||||
|
f = 0.;
|
||||||
|
np = n + 1;
|
||||||
|
iw = 1;
|
||||||
|
for (i = 0; i != np; ++i) {
|
||||||
|
sum = 0.;
|
||||||
|
for (j = 0; j != n; ++j) sum += y[i][j];
|
||||||
|
sum /= n;
|
||||||
|
if (iw > 0) sum += 1. / ((i - 1) * (i + 1));
|
||||||
|
iw = -iw;
|
||||||
|
f += sum * sum;
|
||||||
|
}
|
||||||
|
++n_evals;
|
||||||
|
return f;
|
||||||
|
}
|
||||||
|
|
||||||
|
int main()
|
||||||
|
{
|
||||||
|
double x[20], y;
|
||||||
|
int n, i;
|
||||||
|
printf("\nMinimizer: Hooke-Jeeves\n");
|
||||||
|
for (n = 2; n <= 8; n += 2) {
|
||||||
|
for (i = 0; i != n; ++i) x[i] = (double)(i + 1) / n;
|
||||||
|
n_evals = 0;
|
||||||
|
y = kmin_hj(f_Chebyquad, n, x, 0, KMIN_RADIUS, KMIN_EPS, KMIN_MAXCALL);
|
||||||
|
printf("n=%d,min=%.8lg,n_evals=%d\n", n, y, n_evals);
|
||||||
|
}
|
||||||
|
printf("\n");
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,151 @@
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <assert.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <limits.h>
|
||||||
|
#include "krmq.h"
|
||||||
|
|
||||||
|
#define CALLOC(type, num) ((type*)calloc(num, sizeof(type)))
|
||||||
|
|
||||||
|
/* Tree node used by this test program. */
struct my_node {
	int key; /* ordering key: my_cmp compares nodes on this field */
	int val; /* payload compared by my_lt2; check() tracks its minimum per subtree */
	KRMQ_HEAD(struct my_node) head; /* tree bookkeeping; this file reads head.p[], head.s, head.balance and head.size */
};
|
||||||
|
|
||||||
|
#define my_cmp(p, q) (((p)->key > (q)->key) - ((p)->key < (q)->key))
|
||||||
|
#define my_lt2(p, q) ((p)->val < (q)->val)
|
||||||
|
KRMQ_INIT(my, struct my_node, head, my_cmp, my_lt2)
|
||||||
|
|
||||||
|
int check(struct my_node *p, int *hh, int *min)
|
||||||
|
{
|
||||||
|
*hh = 0, *min = INT_MAX;
|
||||||
|
if (p) {
|
||||||
|
int c = 1, h[2] = {0, 0}, m[2] = {INT_MAX, INT_MAX};
|
||||||
|
*min = p->val;
|
||||||
|
if (p->head.p[0]) c += check(p->head.p[0], &h[0], &m[0]);
|
||||||
|
if (p->head.p[1]) c += check(p->head.p[1], &h[1], &m[1]);
|
||||||
|
*min = *min < m[0]? *min : m[0];
|
||||||
|
*min = *min < m[1]? *min : m[1];
|
||||||
|
*hh = (h[0] > h[1]? h[0] : h[1]) + 1;
|
||||||
|
if (*min != p->head.s->val)
|
||||||
|
fprintf(stderr, "min %d != %d at %c\n", *min, p->head.s->val, p->key);
|
||||||
|
if (h[1] - h[0] != (int)p->head.balance)
|
||||||
|
fprintf(stderr, "%d - %d != %d at %c\n", h[1], h[0], p->head.balance, p->key);
|
||||||
|
if (c != (int)p->head.size)
|
||||||
|
fprintf(stderr, "%d != %d at %c\n", p->head.size, c, p->key);
|
||||||
|
return c;
|
||||||
|
} else return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Cross-check krmq_rmq() against a linear scan.
 *
 * root    tree to query
 * lo, hi  key bounds of the query, stored in the probe nodes s and t
 *
 * Returns -1 when krmq_rmq() finds nothing; otherwise returns the smallest
 * val seen by the scan, after reporting on stderr if it differs from the
 * val of the node krmq_rmq() returned.
 */
int check_rmq(const struct my_node *root, int lo, int hi)
{
	struct my_node s, t, *p, *q;
	krmq_itr_t(my) itr;
	int min = INT_MAX;
	/* only the key fields of the two probes are initialized */
	s.key = lo, t.key = hi;
	p = krmq_rmq(my, root, &s, &t);
	/* NOTE(review): presumably q receives the first node at or above s.key - confirm against krmq.h */
	krmq_interval(my, root, &s, 0, &q);
	if (p == 0) return -1;
	krmq_itr_find(my, root, q, &itr);
	/* walk forward from q, tracking the smallest val until a key exceeds hi */
	do {
		const struct my_node *r = krmq_at(&itr);
		if (r->key > hi) break;
		//fprintf(stderr, "%c\t%d\n", r->key, r->val);
		if (r->val < min) min = r->val;
	} while (krmq_itr_next(my, &itr));
	/* p is non-null here, so this requires the scan to have seen at least one node */
	assert((min == INT_MAX && p == 0) || (min < INT_MAX && p));
	if (min != p->val) fprintf(stderr, "rmq_min %d != %d\n", p->val, min);
	return min;
}
|
||||||
|
|
||||||
|
int print_tree(const struct my_node *p)
|
||||||
|
{
|
||||||
|
int c = 1;
|
||||||
|
if (p == 0) return 0;
|
||||||
|
if (p->head.p[0] || p->head.p[1]) {
|
||||||
|
putchar('(');
|
||||||
|
if (p->head.p[0]) c += print_tree(p->head.p[0]);
|
||||||
|
else putchar('.');
|
||||||
|
putchar(',');
|
||||||
|
if (p->head.p[1]) c += print_tree(p->head.p[1]);
|
||||||
|
else putchar('.');
|
||||||
|
putchar(')');
|
||||||
|
}
|
||||||
|
printf("%c:%d/%d", p->key, p->val, p->head.s->val);
|
||||||
|
return c;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Validate the whole tree with check(), then print it followed by a newline. */
void check_and_print(struct my_node *root)
{
	int height_out, min_out;

	check(root, &height_out, &min_out);
	print_tree(root);
	putchar('\n');
}
|
||||||
|
|
||||||
|
/*
 * Shuffle the first n characters of a in place.
 *
 * Works from the back: for each position, a partner index is drawn with
 * drand48() from the not-yet-fixed prefix (the position itself included)
 * and the two characters are exchanged.
 */
void shuffle(int n, char a[])
{
	int remaining = n;

	while (remaining > 1) {
		int pick = (int)(drand48() * remaining);
		char held = a[pick];
		a[pick] = a[remaining-1];
		a[remaining-1] = held;
		--remaining;
	}
}
|
||||||
|
|
||||||
|
/*
 * Test driver for the "my" tree instantiation: insert every printable
 * ASCII character except '(', ')', '.' and ';' in shuffled order, erase
 * half of them in a second shuffled order, validate the tree after every
 * mutation, run a set of range-minimum checks and finally print the
 * remaining keys by iterating over the tree.
 */
int main(void)
{
	char buf[256];
	int i, n, h, min;
	struct my_node *root = 0;
	struct my_node *p, *q, t;
	krmq_itr_t(my) itr;
	unsigned cnt;

	srand48(123); /* fixed seed, so every run uses the same shuffles */
	/* collect the key alphabet; the excluded characters are the ones print_tree emits as punctuation (plus ';') */
	for (i = 33, n = 0; i <= 126; ++i)
		if (i != '(' && i != ')' && i != '.' && i != ';')
			buf[n++] = i;
	shuffle(n, buf);
	/* insert phase: val records the insertion order */
	for (i = 0; i < n; ++i) {
		p = CALLOC(struct my_node, 1);
		p->key = buf[i];
		p->val = i;
		q = krmq_insert(my, &root, p, &cnt);
		/* NOTE(review): presumably q != p means an equal key already existed and p was not linked in - confirm against krmq.h */
		if (p != q) free(p);
		check(root, &h, &min);
	}

	shuffle(n, buf);
	/* erase phase: remove the first half of the reshuffled keys */
	for (i = 0; i < n/2; ++i) {
		t.key = buf[i];
		//fprintf(stderr, "i=%d, key=%c, n/2=%d\n", i, t.key, n/2);
		q = krmq_erase(my, &root, &t, 0);
		if (q) free(q);
		check(root, &h, &min);
	}
	check_and_print(root);

	/* fixed range-minimum queries over character ranges */
	check_rmq(root, '0', '9');
	check_rmq(root, '!', '~');
	check_rmq(root, 'A', 'Z');
	check_rmq(root, 'F', 'G');
	check_rmq(root, 'a', 'z');
	/* random queries; bounds are drawn from [0, n) and lo may exceed hi */
	for (i = 0; i < n; ++i) {
		int lo, hi;
		lo = (int)(drand48() * n);
		hi = (int)(drand48() * n);
		check_rmq(root, lo, hi);
	}

	/* print the surviving keys in iteration order */
	krmq_itr_first(my, root, &itr);
	do {
		const struct my_node *r = krmq_at(&itr);
		putchar(r->key);
	} while (krmq_itr_next(my, &itr));
	putchar('\n');
	krmq_free(struct my_node, head, root, free);
	return 0;
}
|
||||||
|
|
@ -0,0 +1,69 @@
|
||||||
|
#include <zlib.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <time.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include "kseq.h"
|
||||||
|
|
||||||
|
#define BUF_SIZE 4096
|
||||||
|
KSTREAM_INIT(gzFile, gzread, BUF_SIZE)
|
||||||
|
|
||||||
|
int main(int argc, char *argv[])
|
||||||
|
{
|
||||||
|
gzFile fp;
|
||||||
|
clock_t t;
|
||||||
|
if (argc == 1) {
|
||||||
|
fprintf(stderr, "Usage: kseq_bench <in.gz>\n");
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
{
|
||||||
|
uint8_t *buf = malloc(BUF_SIZE);
|
||||||
|
fp = gzopen(argv[1], "r");
|
||||||
|
t = clock();
|
||||||
|
while (gzread(fp, buf, BUF_SIZE) > 0);
|
||||||
|
fprintf(stderr, "[gzread] %.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC);
|
||||||
|
gzclose(fp);
|
||||||
|
free(buf);
|
||||||
|
}
|
||||||
|
{
|
||||||
|
kstream_t *ks;
|
||||||
|
fp = gzopen(argv[1], "r");
|
||||||
|
ks = ks_init(fp);
|
||||||
|
t = clock();
|
||||||
|
while (ks_getc(ks) >= 0);
|
||||||
|
fprintf(stderr, "[ks_getc] %.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC);
|
||||||
|
ks_destroy(ks);
|
||||||
|
gzclose(fp);
|
||||||
|
}
|
||||||
|
{
|
||||||
|
kstream_t *ks;
|
||||||
|
kstring_t *s;
|
||||||
|
int dret;
|
||||||
|
s = calloc(1, sizeof(kstring_t));
|
||||||
|
fp = gzopen(argv[1], "r");
|
||||||
|
ks = ks_init(fp);
|
||||||
|
t = clock();
|
||||||
|
while (ks_getuntil(ks, '\n', s, &dret) >= 0);
|
||||||
|
fprintf(stderr, "[ks_getuntil] %.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC);
|
||||||
|
ks_destroy(ks);
|
||||||
|
gzclose(fp);
|
||||||
|
free(s->s); free(s);
|
||||||
|
}
|
||||||
|
if (argc == 2) {
|
||||||
|
fp = gzopen(argv[1], "r");
|
||||||
|
t = clock();
|
||||||
|
while (gzgetc(fp) >= 0);
|
||||||
|
fprintf(stderr, "[gzgetc] %.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC);
|
||||||
|
gzclose(fp);
|
||||||
|
}
|
||||||
|
if (argc == 2) {
|
||||||
|
char *buf = malloc(BUF_SIZE);
|
||||||
|
fp = gzopen(argv[1], "r");
|
||||||
|
t = clock();
|
||||||
|
while (gzgets(fp, buf, BUF_SIZE) > 0);
|
||||||
|
fprintf(stderr, "[gzgets] %.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC);
|
||||||
|
gzclose(fp);
|
||||||
|
free(buf);
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,44 @@
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <time.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <fcntl.h>
|
||||||
|
#include <unistd.h>
|
||||||
|
#include "kseq.h"
|
||||||
|
KSTREAM_INIT(int, read, 4096)
|
||||||
|
|
||||||
|
#define BUF_SIZE 65536
|
||||||
|
|
||||||
|
int main(int argc, char *argv[])
|
||||||
|
{
|
||||||
|
clock_t t;
|
||||||
|
if (argc == 1) {
|
||||||
|
fprintf(stderr, "Usage: %s <in.txt>\n", argv[0]);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
{
|
||||||
|
FILE *fp;
|
||||||
|
char *s;
|
||||||
|
t = clock();
|
||||||
|
s = malloc(BUF_SIZE);
|
||||||
|
fp = fopen(argv[1], "r");
|
||||||
|
while (fgets(s, BUF_SIZE, fp));
|
||||||
|
fclose(fp);
|
||||||
|
fprintf(stderr, "[fgets] %.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC);
|
||||||
|
}
|
||||||
|
{
|
||||||
|
int fd, dret;
|
||||||
|
kstream_t *ks;
|
||||||
|
kstring_t s;
|
||||||
|
t = clock();
|
||||||
|
s.l = s.m = 0; s.s = 0;
|
||||||
|
fd = open(argv[1], O_RDONLY);
|
||||||
|
ks = ks_init(fd);
|
||||||
|
while (ks_getuntil(ks, '\n', &s, &dret) >= 0);
|
||||||
|
free(s.s);
|
||||||
|
ks_destroy(ks);
|
||||||
|
close(fd);
|
||||||
|
fprintf(stderr, "[kstream] %.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC);
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,27 @@
|
||||||
|
#include <zlib.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include "kseq.h"
|
||||||
|
KSEQ_INIT(gzFile, gzread)
|
||||||
|
|
||||||
|
int main(int argc, char *argv[])
|
||||||
|
{
|
||||||
|
gzFile fp;
|
||||||
|
kseq_t *seq;
|
||||||
|
int l;
|
||||||
|
if (argc == 1) {
|
||||||
|
fprintf(stderr, "Usage: %s <in.fasta>\n", argv[0]);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
fp = gzopen(argv[1], "r");
|
||||||
|
seq = kseq_init(fp);
|
||||||
|
while ((l = kseq_read(seq)) >= 0) {
|
||||||
|
printf("name: %s\n", seq->name.s);
|
||||||
|
if (seq->comment.l) printf("comment: %s\n", seq->comment.s);
|
||||||
|
printf("seq: %s\n", seq->seq.s);
|
||||||
|
if (seq->qual.l) printf("qual: %s\n", seq->qual.s);
|
||||||
|
}
|
||||||
|
printf("return value: %d\n", l);
|
||||||
|
kseq_destroy(seq);
|
||||||
|
gzclose(fp);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,12 @@
|
||||||
|
>1
|
||||||
|
acgtacgtacgtagc
|
||||||
|
>2 test
|
||||||
|
acgatcgatc
|
||||||
|
@3 test2
|
||||||
|
cgctagcatagc
|
||||||
|
cgatatgactta
|
||||||
|
+
|
||||||
|
78wo82usd980
|
||||||
|
d88fau
|
||||||
|
|
||||||
|
238ud8
|
||||||
|
|
@ -0,0 +1,104 @@
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <time.h>
|
||||||
|
#include "ksort.h"
|
||||||
|
|
||||||
|
KSORT_INIT_GENERIC(int)
|
||||||
|
|
||||||
|
int main(int argc, char *argv[])
|
||||||
|
{
|
||||||
|
int i, N = 10000000;
|
||||||
|
int *array, x;
|
||||||
|
clock_t t1, t2;
|
||||||
|
if (argc > 1) N = atoi(argv[1]);
|
||||||
|
array = (int*)malloc(sizeof(int) * N);
|
||||||
|
|
||||||
|
srand48(11);
|
||||||
|
for (i = 0; i < N; ++i) array[i] = (int)lrand48();
|
||||||
|
t1 = clock();
|
||||||
|
x = ks_ksmall(int, N, array, 10500);
|
||||||
|
t2 = clock();
|
||||||
|
fprintf(stderr, "ksmall [%d]: %.3lf\n", x, (double)(t2-t1)/CLOCKS_PER_SEC);
|
||||||
|
|
||||||
|
srand48(11);
|
||||||
|
for (i = 0; i < N; ++i) array[i] = (int)lrand48();
|
||||||
|
t1 = clock();
|
||||||
|
ks_introsort(int, N, array);
|
||||||
|
t2 = clock();
|
||||||
|
fprintf(stderr, "introsort [%d]: %.3lf\n", array[10500], (double)(t2-t1)/CLOCKS_PER_SEC);
|
||||||
|
for (i = 0; i < N-1; ++i) {
|
||||||
|
if (array[i] > array[i+1]) {
|
||||||
|
fprintf(stderr, "Bug in introsort!\n");
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifndef _ALIGNED_ONLY
|
||||||
|
{ // test unaligned ksmall
|
||||||
|
srand48(11);
|
||||||
|
unsigned char *a;
|
||||||
|
int *b;
|
||||||
|
a = malloc(N * sizeof(int) + 1);
|
||||||
|
b = (int*)(a + 1);
|
||||||
|
for (i = 0; i < N; ++i) b[i] = (int)lrand48();
|
||||||
|
t1 = clock();
|
||||||
|
ks_introsort(int, N, b);
|
||||||
|
t2 = clock();
|
||||||
|
fprintf(stderr, "introsort [%d]: %.3lf (unaligned: 0x%lx) \n", b[10500], (double)(t2-t1)/CLOCKS_PER_SEC, (size_t)b);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
t1 = clock();
|
||||||
|
ks_introsort(int, N, array);
|
||||||
|
t2 = clock();
|
||||||
|
fprintf(stderr, "introsort (sorted): %.3lf\n", (double)(t2-t1)/CLOCKS_PER_SEC);
|
||||||
|
|
||||||
|
srand48(11);
|
||||||
|
for (i = 0; i < N; ++i) array[i] = (int)lrand48();
|
||||||
|
t1 = clock();
|
||||||
|
ks_combsort(int, N, array);
|
||||||
|
t2 = clock();
|
||||||
|
fprintf(stderr, "combsort: %.3lf\n", (double)(t2-t1)/CLOCKS_PER_SEC);
|
||||||
|
for (i = 0; i < N-1; ++i) {
|
||||||
|
if (array[i] > array[i+1]) {
|
||||||
|
fprintf(stderr, "Bug in combsort!\n");
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
srand48(11);
|
||||||
|
for (i = 0; i < N; ++i) array[i] = (int)lrand48();
|
||||||
|
t1 = clock();
|
||||||
|
ks_mergesort(int, N, array, 0);
|
||||||
|
t2 = clock();
|
||||||
|
fprintf(stderr, "mergesort: %.3lf\n", (double)(t2-t1)/CLOCKS_PER_SEC);
|
||||||
|
for (i = 0; i < N-1; ++i) {
|
||||||
|
if (array[i] > array[i+1]) {
|
||||||
|
fprintf(stderr, "Bug in mergesort!\n");
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
t1 = clock();
|
||||||
|
ks_mergesort(int, N, array, 0);
|
||||||
|
t2 = clock();
|
||||||
|
fprintf(stderr, "mergesort (sorted): %.3lf\n", (double)(t2-t1)/CLOCKS_PER_SEC);
|
||||||
|
|
||||||
|
srand48(11);
|
||||||
|
for (i = 0; i < N; ++i) array[i] = (int)lrand48();
|
||||||
|
t1 = clock();
|
||||||
|
ks_heapmake(int, N, array);
|
||||||
|
ks_heapsort(int, N, array);
|
||||||
|
t2 = clock();
|
||||||
|
fprintf(stderr, "heapsort: %.3lf\n", (double)(t2-t1)/CLOCKS_PER_SEC);
|
||||||
|
for (i = 0; i < N-1; ++i) {
|
||||||
|
if (array[i] > array[i+1]) {
|
||||||
|
fprintf(stderr, "Bug in heapsort!\n");
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
free(array);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,997 @@
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <time.h>
|
||||||
|
#include <algorithm>
|
||||||
|
|
||||||
|
#include "ksort.h"
|
||||||
|
KSORT_INIT_GENERIC(int)
|
||||||
|
|
||||||
|
using namespace std;
|
||||||
|
|
||||||
|
/**********************************
|
||||||
|
* BEGIN OF PAUL'S IMPLEMENTATION *
|
||||||
|
**********************************/
|
||||||
|
|
||||||
|
/* Attractive Chaos: I have added inline where necessary. */
|
||||||
|
|
||||||
|
/*
|
||||||
|
Copyright (c) 2004 Paul Hsieh
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
|
||||||
|
Redistributions of source code must retain the above copyright notice,
|
||||||
|
this list of conditions and the following disclaimer.
|
||||||
|
|
||||||
|
Redistributions in binary form must reproduce the above copyright notice,
|
||||||
|
this list of conditions and the following disclaimer in the documentation
|
||||||
|
and/or other materials provided with the distribution.
|
||||||
|
|
||||||
|
Neither the name of sorttest nor the names of its contributors may be
|
||||||
|
used to endorse or promote products derived from this software without
|
||||||
|
specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
|
||||||
|
Recommended flags:
|
||||||
|
------------------
|
||||||
|
|
||||||
|
Intel C/C++:
|
||||||
|
icl /O2 /G6 /Qaxi /Qxi /Qip sorttest.c
|
||||||
|
|
||||||
|
WATCOM C/C++:
|
||||||
|
wcl386 /otexan /6r sorttest.c
|
||||||
|
|
||||||
|
GCC:
|
||||||
|
gcc -O3 -mcpu=athlon-xp -march=athlon-xp sorttest.c
|
||||||
|
|
||||||
|
MSVC:
|
||||||
|
cl /O2 /Ot /Og /G6 sorttest.c
|
||||||
|
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* Put the two integers at numbers[0..1] into non-decreasing order. */
static inline void sort2 (int * numbers) {
    const int first = numbers[0];
    const int second = numbers[1];

    if (second < first) {
        numbers[0] = second;
        numbers[1] = first;
    }
}
|
||||||
|
|
||||||
|
/*
 * Put the three integers at numbers[0..2] into non-decreasing order,
 * using three compare-exchange steps on local copies.
 */
static inline void sort3 (int * numbers) {
    int low = numbers[0];
    int mid = numbers[1];
    int high = numbers[2];
    int held;

    if (mid < low)  { held = low; low = mid;  mid = held; }
    if (high < mid) { held = mid; mid = high; high = held; }
    if (mid < low)  { held = low; low = mid;  mid = held; }

    numbers[0] = low;
    numbers[1] = mid;
    numbers[2] = high;
}
|
||||||
|
|
||||||
|
/*
 * Put the four integers at num[0..3] into non-decreasing order.
 *
 * Works on local copies with a five-step compare-exchange network on the
 * pairs (0,1), (2,3), (0,2), (1,3), (1,2), then writes the result back.
 */
static inline void sort4 (int * num) {
    int a = num[0];
    int b = num[1];
    int c = num[2];
    int d = num[3];
    int held;

    if (b < a) { held = a; a = b; b = held; }
    if (d < c) { held = c; c = d; d = held; }
    if (c < a) { held = a; a = c; c = held; }
    if (d < b) { held = b; b = d; d = held; }
    if (c < b) { held = b; b = c; c = held; }

    num[0] = a;
    num[1] = b;
    num[2] = c;
    num[3] = d;
}
|
||||||
|
|
||||||
|
/*
 * Write the two integers of numbers[0..1] to altNumbers[0..1] in
 * non-decreasing order; numbers itself is only read.
 */
static inline void sortAlt2 (int * numbers, int * altNumbers) {
    const bool inOrder = numbers[0] <= numbers[1];
    const int lowIdx = inOrder ? 0 : 1;
    const int highIdx = inOrder ? 1 : 0;

    altNumbers[0] = numbers[lowIdx];
    altNumbers[1] = numbers[highIdx];
}
|
||||||
|
|
||||||
|
/*
 * Write the three integers of numbers[0..2] to altNumbers[0..2] in
 * non-decreasing order; numbers itself is only read.
 *
 * The comparisons first decide which source index supplies each output
 * slot, then the three copies are made in slot order.
 */
static inline void sortAlt3 (int * numbers, int * altNumbers) {
    int first, second, third;   /* source index for output slots 0, 1, 2 */

    if (numbers[0] <= numbers[1]) {
        if (numbers[1] <= numbers[2])      { first = 0; second = 1; third = 2; }
        else if (numbers[2] <= numbers[0]) { first = 2; second = 0; third = 1; }
        else                               { first = 0; second = 2; third = 1; }
    } else {
        if (numbers[0] <= numbers[2])      { first = 1; second = 0; third = 2; }
        else if (numbers[2] <= numbers[1]) { first = 2; second = 1; third = 0; }
        else                               { first = 1; second = 2; third = 0; }
    }

    altNumbers[0] = numbers[first];
    altNumbers[1] = numbers[second];
    altNumbers[2] = numbers[third];
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Insert Sort
|
||||||
|
*/
|
||||||
|
|
||||||
|
inline void insertSort (int numbers[], int qty) {
|
||||||
|
int i, j, idx, q4;
|
||||||
|
int tmp;
|
||||||
|
|
||||||
|
if (qty <= 4) {
|
||||||
|
if (qty == 4) sort4 (numbers);
|
||||||
|
else if (qty == 3) sort3 (numbers);
|
||||||
|
else if (qty == 2) sort2 (numbers);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
q4 = qty - 4;
|
||||||
|
|
||||||
|
for (i=0; i < q4; i++) {
|
||||||
|
idx = i;
|
||||||
|
for (j=i+1; j < qty; j++) {
|
||||||
|
if (numbers[j] < numbers[idx]) idx = j;
|
||||||
|
}
|
||||||
|
if (idx != i) {
|
||||||
|
tmp = numbers[idx];
|
||||||
|
numbers[idx] = numbers[i];
|
||||||
|
numbers[i] = tmp;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
sort4 (numbers + q4);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Heap Sort
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* Assure the heap property for entries from top to last */
|
||||||
|
/*
 * Sift the value at numbers[top] down until neither child is larger.
 *
 * The children of index k are 2k and 2k+1, and no index above last is
 * touched. The moving value is held aside and written once at the end.
 */
static void siftDown (int numbers[], int top, int last) {
    const int moving = numbers[top];
    int hole = top;

    for (int child = hole + hole; child <= last; child = hole + hole) {
        /* This is where the comparison occurs and where a sufficiently
           good compiler can use a computed conditional result rather
           than using control logic. */
        if (child != last && numbers[child] < numbers[child + 1]) ++child;

        if (moving >= numbers[child]) break;
        numbers[hole] = numbers[child];
        hole = child;
    }
    numbers[hole] = moving;
}
|
||||||
|
|
||||||
|
/* Peel off the top siftDown operation since its parameters are trivial to
|
||||||
|
fill in directly (and this saves us some moves.) */
|
||||||
|
static void siftDown0 (int numbers[], int last) {
|
||||||
|
int tmp;
|
||||||
|
|
||||||
|
if (numbers[0] < numbers[1]) {
|
||||||
|
tmp = numbers[1];
|
||||||
|
numbers[1] = numbers[0];
|
||||||
|
siftDown (numbers, 1, last);
|
||||||
|
} else {
|
||||||
|
tmp = numbers[0];
|
||||||
|
}
|
||||||
|
numbers[0] = numbers[last];
|
||||||
|
numbers[last] = tmp;
|
||||||
|
}
|
||||||
|
|
||||||
|
void heapSort (int numbers[], int qty) {
|
||||||
|
int i;
|
||||||
|
|
||||||
|
if (qty <= 4) {
|
||||||
|
if (qty == 4) sort4 (numbers);
|
||||||
|
else if (qty == 3) sort3 (numbers);
|
||||||
|
else if (qty == 2) sort2 (numbers);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
i = qty / 2;
|
||||||
|
/* Enforce the heap property for each position in the tree */
|
||||||
|
for ( qty--; i > 0; i--) siftDown (numbers, i, qty);
|
||||||
|
for (i = qty; i > 0; i--) siftDown0 (numbers, i);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Quick Sort
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
 * Choose the median of numbers[0], numbers[i] and numbers[j].
 *
 * If numbers[0] is already the median the array is left untouched;
 * otherwise the median is swapped into numbers[0]. Returns the median.
 */
static int medianOf3 (int * numbers, int i, int j) {
    const int head = numbers[0];
    int pick;

    if (head <= numbers[i]) {
        if (numbers[j] <= head) return head;           /* j 0 i */
        pick = (numbers[i] <= numbers[j]) ? i : j;     /* 0 i j, else 0 j i */
    } else {
        if (head <= numbers[j]) return head;           /* i 0 j */
        pick = (numbers[j] <= numbers[i]) ? i : j;     /* j i 0, else i j 0 */
    }

    const int median = numbers[pick];
    numbers[pick] = head;
    numbers[0] = median;
    return median;
}
|
||||||
|
|
||||||
|
/*
 * Sort numbers[left..right] (both bounds inclusive) in place.
 *
 * Short ranges are handed to the small-array sorts. Longer ranges are
 * partitioned around a median-of-three pivot; the function then recurses
 * into the smaller side only and loops back (via qsrStart) for the larger
 * side.
 */
static void quickSortRecurse (int * numbers, int left, int right) {
    int pivot, lTmp, rTmp;

    /* re-entry point for the larger partition */
    qsrStart:;

#if defined(__GNUC__)
    /* nine elements or fewer: finish with insertSort */
    if (right <= left + 8) {
        insertSort (numbers + left, right - left + 1);
        return;
    }
#else
    /* four elements or fewer: finish with the fixed-size sorts */
    if (right <= left + 3) {
        if (right == left + 1) {
            sort2 (numbers + left);
        } else if (right == left + 2) {
            sort3 (numbers + left);
        } else if (right == left + 3) {
            sort4 (numbers + left);
        }
        return;
    }
#endif

    /* remember the original bounds; left and right are consumed by the scan */
    lTmp = left;
    rTmp = right;

    /* medianOf3 leaves the chosen pivot value at numbers[left] and returns it,
       so numbers[left] acts as the first free slot during partitioning */
    pivot = medianOf3 (numbers + left, (right-left) >> 1, right-1-left);

    /* enter the loop at the first comparison, without decrementing right first */
    goto QStart;
    while (1) {
        /* scan from the right for a value that is not greater than the pivot */
        do {
            right--;
            if (left >= right) goto QEnd;
            QStart:;
        } while (numbers[right] > pivot);
        numbers[left] = numbers[right];
        /* scan from the left for a value that is not less than the pivot */
        do {
            left++;
            if (left >= right) {
                left = right;
                goto QEnd;
            }
        } while (numbers[ left] < pivot);
        numbers[right] = numbers[left];
    }
    QEnd:;
    /* left is now the free slot where the pivot belongs */
    numbers[left] = pivot;

    /* Only recurse the smaller partition */

    if (left-1 - lTmp <= rTmp - left - 1) {
        if (lTmp < left) quickSortRecurse (numbers, lTmp, left-1);

        /* Set up for larger partition */
        left++;
        right = rTmp;
    } else {
        if (rTmp > left) quickSortRecurse (numbers, left+1, rTmp);

        /* Set up for larger partition */
        right = left - 1;
        left = lTmp;
    }

    /* Rerun with larger partition (recursion not required.) */
    goto qsrStart;
}
|
||||||
|
|
||||||
|
/* Sort numbers[0..qty) in place with quicksort; fewer than two elements need no work. */
void quickSort (int numbers[], int qty) {
    if (qty >= 2) {
        quickSortRecurse (numbers, 0, qty - 1);
    }
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Merge Sort
|
||||||
|
*/
|
||||||
|
|
||||||
|
static void mergesortInPlace (int * numbers, int * altNumbers, int qty);
|
||||||
|
|
||||||
|
/* Perform mergesort, but store results in altNumbers */
|
||||||
|
|
||||||
|
static void mergesortExchange (int * numbers, int * altNumbers, int qty) {
|
||||||
|
int half, i0, i1, i;
|
||||||
|
|
||||||
|
if (qty == 2) {
|
||||||
|
sortAlt2 (numbers, altNumbers);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (qty == 3) {
|
||||||
|
sortAlt3 (numbers, altNumbers);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
half = (qty + 1)/2;
|
||||||
|
|
||||||
|
mergesortInPlace (numbers, altNumbers, half);
|
||||||
|
mergesortInPlace (numbers + half, altNumbers, qty - half);
|
||||||
|
|
||||||
|
i0 = 0; i1 = half;
|
||||||
|
|
||||||
|
for (i=0; i < qty; i++) {
|
||||||
|
if (i1 >= qty || (i0 < half && numbers[i0] < numbers[i1])) {
|
||||||
|
altNumbers[i] = numbers[i0];
|
||||||
|
i0++;
|
||||||
|
} else {
|
||||||
|
altNumbers[i] = numbers[i1];
|
||||||
|
i1++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Perform mergesort and store results in numbers */
|
||||||
|
|
||||||
|
static void mergesortInPlace (int * numbers, int * altNumbers, int qty) {
|
||||||
|
int half, i0, i1, i;
|
||||||
|
|
||||||
|
#if 0
|
||||||
|
if (qty == 2) {
|
||||||
|
sort2 (numbers);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (qty == 3) {
|
||||||
|
sort3 (numbers);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (qty == 4) {
|
||||||
|
sort4 (numbers);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
if (qty <= 12) {
|
||||||
|
insertSort (numbers, qty);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
half = (qty + 1)/2;
|
||||||
|
|
||||||
|
mergesortExchange (numbers, altNumbers, half);
|
||||||
|
mergesortExchange (numbers + half, altNumbers + half, qty - half);
|
||||||
|
|
||||||
|
i0 = 0; i1 = half;
|
||||||
|
|
||||||
|
for (i=0; i < qty; i++) {
|
||||||
|
if (i1 >= qty || (i0 < half && altNumbers[i0] < altNumbers[i1])) {
|
||||||
|
numbers[i] = altNumbers[i0];
|
||||||
|
i0++;
|
||||||
|
} else {
|
||||||
|
numbers[i] = altNumbers[i1];
|
||||||
|
i1++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#include <stdlib.h>
|
||||||
|
|
||||||
|
void mergeSort (int numbers[], int qty) {
|
||||||
|
int * tmpArray;
|
||||||
|
|
||||||
|
if (qty <= 12) {
|
||||||
|
insertSort (numbers, qty);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
tmpArray = (int *) malloc (qty * sizeof (int));
|
||||||
|
mergesortInPlace (numbers, tmpArray, qty);
|
||||||
|
free (tmpArray);
|
||||||
|
}
|
||||||
|
|
||||||
|
/********************************
|
||||||
|
* END OF PAUL'S IMPLEMENTATION *
|
||||||
|
********************************/
|
||||||
|
|
||||||
|
/*************************************************
|
||||||
|
*** Implementation 1: faster on sorted arrays ***
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
#define rstype_t unsigned
|
||||||
|
#define rskey(x) (x)
|
||||||
|
|
||||||
|
#define RS_MIN_SIZE 64
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
rstype_t *b, *e;
|
||||||
|
} rsbucket_t;
|
||||||
|
|
||||||
|
/*
 * In-place most-significant-digit radix sort of the keys in [beg, end).
 *
 * n_bits : width of one digit; 1<<n_bits buckets are used per pass.
 * s      : right shift that selects the digit examined in this pass.
 *
 * After the keys are permuted into their buckets, and while s is non-zero,
 * each bucket is refined with the next lower digit: buckets holding more
 * than RS_MIN_SIZE keys recurse, smaller ones are insertion sorted.
 */
void rs_sort(rstype_t *beg, rstype_t *end, int n_bits, int s)
{
	rstype_t *p;
	int size = 1<<n_bits, m = size - 1;
	rsbucket_t *cur, b[size], *be = b + size;

	/* Histogram: start every bucket empty at beg and advance its `e` once
	   per key, so (e - beg) ends up being the bucket's key count. */
	for (cur = b; cur != be; ++cur) {
		cur->b = beg;
		cur->e = beg;
	}
	for (p = beg; p != end; ++p) {
		rsbucket_t *dst = &b[(rskey(*p) >> s) & m];
		++dst->e;
	}

	/* Prefix sums turn the counts into [b, e) ranges laid out back to back. */
	for (cur = b + 1; cur != be; ++cur) {
		cur->e += (cur-1)->e - beg;
		cur->b = (cur-1)->e;
	}

	/* Permute: follow each displaced key to its own bucket until a key that
	   belongs to the current bucket comes back. */
	cur = b;
	while (cur != be) {
		rsbucket_t *dst;

		if (cur->b == cur->e) {
			++cur;
			continue;
		}
		dst = b + ((rskey(*cur->b) >> s) & m);
		if (dst == cur) {
			++cur->b;
			continue;
		}
		{
			rstype_t carried = *cur->b, incoming;
			do {
				incoming = *dst->b;
				*dst->b++ = carried;
				carried = incoming;
				dst = b + ((rskey(carried) >> s) & m);
			} while (dst != cur);
			*cur->b++ = carried;
		}
	}

	/* The permutation pushed every `b` up to `e`; restore the range starts. */
	b->b = beg;
	for (cur = b + 1; cur != be; ++cur)
		cur->b = (cur-1)->e;

	if (s == 0)
		return;

	s = s > n_bits ? s - n_bits : 0;
	for (cur = b; cur != be; ++cur) {
		if (cur->e - cur->b > RS_MIN_SIZE) {
			rs_sort(cur->b, cur->e, n_bits, s);
		} else if (cur->e - cur->b > 1) {
			/* Small bucket: plain insertion sort on the full key. */
			for (p = cur->b + 1; p < cur->e; ++p) {
				if (rskey(*p) < rskey(*(p - 1))) {
					rstype_t *q, key = *p;
					for (q = p; q > cur->b && rskey(key) < rskey(*(q-1)); --q)
						*q = *(q - 1);
					*q = key;
				}
			}
		}
	}
}
|
||||||
|
|
||||||
|
/*************************************************
|
||||||
|
*** Implementation 2: faster on random arrays ***
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/*
 * Insertion sort of the keys in [s, t), ordered by rskey().
 * Used for small buckets by the radix sorts in this file.
 */
static inline void rs_insertsort(rstype_t *s, rstype_t *t)
{
	rstype_t *cur = s + 1;

	while (cur < t) {
		if (rskey(*cur) < rskey(*(cur - 1))) {
			rstype_t key = *cur;
			rstype_t *hole = cur;

			/* Shift larger keys one slot right until key fits. */
			while (hole > s && rskey(key) < rskey(*(hole - 1))) {
				*hole = *(hole - 1);
				--hole;
			}
			*hole = key;
		}
		++cur;
	}
}
|
||||||
|
/*
|
||||||
|
void rs_sort2(rstype_t *beg, rstype_t *end, int n_bits, int s)
|
||||||
|
{
|
||||||
|
int j, size = 1<<n_bits, m = size - 1;
|
||||||
|
unsigned long c[size];
|
||||||
|
rstype_t *i, *b[size], *e[size];
|
||||||
|
|
||||||
|
for (j = 0; j < size; ++j) c[j] = 0;
|
||||||
|
for (i = beg; i != end; ++i) ++c[rskey(*i)>>s&m];
|
||||||
|
b[0] = e[0] = beg;
|
||||||
|
for (j = 1; j != size; ++j) b[j] = e[j] = b[j - 1] + c[j - 1];
|
||||||
|
for (i = beg, j = 0; i != end;) {
|
||||||
|
rstype_t tmp = *i, swap;
|
||||||
|
int x;
|
||||||
|
for (;;) {
|
||||||
|
x = rskey(tmp)>>s&m;
|
||||||
|
if (e[x] == i) break;
|
||||||
|
swap = tmp; tmp = *e[x]; *e[x]++ = swap;
|
||||||
|
}
|
||||||
|
*i++ = tmp;
|
||||||
|
++e[x];
|
||||||
|
while (j != size && i >= b[j]) ++j;
|
||||||
|
while (j != size && e[j-1] == b[j]) ++j;
|
||||||
|
if (i < e[j-1]) i = e[j-1];
|
||||||
|
}
|
||||||
|
if (s) {
|
||||||
|
s = s > n_bits? s - n_bits : 0;
|
||||||
|
for (j = 0; j < size; ++j) {
|
||||||
|
if (c[j] >= RS_MIN_SIZE) rs_sort2(b[j], e[j], n_bits, s);
|
||||||
|
else if (c[j] >= 2) rs_insertsort(b[j], e[j]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
*/
|
||||||
|
void radix_sort(unsigned *array, int offset, int end, int shift) {
|
||||||
|
int x, y, value, temp;
|
||||||
|
int last[256] = { 0 }, pointer[256];
|
||||||
|
|
||||||
|
for (x=offset; x<end; ++x) {
|
||||||
|
++last[(array[x] >> shift) & 0xFF];
|
||||||
|
}
|
||||||
|
|
||||||
|
last[0] += offset;
|
||||||
|
pointer[0] = offset;
|
||||||
|
for (x=1; x<256; ++x) {
|
||||||
|
pointer[x] = last[x-1];
|
||||||
|
last[x] += last[x-1];
|
||||||
|
}
|
||||||
|
|
||||||
|
for (x=0; x<256; ++x) {
|
||||||
|
while (pointer[x] != last[x]) {
|
||||||
|
value = array[pointer[x]];
|
||||||
|
y = (value >> shift) & 0xFF;
|
||||||
|
while (x != y) {
|
||||||
|
temp = array[pointer[y]];
|
||||||
|
array[pointer[y]++] = value;
|
||||||
|
value = temp;
|
||||||
|
y = (value >> shift) & 0xFF;
|
||||||
|
}
|
||||||
|
array[pointer[x]++] = value;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (shift > 0) {
|
||||||
|
shift -= 8;
|
||||||
|
for (x=0; x<256; ++x) {
|
||||||
|
temp = x > 0 ? pointer[x] - pointer[x-1] : pointer[0] - offset;
|
||||||
|
if (temp > 64) {
|
||||||
|
radix_sort(array, pointer[x] - temp, pointer[x], shift);
|
||||||
|
} else if (temp > 1) rs_insertsort(array + pointer[x] - temp, array + pointer[x]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/*************************
|
||||||
|
*** END OF RADIX SORT ***
|
||||||
|
*************************/
|
||||||
|
|
||||||
|
/*
 * In-place MSD radix sort of a[0..last] (last is an inclusive index).
 *
 * PowerOfTwoRadix       : number of bins per pass.
 * Log2ofPowerOfTwoRadix : bits consumed per pass.
 * Threshold             : bins with at least this many elements recurse;
 *                         smaller bins with two or more elements are
 *                         finished by rs_insertsort().
 * bitMask / shiftRightAmount select the digit examined in this pass.
 *
 * The bin-advance loops test `nextBin < numberOfBins` BEFORE indexing
 * startOfBin[nextBin], so the arrays are never read one past their end.
 */
template< class _Type, unsigned long PowerOfTwoRadix, unsigned long Log2ofPowerOfTwoRadix, long Threshold >
inline void _RadixSort_Unsigned_PowerOf2Radix_1( _Type* a, long last, _Type bitMask, unsigned long shiftRightAmount )
{
	const unsigned long numberOfBins = PowerOfTwoRadix;
	unsigned long count[ numberOfBins ];
	for( unsigned long i = 0; i < numberOfBins; i++ )
		count[ i ] = 0;
	for ( long _current = 0; _current <= last; _current++ )	// Scan the array and count the number of times each value appears
	{
		unsigned long digit = (unsigned long)(( a[ _current ] & bitMask ) >> shiftRightAmount );	// extract the digit we are sorting based on
		count[ digit ]++;
	}
	long startOfBin[ numberOfBins ], endOfBin[ numberOfBins ], nextBin;
	startOfBin[ 0 ] = endOfBin[ 0 ] = nextBin = 0;
	for( unsigned long i = 1; i < numberOfBins; i++ )
		startOfBin[ i ] = endOfBin[ i ] = startOfBin[ i - 1 ] + count[ i - 1 ];
	for ( long _current = 0; _current <= last; )
	{
		unsigned long digit;
		_Type tmp = a[ _current ];	// get the compiler to recognize that a register can be used for the loop instead of a[_current] memory location
		while ( true ) {
			digit = (unsigned long)(( tmp & bitMask ) >> shiftRightAmount );	// extract the digit we are sorting based on
			if ( endOfBin[ digit ] == _current )
				break;
			// swap tmp with the element at the fill position of its bin
			_Type tmp2 = a[ endOfBin[ digit ] ];
			a[ endOfBin[ digit ] ] = tmp;
			tmp = tmp2;
			endOfBin[ digit ]++;
		}
		a[ _current ] = tmp;
		endOfBin[ digit ]++;	// leave the element at its location and grow the bin
		_current++;				// advance the current pointer to the next element
		// Bounds check first: nextBin may legitimately reach numberOfBins.
		// After the first pass through this loop nextBin is at least 1
		// (startOfBin[0] is 0 and _current is at least 1), so nextBin - 1
		// below is always a valid index.
		while( (unsigned long)nextBin < numberOfBins && _current >= startOfBin[ nextBin ] )
			nextBin++;
		while( (unsigned long)nextBin < numberOfBins && endOfBin[ nextBin - 1 ] == startOfBin[ nextBin ] )
			nextBin++;
		if ( _current < endOfBin[ nextBin - 1 ] )
			_current = endOfBin[ nextBin - 1 ];
	}
	bitMask >>= Log2ofPowerOfTwoRadix;
	if ( bitMask != 0 )	// end recursion when all the bits have been processed
	{
		if ( shiftRightAmount >= Log2ofPowerOfTwoRadix )	shiftRightAmount -= Log2ofPowerOfTwoRadix;
		else												shiftRightAmount  = 0;
		for( unsigned long i = 0; i < numberOfBins; i++ )
		{
			long numberOfElements = endOfBin[ i ] - startOfBin[ i ];
			if ( numberOfElements >= Threshold )	// endOfBin actually points to one beyond the bin
				_RadixSort_Unsigned_PowerOf2Radix_1< _Type, PowerOfTwoRadix, Log2ofPowerOfTwoRadix, Threshold >( &a[ startOfBin[ i ]], numberOfElements - 1, bitMask, shiftRightAmount );
			else if ( numberOfElements >= 2 )
				rs_insertsort(&a[ startOfBin[ i ]], &a[ endOfBin[ i ]]);
		}
	}
}
|
||||||
|
inline void RadixSortInPlace_HybridUnsigned_Radix256( unsigned* a, unsigned long a_size )
|
||||||
|
{
|
||||||
|
if ( a_size < 2 ) return;
|
||||||
|
unsigned long bitMask = 0xFF000000; // bitMask controls how many bits we process at a time
|
||||||
|
unsigned long shiftRightAmount = 24;
|
||||||
|
if ( a_size >= 32 )
|
||||||
|
_RadixSort_Unsigned_PowerOf2Radix_1<unsigned, 256, 8, 32>(a, a_size - 1, bitMask, shiftRightAmount );
|
||||||
|
else
|
||||||
|
rs_insertsort(a, a + a_size);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Three-way integer comparison functor: returns -1, 0 or 1. */
struct intcmp_t {
	inline int operator() (int a, int b) const {
		if (a < b) return -1;
		if (a > b) return 1;
		return 0;
	}
};
|
||||||
|
|
||||||
|
/* Three-way integer comparison: -1 if a < b, 1 if a > b, 0 if equal. */
int compare_int(int a, int b)
{
	if (a < b) return -1;
	if (a > b) return 1;
	return 0;
}
|
||||||
|
/*
 * qsort()-style comparator for int elements.
 *
 * Returns a negative, zero or positive value when *a is less than, equal to
 * or greater than *b. The result comes from two comparisons rather than a
 * subtraction, because `*a - *b` overflows signed int for operands of
 * opposite sign and large magnitude (e.g. INT_MIN and 1).
 */
int compare(const void *a, const void *b)
{
	const int x = *(const int*)a;
	const int y = *(const int*)b;
	return (x > y) - (x < y);
}
|
||||||
|
|
||||||
|
int main(int argc, char *argv[])
|
||||||
|
{
|
||||||
|
int i, N = 50000000;
|
||||||
|
int *array, *temp;
|
||||||
|
clock_t t1, t2;
|
||||||
|
if (argc == 1) fprintf(stderr, "Usage: %s [%d]\n", argv[0], N);
|
||||||
|
if (argc > 1) N = atoi(argv[1]);
|
||||||
|
temp = (int*)malloc(sizeof(int) * N);
|
||||||
|
array = (int*)malloc(sizeof(int) * N);
|
||||||
|
|
||||||
|
srand48(11);
|
||||||
|
for (i = 0; i < N; ++i) array[i] = (int)lrand48();
|
||||||
|
t1 = clock();
|
||||||
|
rs_sort((unsigned*)array, (unsigned*)array + N, 8, 24);
|
||||||
|
t2 = clock();
|
||||||
|
fprintf(stderr, "radix sort: %.3lf\n", (double)(t2-t1)/CLOCKS_PER_SEC);
|
||||||
|
for (i = 0; i < N-1; ++i) {
|
||||||
|
if (array[i] > array[i+1]) {
|
||||||
|
fprintf(stderr, "Bug in radix sort!\n");
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
t1 = clock();
|
||||||
|
rs_sort((unsigned*)array, (unsigned*)array + N, 8, 24);
|
||||||
|
t2 = clock();
|
||||||
|
fprintf(stderr, "radix sort (sorted): %.3lf\n", (double)(t2-t1)/CLOCKS_PER_SEC);
|
||||||
|
|
||||||
|
srand48(11);
|
||||||
|
for (i = 0; i < N; ++i) array[i] = (int)lrand48();
|
||||||
|
t1 = clock();
|
||||||
|
RadixSortInPlace_HybridUnsigned_Radix256((unsigned*)array, N);
|
||||||
|
// radix_sort((unsigned*)array, 0, N, 24);
|
||||||
|
t2 = clock();
|
||||||
|
fprintf(stderr, "vd's radix sort: %.3lf\n", (double)(t2-t1)/CLOCKS_PER_SEC);
|
||||||
|
for (i = 0; i < N-1; ++i) {
|
||||||
|
if (array[i] > array[i+1]) {
|
||||||
|
fprintf(stderr, "Bug in radix sort!\n");
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
t1 = clock();
|
||||||
|
RadixSortInPlace_HybridUnsigned_Radix256((unsigned*)array, N);
|
||||||
|
// radix_sort((unsigned*)array, 0, N, 24);
|
||||||
|
t2 = clock();
|
||||||
|
fprintf(stderr, "vd's radix sort (sorted): %.3lf\n", (double)(t2-t1)/CLOCKS_PER_SEC);
|
||||||
|
|
||||||
|
srand48(11);
|
||||||
|
for (i = 0; i < N; ++i) array[i] = (int)lrand48();
|
||||||
|
t1 = clock();
|
||||||
|
sort(array, array+N);
|
||||||
|
t2 = clock();
|
||||||
|
fprintf(stderr, "STL introsort: %.3lf\n", (double)(t2-t1)/CLOCKS_PER_SEC);
|
||||||
|
t1 = clock();
|
||||||
|
sort(array, array+N);
|
||||||
|
t2 = clock();
|
||||||
|
fprintf(stderr, "STL introsort (sorted): %.3lf\n", (double)(t2-t1)/CLOCKS_PER_SEC);
|
||||||
|
|
||||||
|
srand48(11);
|
||||||
|
for (i = 0; i < N; ++i) array[i] = (int)lrand48();
|
||||||
|
t1 = clock();
|
||||||
|
stable_sort(array, array+N);
|
||||||
|
t2 = clock();
|
||||||
|
fprintf(stderr, "STL stablesort: %.3lf\n", (double)(t2-t1)/CLOCKS_PER_SEC);
|
||||||
|
t1 = clock();
|
||||||
|
stable_sort(array, array+N);
|
||||||
|
t2 = clock();
|
||||||
|
fprintf(stderr, "STL stablesort (sorted): %.3lf\n", (double)(t2-t1)/CLOCKS_PER_SEC);
|
||||||
|
|
||||||
|
srand48(11);
|
||||||
|
for (i = 0; i < N; ++i) array[i] = (int)lrand48();
|
||||||
|
t1 = clock();
|
||||||
|
make_heap(array, array+N);
|
||||||
|
sort_heap(array, array+N);
|
||||||
|
t2 = clock();
|
||||||
|
fprintf(stderr, "STL heapsort: %.3lf\n", (double)(t2-t1)/CLOCKS_PER_SEC);
|
||||||
|
for (i = 0; i < N-1; ++i) {
|
||||||
|
if (array[i] > array[i+1]) {
|
||||||
|
fprintf(stderr, "Bug in heap_sort!\n");
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
t1 = clock();
|
||||||
|
make_heap(array, array+N);
|
||||||
|
sort_heap(array, array+N);
|
||||||
|
t2 = clock();
|
||||||
|
fprintf(stderr, "STL heapsort (sorted): %.3lf\n", (double)(t2-t1)/CLOCKS_PER_SEC);
|
||||||
|
|
||||||
|
srand48(11);
|
||||||
|
for (i = 0; i < N; ++i) array[i] = (int)lrand48();
|
||||||
|
t1 = clock();
|
||||||
|
ks_combsort(int, N, array);
|
||||||
|
t2 = clock();
|
||||||
|
fprintf(stderr, "combsort: %.3lf\n", (double)(t2-t1)/CLOCKS_PER_SEC);
|
||||||
|
for (i = 0; i < N-1; ++i) {
|
||||||
|
if (array[i] > array[i+1]) {
|
||||||
|
fprintf(stderr, "Bug in combsort!\n");
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
srand48(11);
|
||||||
|
for (i = 0; i < N; ++i) array[i] = (int)lrand48();
|
||||||
|
t1 = clock();
|
||||||
|
qsort(array, N, sizeof(int), compare);
|
||||||
|
t2 = clock();
|
||||||
|
fprintf(stderr, "libc qsort: %.3lf\n", (double)(t2-t1)/CLOCKS_PER_SEC);
|
||||||
|
|
||||||
|
srand48(11);
|
||||||
|
for (i = 0; i < N; ++i) array[i] = (int)lrand48();
|
||||||
|
t1 = clock();
|
||||||
|
ks_introsort(int, N, array);
|
||||||
|
t2 = clock();
|
||||||
|
fprintf(stderr, "my introsort: %.3lf\n", (double)(t2-t1)/CLOCKS_PER_SEC);
|
||||||
|
for (i = 0; i < N-1; ++i) {
|
||||||
|
if (array[i] > array[i+1]) {
|
||||||
|
fprintf(stderr, "Bug in intro_sort!\n");
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
t1 = clock();
|
||||||
|
ks_introsort(int, N, array);
|
||||||
|
t2 = clock();
|
||||||
|
fprintf(stderr, "introsort (sorted): %.3lf\n", (double)(t2-t1)/CLOCKS_PER_SEC);
|
||||||
|
|
||||||
|
srand48(11);
|
||||||
|
for (i = 0; i < N; ++i) array[i] = (int)lrand48();
|
||||||
|
t1 = clock();
|
||||||
|
ks_mergesort(int, N, array, 0);
|
||||||
|
t2 = clock();
|
||||||
|
fprintf(stderr, "iterative mergesort: %.3lf\n", (double)(t2-t1)/CLOCKS_PER_SEC);
|
||||||
|
for (i = 0; i < N-1; ++i) {
|
||||||
|
if (array[i] > array[i+1]) {
|
||||||
|
fprintf(stderr, "Bug in merge_sort!\n");
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
t1 = clock();
|
||||||
|
ks_mergesort(int, N, array, 0);
|
||||||
|
t2 = clock();
|
||||||
|
fprintf(stderr, "iterative mergesort (sorted): %.3lf\n", (double)(t2-t1)/CLOCKS_PER_SEC);
|
||||||
|
|
||||||
|
srand48(11);
|
||||||
|
for (i = 0; i < N; ++i) array[i] = (int)lrand48();
|
||||||
|
t1 = clock();
|
||||||
|
ks_heapmake(int, N, array);
|
||||||
|
ks_heapsort(int, N, array);
|
||||||
|
t2 = clock();
|
||||||
|
fprintf(stderr, "my heapsort: %.3lf\n", (double)(t2-t1)/CLOCKS_PER_SEC);
|
||||||
|
for (i = 0; i < N-1; ++i) {
|
||||||
|
if (array[i] > array[i+1]) {
|
||||||
|
fprintf(stderr, "Bug in heap_sort!\n");
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
t1 = clock();
|
||||||
|
ks_heapmake(int, N, array);
|
||||||
|
ks_heapsort(int, N, array);
|
||||||
|
t2 = clock();
|
||||||
|
fprintf(stderr, "heapsort (sorted): %.3lf\n", (double)(t2-t1)/CLOCKS_PER_SEC);
|
||||||
|
|
||||||
|
srand48(11);
|
||||||
|
for (i = 0; i < N; ++i) array[i] = (int)lrand48();
|
||||||
|
t1 = clock();
|
||||||
|
heapSort(array, N);
|
||||||
|
t2 = clock();
|
||||||
|
fprintf(stderr, "Paul's heapsort: %.3lf\n", (double)(t2-t1)/CLOCKS_PER_SEC);
|
||||||
|
for (i = 0; i < N-1; ++i) {
|
||||||
|
if (array[i] > array[i+1]) {
|
||||||
|
fprintf(stderr, "Bug in intro_sort!\n");
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
srand48(11);
|
||||||
|
for (i = 0; i < N; ++i) array[i] = (int)lrand48();
|
||||||
|
t1 = clock();
|
||||||
|
quickSort(array, N);
|
||||||
|
t2 = clock();
|
||||||
|
fprintf(stderr, "Paul's quicksort: %.3lf\n", (double)(t2-t1)/CLOCKS_PER_SEC);
|
||||||
|
for (i = 0; i < N-1; ++i) {
|
||||||
|
if (array[i] > array[i+1]) {
|
||||||
|
fprintf(stderr, "Bug in intro_sort!\n");
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
srand48(11);
|
||||||
|
for (i = 0; i < N; ++i) array[i] = (int)lrand48();
|
||||||
|
t1 = clock();
|
||||||
|
mergeSort(array, N);
|
||||||
|
t2 = clock();
|
||||||
|
fprintf(stderr, "Paul's mergesort: %.3lf\n", (double)(t2-t1)/CLOCKS_PER_SEC);
|
||||||
|
for (i = 0; i < N-1; ++i) {
|
||||||
|
if (array[i] > array[i+1]) {
|
||||||
|
fprintf(stderr, "Bug in intro_sort!\n");
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
free(array); free(temp);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,51 @@
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <time.h>
|
||||||
|
#include "kstring.h"
|
||||||
|
|
||||||
|
#define N 10000000
|
||||||
|
|
||||||
|
int main()
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
clock_t t;
|
||||||
|
kstring_t s, s2;
|
||||||
|
srand48(11);
|
||||||
|
s.l = s.m = 0; s.s = 0;
|
||||||
|
t = clock();
|
||||||
|
for (i = 0; i < N; ++i) {
|
||||||
|
int x = lrand48();
|
||||||
|
s.l = 0;
|
||||||
|
kputw(x, &s);
|
||||||
|
}
|
||||||
|
fprintf(stderr, "kputw: %lf\n", (double)(clock() - t) / CLOCKS_PER_SEC);
|
||||||
|
srand48(11);
|
||||||
|
t = clock();
|
||||||
|
for (i = 0; i < N; ++i) {
|
||||||
|
int x = lrand48();
|
||||||
|
s.l = 0;
|
||||||
|
ksprintf(&s, "%d", x);
|
||||||
|
}
|
||||||
|
fprintf(stderr, "ksprintf: %lf\n", (double)(clock() - t) / CLOCKS_PER_SEC);
|
||||||
|
|
||||||
|
srand48(11);
|
||||||
|
s2.l = s2.m = 0; s2.s = 0;
|
||||||
|
t = clock();
|
||||||
|
for (i = 0; i < N; ++i) {
|
||||||
|
int x = lrand48();
|
||||||
|
s2.l = s.l = 0;
|
||||||
|
kputw(x, &s2);
|
||||||
|
kputs(s2.s, &s);
|
||||||
|
}
|
||||||
|
fprintf(stderr, "kputw+kputs: %lf\n", (double)(clock() - t) / CLOCKS_PER_SEC);
|
||||||
|
srand48(11);
|
||||||
|
t = clock();
|
||||||
|
for (i = 0; i < N; ++i) {
|
||||||
|
int x = lrand48();
|
||||||
|
s2.l = s.l = 0;
|
||||||
|
kputw(x, &s2);
|
||||||
|
ksprintf(&s, "%s", s2.s);
|
||||||
|
}
|
||||||
|
fprintf(stderr, "kputw+ksprintf: %lf\n", (double)(clock() - t) / CLOCKS_PER_SEC);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,131 @@
|
||||||
|
#include <string.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <time.h>
|
||||||
|
#include "kstring.h"
|
||||||
|
|
||||||
|
#ifdef __APPLE__
|
||||||
|
#define HAVE_STRNSTR
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef __linux__
|
||||||
|
#define HAVE_MEMMEM
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* Benchmark parameters read by main(). */
static int str_len  = 128 * 1024 * 1024;	/* size of the generated text, in bytes */
static int pat_len  = 30;					/* length of each searched pattern */
static int alphabet = 2;					/* alphabet size passed to gen_data() */
static int repeat   = 50;					/* number of searches per algorithm */
|
||||||
|
|
||||||
|
/*
 * Allocate and return a NUL-terminated buffer of `len` bytes filled with
 * pseudo-random characters drawn from `a` consecutive codes starting at '!'.
 *
 * The generator is reseeded with 11 on every call, so equal arguments yield
 * equal data. The last byte (index len - 1) is the terminator; it is placed
 * relative to `len`, not the global str_len, so any length is safe.
 *
 * Returns NULL if len is not positive or the allocation fails. The caller
 * owns the buffer and releases it with free().
 */
char *gen_data(int len, int a)
{
	char *data;
	int i;

	if (len <= 0)
		return NULL;
	data = (char *)malloc(len);
	if (data == NULL)
		return NULL;

	srand48(11);
	for (i = 0; i < len; ++i)
		data[i] = (int)(a * drand48()) + '!';
	data[len - 1] = 0;
	return data;
}
|
||||||
|
// http://srcvault.scali.eu.org/cgi-bin/Syntax/c/BoyerMoore.c
|
||||||
|
/*
 * Find the first occurrence of string[0..strLength-1] inside
 * data[0..dataLength-1] using the Boyer-Moore-Horspool bad-character rule.
 *
 * Returns a pointer to the first byte of the match, or NULL when there is no
 * match, the pattern is empty, or the pattern is longer than the data.
 *
 * Every candidate alignment up to dataLength - strLength is examined and no
 * byte outside data[0..dataLength-1] is ever read.
 */
char *BoyerMoore( unsigned char *data, unsigned int dataLength, unsigned char *string, unsigned int strLength )
{
	unsigned int skipTable[256], i, last, pos;

	if (strLength == 0 || dataLength < strLength)
		return NULL;

	last = strLength - 1;

	/* Shift for a byte = distance from its last occurrence in the pattern
	   (excluding the final position) to the pattern's end; bytes that do
	   not occur shift by the whole pattern length. */
	for (i = 0; i < 256; i++)
		skipTable[i] = strLength;
	for (i = 0; i < last; i++)
		skipTable[string[i]] = last - i;

	pos = 0;
	while (pos <= dataLength - strLength) {
		unsigned char tail = data[pos + last];

		if (tail == string[last]) {
			/* Last byte matches: verify the remaining prefix. */
			for (i = 0; i < last && data[pos + i] == string[i]; i++)
				;
			if (i == last)
				return (char *)(data + pos);
		}
		/* pos + shift never exceeds dataLength, so this cannot wrap. */
		pos += skipTable[tail];
	}
	return NULL;
}
|
||||||
|
|
||||||
|
int main()
|
||||||
|
{
|
||||||
|
char *data;
|
||||||
|
int i;
|
||||||
|
clock_t t;
|
||||||
|
t = clock();
|
||||||
|
data = gen_data(str_len, alphabet);
|
||||||
|
fprintf(stderr, "Generate data in %.3f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC);
|
||||||
|
{
|
||||||
|
t = clock(); srand48(1331);
|
||||||
|
for (i = 0; i < repeat; ++i) {
|
||||||
|
int y = lrand48() % (str_len - pat_len);
|
||||||
|
char *ret;
|
||||||
|
ret = kmemmem(data, str_len, data + y, pat_len, 0);
|
||||||
|
// printf("%d, %d\n", (int)(ret - data), y);
|
||||||
|
}
|
||||||
|
fprintf(stderr, "Search patterns in %.3f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC);
|
||||||
|
}
|
||||||
|
if (1) {
|
||||||
|
t = clock(); srand48(1331);
|
||||||
|
for (i = 0; i < repeat; ++i) {
|
||||||
|
int y = lrand48() % (str_len - pat_len);
|
||||||
|
char *ret;
|
||||||
|
ret = BoyerMoore(data, str_len, data + y, pat_len);
|
||||||
|
// printf("%d, %d\n", (int)(ret - data), y);
|
||||||
|
}
|
||||||
|
fprintf(stderr, "Search patterns in %.3f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC);
|
||||||
|
}
|
||||||
|
#ifdef HAVE_STRNSTR
|
||||||
|
if (1) {
|
||||||
|
char *tmp;
|
||||||
|
t = clock(); srand48(1331);
|
||||||
|
tmp = calloc(pat_len+1, 1);
|
||||||
|
for (i = 0; i < repeat; ++i) {
|
||||||
|
int y = lrand48() % (str_len - pat_len);
|
||||||
|
char *ret;
|
||||||
|
memcpy(tmp, data + y, pat_len);
|
||||||
|
ret = strnstr(data, tmp, str_len);
|
||||||
|
}
|
||||||
|
fprintf(stderr, "Search patterns in %.3f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
#ifdef HAVE_MEMMEM
|
||||||
|
if (1) {
|
||||||
|
t = clock(); srand48(1331);
|
||||||
|
for (i = 0; i < repeat; ++i) {
|
||||||
|
int y = lrand48() % (str_len - pat_len);
|
||||||
|
char *ret;
|
||||||
|
ret = memmem(data, str_len, data + y, pat_len);
|
||||||
|
// printf("%d, %d\n", (int)(ret - data), y);
|
||||||
|
}
|
||||||
|
fprintf(stderr, "Search patterns in %.3f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,132 @@
|
||||||
|
#include <limits.h>
|
||||||
|
#include <stdarg.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
|
#include "kstring.h"
|
||||||
|
|
||||||
|
int nfail = 0;
|
||||||
|
|
||||||
|
void check(const char *what, const kstring_t *ks, const char *correct)
|
||||||
|
{
|
||||||
|
if (ks->l != strlen(correct) || strcmp(ks->s, correct) != 0) {
|
||||||
|
fprintf(stderr, "%s produced \"%.*s\" (\"%s\" is correct)\tFAIL\n", what, (int)(ks->l), ks->s, correct);
|
||||||
|
nfail++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Check that kputw() formats `n` exactly like sprintf("%d"). */
void test_kputw(kstring_t *ks, int n)
{
	char expected[16];

	sprintf(expected, "%d", n);

	ks->l = 0;	/* reuse the buffer from the start */
	kputw(n, ks);

	check("kputw()", ks, expected);
}
|
||||||
|
|
||||||
|
/* Check that kputl() formats `n` exactly like sprintf("%ld"). */
void test_kputl(kstring_t *ks, long n)
{
	char expected[24];

	sprintf(expected, "%ld", n);

	ks->l = 0;	/* reuse the buffer from the start */
	kputl(n, ks);

	check("kputl()", ks, expected);
}
|
||||||
|
|
||||||
|
/*
 * fgets()-like reader over an in-memory string.
 *
 * vtextp points to a `const char *` cursor. Up to buflen-1 bytes, ending at
 * and including the next '\n' if one is within reach, are copied into buf
 * and NUL-terminated, and the cursor is advanced past the copied bytes.
 *
 * Returns buf, or NULL when the cursor is already at the end of the text or
 * buflen is not positive (a non-positive buflen would otherwise turn into a
 * huge unsigned limit and let the copy overrun buf).
 */
static char *mem_gets(char *buf, int buflen, void *vtextp)
{
	const char **textp = (const char **) vtextp;
	const char *nl;
	size_t n, limit;

	if (buflen <= 0) return NULL;
	limit = (size_t)(buflen - 1);

	nl = strchr(*textp, '\n');
	n = nl? (size_t)(nl - *textp) + 1 : strlen(*textp);

	if (n == 0) return NULL;

	if (n > limit) n = limit;
	memcpy(buf, *textp, n);
	buf[n] = '\0';
	*textp += n;
	return buf;
}
|
||||||
|
|
||||||
|
/*
 * Read `text` line by line with kgetline() (through mem_gets) and compare
 * each line with the next expected string from the NULL-terminated variadic
 * list. A failed read is recorded as the literal "EOF". After the last
 * expected line, one further read must fail; if it succeeds instead, that
 * line is reported via a check against "EOF".
 */
void test_kgetline(kstring_t *ks, const char *text, ...)
{
	va_list arg;

	va_start(arg, text);
	for (;;) {
		const char *expected = va_arg(arg, const char *);
		if (expected == NULL)
			break;

		ks->l = 0;
		if (kgetline(ks, mem_gets, &text) != 0)
			kputs("EOF", ks);
		check("kgetline()", ks, expected);
	}
	va_end(arg);

	ks->l = 0;
	if (kgetline(ks, mem_gets, &text) == 0)
		check("kgetline()", ks, "EOF");
}
|
||||||
|
|
||||||
|
int main(int argc, char **argv)
|
||||||
|
{
|
||||||
|
kstring_t ks;
|
||||||
|
|
||||||
|
ks.l = ks.m = 0;
|
||||||
|
ks.s = NULL;
|
||||||
|
|
||||||
|
test_kputw(&ks, 0);
|
||||||
|
test_kputw(&ks, 1);
|
||||||
|
test_kputw(&ks, 37);
|
||||||
|
test_kputw(&ks, 12345);
|
||||||
|
test_kputw(&ks, -12345);
|
||||||
|
test_kputw(&ks, INT_MAX);
|
||||||
|
test_kputw(&ks, -INT_MAX);
|
||||||
|
test_kputw(&ks, INT_MIN);
|
||||||
|
|
||||||
|
test_kputl(&ks, 0);
|
||||||
|
test_kputl(&ks, 1);
|
||||||
|
test_kputl(&ks, 37);
|
||||||
|
test_kputl(&ks, 12345);
|
||||||
|
test_kputl(&ks, -12345);
|
||||||
|
test_kputl(&ks, INT_MAX);
|
||||||
|
test_kputl(&ks, -INT_MAX);
|
||||||
|
test_kputl(&ks, INT_MIN);
|
||||||
|
test_kputl(&ks, LONG_MAX);
|
||||||
|
test_kputl(&ks, -LONG_MAX);
|
||||||
|
test_kputl(&ks, LONG_MIN);
|
||||||
|
|
||||||
|
test_kgetline(&ks, "", NULL);
|
||||||
|
test_kgetline(&ks, "apple", "apple", NULL);
|
||||||
|
test_kgetline(&ks, "banana\n", "banana", NULL);
|
||||||
|
test_kgetline(&ks, "carrot\r\n", "carrot", NULL);
|
||||||
|
test_kgetline(&ks, "\n", "", NULL);
|
||||||
|
test_kgetline(&ks, "\n\n", "", "", NULL);
|
||||||
|
test_kgetline(&ks, "foo\nbar", "foo", "bar", NULL);
|
||||||
|
test_kgetline(&ks, "foo\nbar\n", "foo", "bar", NULL);
|
||||||
|
test_kgetline(&ks,
|
||||||
|
"abcdefghijklmnopqrstuvwxyz0123456789\nABCDEFGHIJKLMNOPQRSTUVWXYZ\n",
|
||||||
|
"abcdefghijklmnopqrstuvwxyz0123456789",
|
||||||
|
"ABCDEFGHIJKLMNOPQRSTUVWXYZ", NULL);
|
||||||
|
|
||||||
|
if (argc > 1) {
|
||||||
|
FILE *f = fopen(argv[1], "r");
|
||||||
|
if (f) {
|
||||||
|
for (ks.l = 0; kgetline(&ks, (kgets_func *)fgets, f) == 0; ks.l = 0)
|
||||||
|
puts(ks.s);
|
||||||
|
fclose(f);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
free(ks.s);
|
||||||
|
|
||||||
|
if (nfail > 0) {
|
||||||
|
fprintf(stderr, "Total failures: %d\n", nfail);
|
||||||
|
return EXIT_FAILURE;
|
||||||
|
}
|
||||||
|
|
||||||
|
return EXIT_SUCCESS;
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,69 @@
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <assert.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <pthread.h>
|
||||||
|
#if HAVE_CILK
|
||||||
|
#include <cilk/cilk.h>
|
||||||
|
#include <cilk/cilk_api.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* Shared state for the escape-time computation performed by compute(). */
typedef struct {
	int max_iter;		/* iteration cap per point */
	int w, h;			/* grid width and height, in points */
	double xmin, xmax;	/* horizontal range mapped onto the grid */
	double ymin, ymax;	/* vertical range mapped onto the grid */
	int *k;				/* per-point iteration counts; w*h entries, negative = not computed yet */
} global_t;
|
||||||
|
|
||||||
|
static void compute(void *_g, int i, int tid)
|
||||||
|
{
|
||||||
|
global_t *g = (global_t*)_g;
|
||||||
|
double x, x0 = g->xmin + (g->xmax - g->xmin) * (i%g->w) / g->w;
|
||||||
|
double y, y0 = g->ymin + (g->ymax - g->ymin) * (i/g->w) / g->h;
|
||||||
|
int k;
|
||||||
|
|
||||||
|
assert(g->k[i] < 0);
|
||||||
|
x = x0, y = y0;
|
||||||
|
for (k = 0; k < g->max_iter; ++k) {
|
||||||
|
double z = x * y;
|
||||||
|
x *= x; y *= y;
|
||||||
|
if (x + y >= 4) break;
|
||||||
|
x = x - y + x0;
|
||||||
|
y = z + z + y0;
|
||||||
|
}
|
||||||
|
g->k[i] = k;
|
||||||
|
}
|
||||||
|
|
||||||
|
void kt_for(int n_threads, int n_items, void (*func)(void*,int,int), void *data);
|
||||||
|
|
||||||
|
int main(int argc, char *argv[])
|
||||||
|
{
|
||||||
|
int i, tmp, tot, type = 0, n_threads = 2;
|
||||||
|
global_t global = { 10240*100, 800, 600, -2., -1.2, -1.2, 1.2, 0 };
|
||||||
|
// global_t global = { 10240*1, 8, 6, -2., -1.2, -1.2, 1.2, 0 };
|
||||||
|
|
||||||
|
if (argc > 1) {
|
||||||
|
type = argv[1][0] == 'o'? 2 : argv[1][0] == 'c'? 3 : argv[1][0] == 'n'? 1 : 0;
|
||||||
|
if (argv[1][0] >= '0' && argv[1][0] <= '9')
|
||||||
|
n_threads = atoi(argv[1]);
|
||||||
|
} else {
|
||||||
|
fprintf(stderr, "Usage: ./a.out [openmp | cilk | #threads]\n");
|
||||||
|
}
|
||||||
|
tot = global.w * global.h;
|
||||||
|
global.k = calloc(tot, sizeof(int));
|
||||||
|
for (i = 0; i < tot; ++i) global.k[i] = -1;
|
||||||
|
if (type == 0) {
|
||||||
|
kt_for(n_threads, tot, compute, &global);
|
||||||
|
} else if (type == 2) {
|
||||||
|
#pragma omp parallel for
|
||||||
|
for (i = 0; i < tot; ++i)
|
||||||
|
compute(&global, i, 0);
|
||||||
|
} else if (type == 3) {
|
||||||
|
#if HAVE_CILK
|
||||||
|
cilk_for (i = 0; i < tot; ++i)
|
||||||
|
compute(&global, i, 0);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
for (i = tmp = 0; i < tot; ++i) tmp += (global.k[i] < 0);
|
||||||
|
free(global.k);
|
||||||
|
assert(tmp == 0);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,80 @@
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <assert.h>
|
||||||
|
|
||||||
|
void kt_for(int n_threads, void (*func)(void*,long,int), void *data, long n);
|
||||||
|
void kt_pipeline(int n_threads, void *(*func)(void*, int, void*), void *shared_data, int n_steps);
|
||||||
|
|
||||||
|
/* Data shared by all steps of the line-reversing pipeline. */
typedef struct {
	FILE *fp;		/* input stream read by step 0 */
	int max_lines;	/* maximum number of lines per batch */
	int buf_size;	/* size of `buf`, in bytes */
	int n_threads;	/* thread count passed to kt_for() in step 1 */
	char *buf;		/* line buffer filled by fgets() in step 0 */
} pipeline_t;
|
||||||
|
|
||||||
|
/* One batch of lines travelling through the pipeline steps. */
typedef struct {
    int n_lines;   /* number of entries of lines currently in use */
    char **lines;  /* array of individually allocated line strings */
} step_t;
|
||||||
|
|
||||||
|
static void worker_for(void *_data, long i, int tid) // kt_for() callback
|
||||||
|
{
|
||||||
|
step_t *step = (step_t*)_data;
|
||||||
|
char *s = step->lines[i];
|
||||||
|
int t, l, j;
|
||||||
|
l = strlen(s) - 1;
|
||||||
|
assert(s[l] == '\n'); // not supporting long lines
|
||||||
|
for (j = 0; j < l>>1; ++j)
|
||||||
|
t = s[j], s[j] = s[l - 1 - j], s[l - 1 - j] = t;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void *worker_pipeline(void *shared, int step, void *in) // kt_pipeline() callback
|
||||||
|
{
|
||||||
|
pipeline_t *p = (pipeline_t*)shared;
|
||||||
|
if (step == 0) { // step 0: read lines into the buffer
|
||||||
|
step_t *s;
|
||||||
|
s = calloc(1, sizeof(step_t));
|
||||||
|
s->lines = calloc(p->max_lines, sizeof(char*));
|
||||||
|
while (fgets(p->buf, p->buf_size, p->fp) != 0) {
|
||||||
|
s->lines[s->n_lines] = strdup(p->buf);
|
||||||
|
if (++s->n_lines >= p->max_lines)
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if (s->n_lines) return s;
|
||||||
|
} else if (step == 1) { // step 1: reverse lines
|
||||||
|
kt_for(p->n_threads, worker_for, in, ((step_t*)in)->n_lines);
|
||||||
|
return in;
|
||||||
|
} else if (step == 2) { // step 3: write the buffer to output
|
||||||
|
step_t *s = (step_t*)in;
|
||||||
|
while (s->n_lines > 0) {
|
||||||
|
fputs(s->lines[--s->n_lines], stdout);
|
||||||
|
free(s->lines[s->n_lines]);
|
||||||
|
}
|
||||||
|
free(s->lines); free(s);
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
int main(int argc, char *argv[])
|
||||||
|
{
|
||||||
|
pipeline_t pl;
|
||||||
|
int pl_threads;
|
||||||
|
if (argc == 1) {
|
||||||
|
fprintf(stderr, "Usage: reverse <in.txt> [pipeline_threads [for_threads]]\n");
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
pl.fp = strcmp(argv[1], "-")? fopen(argv[1], "r") : stdin;
|
||||||
|
if (pl.fp == 0) {
|
||||||
|
fprintf(stderr, "ERROR: failed to open the input file.\n");
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
pl_threads = argc > 2? atoi(argv[2]) : 3;
|
||||||
|
pl.max_lines = 4096;
|
||||||
|
pl.buf_size = 0x10000;
|
||||||
|
pl.n_threads = argc > 3? atoi(argv[3]) : 1;
|
||||||
|
pl.buf = calloc(pl.buf_size, 1);
|
||||||
|
kt_pipeline(pl_threads, worker_pipeline, &pl, 3);
|
||||||
|
free(pl.buf);
|
||||||
|
if (pl.fp != stdin) fclose(pl.fp);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,69 @@
|
||||||
|
#include <vector>
|
||||||
|
#include <time.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include "kvec.h"
|
||||||
|
|
||||||
|
int main()
|
||||||
|
{
|
||||||
|
int M = 10, N = 20000000, i, j;
|
||||||
|
clock_t t;
|
||||||
|
t = clock();
|
||||||
|
for (i = 0; i < M; ++i) {
|
||||||
|
int *array = (int*)malloc(N * sizeof(int));
|
||||||
|
for (j = 0; j < N; ++j) array[j] = j;
|
||||||
|
free(array);
|
||||||
|
}
|
||||||
|
printf("C array, preallocated: %.3f sec\n",
|
||||||
|
(float)(clock() - t) / CLOCKS_PER_SEC);
|
||||||
|
t = clock();
|
||||||
|
for (i = 0; i < M; ++i) {
|
||||||
|
int *array = 0, max = 0;
|
||||||
|
for (j = 0; j < N; ++j) {
|
||||||
|
if (j == max) {
|
||||||
|
max = !max? 1 : max << 1;
|
||||||
|
array = (int*)realloc(array, sizeof(int)*max);
|
||||||
|
}
|
||||||
|
array[j] = j;
|
||||||
|
}
|
||||||
|
free(array);
|
||||||
|
}
|
||||||
|
printf("C array, dynamic: %.3f sec\n",
|
||||||
|
(float)(clock() - t) / CLOCKS_PER_SEC);
|
||||||
|
t = clock();
|
||||||
|
for (i = 0; i < M; ++i) {
|
||||||
|
kvec_t(int) array;
|
||||||
|
kv_init(array);
|
||||||
|
kv_resize(int, array, N);
|
||||||
|
for (j = 0; j < N; ++j) kv_a(int, array, j) = j;
|
||||||
|
kv_destroy(array);
|
||||||
|
}
|
||||||
|
printf("C vector, dynamic(kv_a): %.3f sec\n",
|
||||||
|
(float)(clock() - t) / CLOCKS_PER_SEC);
|
||||||
|
t = clock();
|
||||||
|
for (i = 0; i < M; ++i) {
|
||||||
|
kvec_t(int) array;
|
||||||
|
kv_init(array);
|
||||||
|
for (j = 0; j < N; ++j)
|
||||||
|
kv_push(int, array, j);
|
||||||
|
kv_destroy(array);
|
||||||
|
}
|
||||||
|
printf("C vector, dynamic(kv_push): %.3f sec\n",
|
||||||
|
(float)(clock() - t) / CLOCKS_PER_SEC);
|
||||||
|
t = clock();
|
||||||
|
for (i = 0; i < M; ++i) {
|
||||||
|
std::vector<int> array;
|
||||||
|
array.reserve(N);
|
||||||
|
for (j = 0; j < N; ++j) array[j] = j;
|
||||||
|
}
|
||||||
|
printf("C++ vector, preallocated: %.3f sec\n",
|
||||||
|
(float)(clock() - t) / CLOCKS_PER_SEC);
|
||||||
|
t = clock();
|
||||||
|
for (i = 0; i < M; ++i) {
|
||||||
|
std::vector<int> array;
|
||||||
|
for (j = 0; j < N; ++j) array.push_back(j);
|
||||||
|
}
|
||||||
|
printf("C++ vector, dynamic: %.3f sec\n",
|
||||||
|
(float)(clock() - t) / CLOCKS_PER_SEC);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,416 @@
|
||||||
|
/**
|
||||||
|
* MIT License
|
||||||
|
*
|
||||||
|
* Copyright (c) 2017 Thibaut Goetghebuer-Planchon <tessil@gmx.com>
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
* of this software and associated documentation files (the "Software"), to deal
|
||||||
|
* in the Software without restriction, including without limitation the rights
|
||||||
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the Software is
|
||||||
|
* furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in
|
||||||
|
* all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
|
* SOFTWARE.
|
||||||
|
*/
|
||||||
|
#ifndef TSL_ROBIN_GROWTH_POLICY_H
|
||||||
|
#define TSL_ROBIN_GROWTH_POLICY_H
|
||||||
|
|
||||||
|
#include <algorithm>
|
||||||
|
#include <array>
|
||||||
|
#include <climits>
|
||||||
|
#include <cmath>
|
||||||
|
#include <cstddef>
|
||||||
|
#include <cstdint>
|
||||||
|
#include <iterator>
|
||||||
|
#include <limits>
|
||||||
|
#include <ratio>
|
||||||
|
#include <stdexcept>
|
||||||
|
|
||||||
|
// A change of the major version indicates an API and/or ABI break (change of
|
||||||
|
// in-memory layout of the data structure)
|
||||||
|
#define TSL_RH_VERSION_MAJOR 1
|
||||||
|
// A change of the minor version indicates the addition of a feature without
|
||||||
|
// impact on the API/ABI
|
||||||
|
#define TSL_RH_VERSION_MINOR 3
|
||||||
|
// A change of the patch version indicates a bugfix without additional
|
||||||
|
// functionality
|
||||||
|
#define TSL_RH_VERSION_PATCH 0
|
||||||
|
|
||||||
|
#ifdef TSL_DEBUG
|
||||||
|
#define tsl_rh_assert(expr) assert(expr)
|
||||||
|
#else
|
||||||
|
#define tsl_rh_assert(expr) (static_cast<void>(0))
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/**
|
||||||
|
* If exceptions are enabled, throw the exception passed in parameter, otherwise
|
||||||
|
* call std::terminate.
|
||||||
|
*/
|
||||||
|
#if (defined(__cpp_exceptions) || defined(__EXCEPTIONS) || \
|
||||||
|
(defined(_MSC_VER) && defined(_CPPUNWIND))) && \
|
||||||
|
!defined(TSL_NO_EXCEPTIONS)
|
||||||
|
#define TSL_RH_THROW_OR_TERMINATE(ex, msg) throw ex(msg)
|
||||||
|
#else
|
||||||
|
#define TSL_RH_NO_EXCEPTIONS
|
||||||
|
#ifdef TSL_DEBUG
|
||||||
|
#include <iostream>
|
||||||
|
#define TSL_RH_THROW_OR_TERMINATE(ex, msg) \
|
||||||
|
do { \
|
||||||
|
std::cerr << msg << std::endl; \
|
||||||
|
std::terminate(); \
|
||||||
|
} while (0)
|
||||||
|
#else
|
||||||
|
#define TSL_RH_THROW_OR_TERMINATE(ex, msg) std::terminate()
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined(__GNUC__) || defined(__clang__)
|
||||||
|
#define TSL_RH_LIKELY(exp) (__builtin_expect(!!(exp), true))
|
||||||
|
#else
|
||||||
|
#define TSL_RH_LIKELY(exp) (exp)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#define TSL_RH_UNUSED(x) static_cast<void>(x)
|
||||||
|
|
||||||
|
namespace tsl {
|
||||||
|
namespace rh {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Grow the hash table by a factor of GrowthFactor keeping the bucket count to a
|
||||||
|
* power of two. It allows the table to use a mask operation instead of a modulo
|
||||||
|
* operation to map a hash to a bucket.
|
||||||
|
*
|
||||||
|
* GrowthFactor must be a power of two >= 2.
|
||||||
|
*/
|
||||||
|
template <std::size_t GrowthFactor>
|
||||||
|
class power_of_two_growth_policy {
|
||||||
|
public:
|
||||||
|
/**
|
||||||
|
* Called on the hash table creation and on rehash. The number of buckets for
|
||||||
|
* the table is passed in parameter. This number is a minimum, the policy may
|
||||||
|
* update this value with a higher value if needed (but not lower).
|
||||||
|
*
|
||||||
|
* If 0 is given, min_bucket_count_in_out must still be 0 after the policy
|
||||||
|
* creation and bucket_for_hash must always return 0 in this case.
|
||||||
|
*/
|
||||||
|
explicit power_of_two_growth_policy(std::size_t& min_bucket_count_in_out) {
|
||||||
|
if (min_bucket_count_in_out > max_bucket_count()) {
|
||||||
|
TSL_RH_THROW_OR_TERMINATE(std::length_error,
|
||||||
|
"The hash table exceeds its maximum size.");
|
||||||
|
}
|
||||||
|
|
||||||
|
if (min_bucket_count_in_out > 0) {
|
||||||
|
min_bucket_count_in_out =
|
||||||
|
round_up_to_power_of_two(min_bucket_count_in_out);
|
||||||
|
m_mask = min_bucket_count_in_out - 1;
|
||||||
|
} else {
|
||||||
|
m_mask = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Return the bucket [0, bucket_count()) to which the hash belongs.
|
||||||
|
* If bucket_count() is 0, it must always return 0.
|
||||||
|
*/
|
||||||
|
std::size_t bucket_for_hash(std::size_t hash) const noexcept {
|
||||||
|
return hash & m_mask;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Return the number of buckets that should be used on next growth.
|
||||||
|
*/
|
||||||
|
std::size_t next_bucket_count() const {
|
||||||
|
if ((m_mask + 1) > max_bucket_count() / GrowthFactor) {
|
||||||
|
TSL_RH_THROW_OR_TERMINATE(std::length_error,
|
||||||
|
"The hash table exceeds its maximum size.");
|
||||||
|
}
|
||||||
|
|
||||||
|
return (m_mask + 1) * GrowthFactor;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Return the maximum number of buckets supported by the policy.
|
||||||
|
*/
|
||||||
|
std::size_t max_bucket_count() const {
|
||||||
|
// Largest power of two.
|
||||||
|
return (std::numeric_limits<std::size_t>::max() / 2) + 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Reset the growth policy as if it was created with a bucket count of 0.
|
||||||
|
* After a clear, the policy must always return 0 when bucket_for_hash is
|
||||||
|
* called.
|
||||||
|
*/
|
||||||
|
void clear() noexcept { m_mask = 0; }
|
||||||
|
|
||||||
|
private:
|
||||||
|
static std::size_t round_up_to_power_of_two(std::size_t value) {
|
||||||
|
if (is_power_of_two(value)) {
|
||||||
|
return value;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (value == 0) {
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
--value;
|
||||||
|
for (std::size_t i = 1; i < sizeof(std::size_t) * CHAR_BIT; i *= 2) {
|
||||||
|
value |= value >> i;
|
||||||
|
}
|
||||||
|
|
||||||
|
return value + 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
static constexpr bool is_power_of_two(std::size_t value) {
|
||||||
|
return value != 0 && (value & (value - 1)) == 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
protected:
|
||||||
|
static_assert(is_power_of_two(GrowthFactor) && GrowthFactor >= 2,
|
||||||
|
"GrowthFactor must be a power of two >= 2.");
|
||||||
|
|
||||||
|
std::size_t m_mask;
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Grow the hash table by GrowthFactor::num / GrowthFactor::den and use a modulo
|
||||||
|
* to map a hash to a bucket. Slower but it can be useful if you want a slower
|
||||||
|
* growth.
|
||||||
|
*/
|
||||||
|
template <class GrowthFactor = std::ratio<3, 2>>
|
||||||
|
class mod_growth_policy {
|
||||||
|
public:
|
||||||
|
explicit mod_growth_policy(std::size_t& min_bucket_count_in_out) {
|
||||||
|
if (min_bucket_count_in_out > max_bucket_count()) {
|
||||||
|
TSL_RH_THROW_OR_TERMINATE(std::length_error,
|
||||||
|
"The hash table exceeds its maximum size.");
|
||||||
|
}
|
||||||
|
|
||||||
|
if (min_bucket_count_in_out > 0) {
|
||||||
|
m_mod = min_bucket_count_in_out;
|
||||||
|
} else {
|
||||||
|
m_mod = 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
std::size_t bucket_for_hash(std::size_t hash) const noexcept {
|
||||||
|
return hash % m_mod;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::size_t next_bucket_count() const {
|
||||||
|
if (m_mod == max_bucket_count()) {
|
||||||
|
TSL_RH_THROW_OR_TERMINATE(std::length_error,
|
||||||
|
"The hash table exceeds its maximum size.");
|
||||||
|
}
|
||||||
|
|
||||||
|
const double next_bucket_count =
|
||||||
|
std::ceil(double(m_mod) * REHASH_SIZE_MULTIPLICATION_FACTOR);
|
||||||
|
if (!std::isnormal(next_bucket_count)) {
|
||||||
|
TSL_RH_THROW_OR_TERMINATE(std::length_error,
|
||||||
|
"The hash table exceeds its maximum size.");
|
||||||
|
}
|
||||||
|
|
||||||
|
if (next_bucket_count > double(max_bucket_count())) {
|
||||||
|
return max_bucket_count();
|
||||||
|
} else {
|
||||||
|
return std::size_t(next_bucket_count);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
std::size_t max_bucket_count() const { return MAX_BUCKET_COUNT; }
|
||||||
|
|
||||||
|
void clear() noexcept { m_mod = 1; }
|
||||||
|
|
||||||
|
private:
|
||||||
|
static constexpr double REHASH_SIZE_MULTIPLICATION_FACTOR =
|
||||||
|
1.0 * GrowthFactor::num / GrowthFactor::den;
|
||||||
|
static const std::size_t MAX_BUCKET_COUNT =
|
||||||
|
std::size_t(double(std::numeric_limits<std::size_t>::max() /
|
||||||
|
REHASH_SIZE_MULTIPLICATION_FACTOR));
|
||||||
|
|
||||||
|
static_assert(REHASH_SIZE_MULTIPLICATION_FACTOR >= 1.1,
|
||||||
|
"Growth factor should be >= 1.1.");
|
||||||
|
|
||||||
|
std::size_t m_mod;
|
||||||
|
};
|
||||||
|
|
||||||
|
namespace detail {

// Number of entries in PRIMES and MOD_PRIME. More entries are compiled in
// when std::size_t can hold unsigned long / unsigned long long values.
#if SIZE_MAX >= ULLONG_MAX
#define TSL_RH_NB_PRIMES 51
#elif SIZE_MAX >= ULONG_MAX
#define TSL_RH_NB_PRIMES 40
#else
#define TSL_RH_NB_PRIMES 23
#endif

// Ascending table of the bucket counts selectable by prime_growth_policy.
static constexpr const std::array<std::size_t, TSL_RH_NB_PRIMES> PRIMES = {{
    1u,     5u,     17u,    29u,    37u,     53u,     67u,     79u,
    97u,    131u,   193u,   257u,   389u,    521u,    769u,    1031u,
    1543u,  2053u,  3079u,  6151u,  12289u,  24593u,  49157u,
#if SIZE_MAX >= ULONG_MAX
    98317ul,      196613ul,     393241ul,     786433ul,
    1572869ul,    3145739ul,    6291469ul,    12582917ul,
    25165843ul,   50331653ul,   100663319ul,  201326611ul,
    402653189ul,  805306457ul,  1610612741ul, 3221225473ul,
    4294967291ul,
#endif
#if SIZE_MAX >= ULLONG_MAX
    6442450939ull,    12884901893ull,   25769803751ull,
    51539607551ull,   103079215111ull,  206158430209ull,
    412316860441ull,  824633720831ull,  1649267441651ull,
    3298534883309ull, 6597069766657ull,
#endif
}};

// hash modulo the IPrime-th entry of PRIMES. The index is a template
// argument, so each instantiation divides by a compile-time constant.
template <unsigned int IPrime>
static constexpr std::size_t mod(std::size_t hash) {
  return hash % PRIMES[IPrime];
}

// MOD_PRIME[i](hash) yields hash % PRIMES[i]. Dispatching through this table
// of instantiations lets the compiler optimise each modulo for its constant
// divisor.
static constexpr const std::array<std::size_t (*)(std::size_t),
                                  TSL_RH_NB_PRIMES>
    MOD_PRIME = {{
        &mod<0>,  &mod<1>,  &mod<2>,  &mod<3>,  &mod<4>,  &mod<5>,
        &mod<6>,  &mod<7>,  &mod<8>,  &mod<9>,  &mod<10>, &mod<11>,
        &mod<12>, &mod<13>, &mod<14>, &mod<15>, &mod<16>, &mod<17>,
        &mod<18>, &mod<19>, &mod<20>, &mod<21>, &mod<22>,
#if SIZE_MAX >= ULONG_MAX
        &mod<23>, &mod<24>, &mod<25>, &mod<26>, &mod<27>, &mod<28>,
        &mod<29>, &mod<30>, &mod<31>, &mod<32>, &mod<33>, &mod<34>,
        &mod<35>, &mod<36>, &mod<37>, &mod<38>, &mod<39>,
#endif
#if SIZE_MAX >= ULLONG_MAX
        &mod<40>, &mod<41>, &mod<42>, &mod<43>, &mod<44>, &mod<45>,
        &mod<46>, &mod<47>, &mod<48>, &mod<49>, &mod<50>,
#endif
    }};

}  // namespace detail
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Grow the hash table by using prime numbers as bucket count. Slower than
|
||||||
|
* tsl::rh::power_of_two_growth_policy in general but will probably distribute
|
||||||
|
* the values around better in the buckets with a poor hash function.
|
||||||
|
*
|
||||||
|
* To allow the compiler to optimize the modulo operation, a lookup table is
|
||||||
|
* used with constant primes numbers.
|
||||||
|
*
|
||||||
|
* With a switch the code would look like:
|
||||||
|
* \code
|
||||||
|
* switch(iprime) { // iprime is the current prime of the hash table
|
||||||
|
* case 0: hash % 5ul;
|
||||||
|
* break;
|
||||||
|
* case 1: hash % 17ul;
|
||||||
|
* break;
|
||||||
|
* case 2: hash % 29ul;
|
||||||
|
* break;
|
||||||
|
* ...
|
||||||
|
* }
|
||||||
|
* \endcode
|
||||||
|
*
|
||||||
|
* Due to the constant variable in the modulo the compiler is able to optimize
|
||||||
|
* the operation by a series of multiplications, subtractions and shifts.
|
||||||
|
*
|
||||||
|
* The 'hash % 5' could become something like 'hash - (hash * 0xCCCCCCCD) >> 34)
|
||||||
|
* * 5' in a 64 bits environment.
|
||||||
|
*/
|
||||||
|
class prime_growth_policy {
|
||||||
|
public:
|
||||||
|
explicit prime_growth_policy(std::size_t& min_bucket_count_in_out) {
|
||||||
|
auto it_prime = std::lower_bound(
|
||||||
|
detail::PRIMES.begin(), detail::PRIMES.end(), min_bucket_count_in_out);
|
||||||
|
if (it_prime == detail::PRIMES.end()) {
|
||||||
|
TSL_RH_THROW_OR_TERMINATE(std::length_error,
|
||||||
|
"The hash table exceeds its maximum size.");
|
||||||
|
}
|
||||||
|
|
||||||
|
m_iprime = static_cast<unsigned int>(
|
||||||
|
std::distance(detail::PRIMES.begin(), it_prime));
|
||||||
|
if (min_bucket_count_in_out > 0) {
|
||||||
|
min_bucket_count_in_out = *it_prime;
|
||||||
|
} else {
|
||||||
|
min_bucket_count_in_out = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
std::size_t bucket_for_hash(std::size_t hash) const noexcept {
|
||||||
|
return detail::MOD_PRIME[m_iprime](hash);
|
||||||
|
}
|
||||||
|
|
||||||
|
std::size_t next_bucket_count() const {
|
||||||
|
if (m_iprime + 1 >= detail::PRIMES.size()) {
|
||||||
|
TSL_RH_THROW_OR_TERMINATE(std::length_error,
|
||||||
|
"The hash table exceeds its maximum size.");
|
||||||
|
}
|
||||||
|
|
||||||
|
return detail::PRIMES[m_iprime + 1];
|
||||||
|
}
|
||||||
|
|
||||||
|
std::size_t max_bucket_count() const { return detail::PRIMES.back(); }
|
||||||
|
|
||||||
|
void clear() noexcept { m_iprime = 0; }
|
||||||
|
|
||||||
|
private:
|
||||||
|
unsigned int m_iprime;
|
||||||
|
|
||||||
|
static_assert(std::numeric_limits<decltype(m_iprime)>::max() >=
|
||||||
|
detail::PRIMES.size(),
|
||||||
|
"The type of m_iprime is not big enough.");
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace rh
|
||||||
|
} // namespace tsl
|
||||||
|
|
||||||
|
#endif
|
||||||
File diff suppressed because it is too large
Load Diff
|
|
@ -0,0 +1,815 @@
|
||||||
|
/**
|
||||||
|
* MIT License
|
||||||
|
*
|
||||||
|
* Copyright (c) 2017 Thibaut Goetghebuer-Planchon <tessil@gmx.com>
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
* of this software and associated documentation files (the "Software"), to deal
|
||||||
|
* in the Software without restriction, including without limitation the rights
|
||||||
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the Software is
|
||||||
|
* furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in
|
||||||
|
* all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
|
* SOFTWARE.
|
||||||
|
*/
|
||||||
|
#ifndef TSL_ROBIN_MAP_H
|
||||||
|
#define TSL_ROBIN_MAP_H
|
||||||
|
|
||||||
|
#include <cstddef>
|
||||||
|
#include <functional>
|
||||||
|
#include <initializer_list>
|
||||||
|
#include <memory>
|
||||||
|
#include <type_traits>
|
||||||
|
#include <utility>
|
||||||
|
|
||||||
|
#include "robin_hash.h"
|
||||||
|
|
||||||
|
namespace tsl {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Implementation of a hash map using open-addressing and the robin hood hashing
|
||||||
|
* algorithm with backward shift deletion.
|
||||||
|
*
|
||||||
|
* For operations modifying the hash map (insert, erase, rehash, ...), the
|
||||||
|
* strong exception guarantee is only guaranteed when the expression
|
||||||
|
* `std::is_nothrow_swappable<std::pair<Key, T>>::value &&
|
||||||
|
* std::is_nothrow_move_constructible<std::pair<Key, T>>::value` is true,
|
||||||
|
* otherwise if an exception is thrown during the swap or the move, the hash map
|
||||||
|
* may end up in an undefined state. Per the standard, a `Key` or `T` with a
|
||||||
|
* noexcept copy constructor and no move constructor also satisfies the
|
||||||
|
* `std::is_nothrow_move_constructible<std::pair<Key, T>>::value` criterion (and
|
||||||
|
* will thus guarantee the strong exception for the map).
|
||||||
|
*
|
||||||
|
* When `StoreHash` is true, 32 bits of the hash are stored alongside the
|
||||||
|
* values. It can improve the performance during lookups if the `KeyEqual`
|
||||||
|
* function takes time (if it engenders a cache-miss for example) as we then
|
||||||
|
* compare the stored hashes before comparing the keys. When
|
||||||
|
* `tsl::rh::power_of_two_growth_policy` is used as `GrowthPolicy`, it may also
|
||||||
|
* speed-up the rehash process as we can avoid to recalculate the hash. When it
|
||||||
|
* is detected that storing the hash will not incur any memory penalty due to
|
||||||
|
* alignment (i.e. `sizeof(tsl::detail_robin_hash::bucket_entry<ValueType,
|
||||||
|
* true>) == sizeof(tsl::detail_robin_hash::bucket_entry<ValueType, false>)`)
|
||||||
|
* and `tsl::rh::power_of_two_growth_policy` is used, the hash will be stored
|
||||||
|
* even if `StoreHash` is false so that we can speed-up the rehash (but it will
|
||||||
|
* not be used on lookups unless `StoreHash` is true).
|
||||||
|
*
|
||||||
|
* `GrowthPolicy` defines how the map grows and consequently how a hash value is
|
||||||
|
* mapped to a bucket. By default the map uses
|
||||||
|
* `tsl::rh::power_of_two_growth_policy`. This policy keeps the number of
|
||||||
|
* buckets to a power of two and uses a mask to map the hash to a bucket instead
|
||||||
|
* of the slow modulo. Other growth policies are available and you may define
|
||||||
|
* your own growth policy, check `tsl::rh::power_of_two_growth_policy` for the
|
||||||
|
* interface.
|
||||||
|
*
|
||||||
|
* `std::pair<Key, T>` must be swappable.
|
||||||
|
*
|
||||||
|
* `Key` and `T` must be copy and/or move constructible.
|
||||||
|
*
|
||||||
|
* If the destructor of `Key` or `T` throws an exception, the behaviour of the
|
||||||
|
* class is undefined.
|
||||||
|
*
|
||||||
|
* Iterators invalidation:
|
||||||
|
* - clear, operator=, reserve, rehash: always invalidate the iterators.
|
||||||
|
* - insert, emplace, emplace_hint, operator[]: if there is an effective
|
||||||
|
* insert, invalidate the iterators.
|
||||||
|
* - erase: always invalidate the iterators.
|
||||||
|
*/
|
||||||
|
template <class Key, class T, class Hash = std::hash<Key>,
|
||||||
|
class KeyEqual = std::equal_to<Key>,
|
||||||
|
class Allocator = std::allocator<std::pair<Key, T>>,
|
||||||
|
bool StoreHash = false,
|
||||||
|
class GrowthPolicy = tsl::rh::power_of_two_growth_policy<2>>
|
||||||
|
class robin_map {
|
||||||
|
private:
|
||||||
|
template <typename U>
|
||||||
|
using has_is_transparent = tsl::detail_robin_hash::has_is_transparent<U>;
|
||||||
|
|
||||||
|
class KeySelect {
|
||||||
|
public:
|
||||||
|
using key_type = Key;
|
||||||
|
|
||||||
|
const key_type& operator()(
|
||||||
|
const std::pair<Key, T>& key_value) const noexcept {
|
||||||
|
return key_value.first;
|
||||||
|
}
|
||||||
|
|
||||||
|
key_type& operator()(std::pair<Key, T>& key_value) noexcept {
|
||||||
|
return key_value.first;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
class ValueSelect {
|
||||||
|
public:
|
||||||
|
using value_type = T;
|
||||||
|
|
||||||
|
const value_type& operator()(
|
||||||
|
const std::pair<Key, T>& key_value) const noexcept {
|
||||||
|
return key_value.second;
|
||||||
|
}
|
||||||
|
|
||||||
|
value_type& operator()(std::pair<Key, T>& key_value) noexcept {
|
||||||
|
return key_value.second;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
using ht = detail_robin_hash::robin_hash<std::pair<Key, T>, KeySelect,
|
||||||
|
ValueSelect, Hash, KeyEqual,
|
||||||
|
Allocator, StoreHash, GrowthPolicy>;
|
||||||
|
|
||||||
|
public:
|
||||||
|
using key_type = typename ht::key_type;
|
||||||
|
using mapped_type = T;
|
||||||
|
using value_type = typename ht::value_type;
|
||||||
|
using size_type = typename ht::size_type;
|
||||||
|
using difference_type = typename ht::difference_type;
|
||||||
|
using hasher = typename ht::hasher;
|
||||||
|
using key_equal = typename ht::key_equal;
|
||||||
|
using allocator_type = typename ht::allocator_type;
|
||||||
|
using reference = typename ht::reference;
|
||||||
|
using const_reference = typename ht::const_reference;
|
||||||
|
using pointer = typename ht::pointer;
|
||||||
|
using const_pointer = typename ht::const_pointer;
|
||||||
|
using iterator = typename ht::iterator;
|
||||||
|
using const_iterator = typename ht::const_iterator;
|
||||||
|
|
||||||
|
public:
|
||||||
|
/*
|
||||||
|
* Constructors
|
||||||
|
*/
|
||||||
|
robin_map() : robin_map(ht::DEFAULT_INIT_BUCKETS_SIZE) {}
|
||||||
|
|
||||||
|
explicit robin_map(size_type bucket_count, const Hash& hash = Hash(),
|
||||||
|
const KeyEqual& equal = KeyEqual(),
|
||||||
|
const Allocator& alloc = Allocator())
|
||||||
|
: m_ht(bucket_count, hash, equal, alloc) {}
|
||||||
|
|
||||||
|
robin_map(size_type bucket_count, const Allocator& alloc)
|
||||||
|
: robin_map(bucket_count, Hash(), KeyEqual(), alloc) {}
|
||||||
|
|
||||||
|
robin_map(size_type bucket_count, const Hash& hash, const Allocator& alloc)
|
||||||
|
: robin_map(bucket_count, hash, KeyEqual(), alloc) {}
|
||||||
|
|
||||||
|
explicit robin_map(const Allocator& alloc)
|
||||||
|
: robin_map(ht::DEFAULT_INIT_BUCKETS_SIZE, alloc) {}
|
||||||
|
|
||||||
|
template <class InputIt>
|
||||||
|
robin_map(InputIt first, InputIt last,
|
||||||
|
size_type bucket_count = ht::DEFAULT_INIT_BUCKETS_SIZE,
|
||||||
|
const Hash& hash = Hash(), const KeyEqual& equal = KeyEqual(),
|
||||||
|
const Allocator& alloc = Allocator())
|
||||||
|
: robin_map(bucket_count, hash, equal, alloc) {
|
||||||
|
insert(first, last);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class InputIt>
|
||||||
|
robin_map(InputIt first, InputIt last, size_type bucket_count,
|
||||||
|
const Allocator& alloc)
|
||||||
|
: robin_map(first, last, bucket_count, Hash(), KeyEqual(), alloc) {}
|
||||||
|
|
||||||
|
template <class InputIt>
|
||||||
|
robin_map(InputIt first, InputIt last, size_type bucket_count,
|
||||||
|
const Hash& hash, const Allocator& alloc)
|
||||||
|
: robin_map(first, last, bucket_count, hash, KeyEqual(), alloc) {}
|
||||||
|
|
||||||
|
robin_map(std::initializer_list<value_type> init,
|
||||||
|
size_type bucket_count = ht::DEFAULT_INIT_BUCKETS_SIZE,
|
||||||
|
const Hash& hash = Hash(), const KeyEqual& equal = KeyEqual(),
|
||||||
|
const Allocator& alloc = Allocator())
|
||||||
|
: robin_map(init.begin(), init.end(), bucket_count, hash, equal, alloc) {}
|
||||||
|
|
||||||
|
robin_map(std::initializer_list<value_type> init, size_type bucket_count,
|
||||||
|
const Allocator& alloc)
|
||||||
|
: robin_map(init.begin(), init.end(), bucket_count, Hash(), KeyEqual(),
|
||||||
|
alloc) {}
|
||||||
|
|
||||||
|
robin_map(std::initializer_list<value_type> init, size_type bucket_count,
|
||||||
|
const Hash& hash, const Allocator& alloc)
|
||||||
|
: robin_map(init.begin(), init.end(), bucket_count, hash, KeyEqual(),
|
||||||
|
alloc) {}
|
||||||
|
|
||||||
|
robin_map& operator=(std::initializer_list<value_type> ilist) {
|
||||||
|
m_ht.clear();
|
||||||
|
|
||||||
|
m_ht.reserve(ilist.size());
|
||||||
|
m_ht.insert(ilist.begin(), ilist.end());
|
||||||
|
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
|
||||||
|
allocator_type get_allocator() const { return m_ht.get_allocator(); }
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Iterators
|
||||||
|
*/
|
||||||
|
iterator begin() noexcept { return m_ht.begin(); }
|
||||||
|
const_iterator begin() const noexcept { return m_ht.begin(); }
|
||||||
|
const_iterator cbegin() const noexcept { return m_ht.cbegin(); }
|
||||||
|
|
||||||
|
iterator end() noexcept { return m_ht.end(); }
|
||||||
|
const_iterator end() const noexcept { return m_ht.end(); }
|
||||||
|
const_iterator cend() const noexcept { return m_ht.cend(); }
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Capacity
|
||||||
|
*/
|
||||||
|
bool empty() const noexcept { return m_ht.empty(); }
|
||||||
|
size_type size() const noexcept { return m_ht.size(); }
|
||||||
|
size_type max_size() const noexcept { return m_ht.max_size(); }
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Modifiers
|
||||||
|
*/
|
||||||
|
void clear() noexcept { m_ht.clear(); }
|
||||||
|
|
||||||
|
std::pair<iterator, bool> insert(const value_type& value) {
|
||||||
|
return m_ht.insert(value);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class P, typename std::enable_if<std::is_constructible<
|
||||||
|
value_type, P&&>::value>::type* = nullptr>
|
||||||
|
std::pair<iterator, bool> insert(P&& value) {
|
||||||
|
return m_ht.emplace(std::forward<P>(value));
|
||||||
|
}
|
||||||
|
|
||||||
|
std::pair<iterator, bool> insert(value_type&& value) {
|
||||||
|
return m_ht.insert(std::move(value));
|
||||||
|
}
|
||||||
|
|
||||||
|
iterator insert(const_iterator hint, const value_type& value) {
|
||||||
|
return m_ht.insert_hint(hint, value);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Hinted insert of an element built from `value`.
 *
 * Only participates in overload resolution when `value_type` is constructible
 * from `P&&`. `hint` and the perfectly-forwarded argument are handed to the
 * underlying table's emplace_hint().
 */
template <class P, typename std::enable_if<std::is_constructible<
|
||||||
|
value_type, P&&>::value>::type* = nullptr>
|
||||||
|
iterator insert(const_iterator hint, P&& value) {
|
||||||
|
return m_ht.emplace_hint(hint, std::forward<P>(value));
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Hinted rvalue insert: `value` is moved into the underlying table's insert_hint(). */
iterator insert(const_iterator hint, value_type&& value) {
|
||||||
|
return m_ht.insert_hint(hint, std::move(value));
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Inserts the iterator range [first, last) via the underlying table's range insert(). */
template <class InputIt>
|
||||||
|
void insert(InputIt first, InputIt last) {
|
||||||
|
m_ht.insert(first, last);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Inserts every element of `ilist` by passing its begin()/end() range to the table. */
void insert(std::initializer_list<value_type> ilist) {
|
||||||
|
m_ht.insert(ilist.begin(), ilist.end());
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Forwards `k` and the perfectly-forwarded `obj` to the underlying table's
 * insert_or_assign().
 *
 * NOTE(review): presumably mirrors std::unordered_map::insert_or_assign
 * (assigns `obj` when `k` is already present) — confirm against robin_hash.
 */
template <class M>
|
||||||
|
std::pair<iterator, bool> insert_or_assign(const key_type& k, M&& obj) {
|
||||||
|
return m_ht.insert_or_assign(k, std::forward<M>(obj));
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Rvalue-key overload of insert_or_assign(): `k` is moved, `obj` is perfectly forwarded. */
template <class M>
|
||||||
|
std::pair<iterator, bool> insert_or_assign(key_type&& k, M&& obj) {
|
||||||
|
return m_ht.insert_or_assign(std::move(k), std::forward<M>(obj));
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Hinted insert_or_assign(): `hint`, `k` and the forwarded `obj` go straight to the table. */
template <class M>
|
||||||
|
iterator insert_or_assign(const_iterator hint, const key_type& k, M&& obj) {
|
||||||
|
return m_ht.insert_or_assign(hint, k, std::forward<M>(obj));
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Hinted, rvalue-key insert_or_assign(): `k` is moved and `obj` is perfectly forwarded. */
template <class M>
|
||||||
|
iterator insert_or_assign(const_iterator hint, key_type&& k, M&& obj) {
|
||||||
|
return m_ht.insert_or_assign(hint, std::move(k), std::forward<M>(obj));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Due to the way elements are stored, emplace will need to move or copy the
|
||||||
|
* key-value once. The method is equivalent to
|
||||||
|
* insert(value_type(std::forward<Args>(args)...));
|
||||||
|
*
|
||||||
|
* Mainly here for compatibility with the std::unordered_map interface.
|
||||||
|
*/
|
||||||
|
// Perfectly forwards the whole argument pack to the underlying table's emplace().
template <class... Args>
|
||||||
|
std::pair<iterator, bool> emplace(Args&&... args) {
|
||||||
|
return m_ht.emplace(std::forward<Args>(args)...);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Due to the way elements are stored, emplace_hint will need to move or copy
|
||||||
|
* the key-value once. The method is equivalent to insert(hint,
|
||||||
|
* value_type(std::forward<Args>(args)...));
|
||||||
|
*
|
||||||
|
* Mainly here for compatibility with the std::unordered_map interface.
|
||||||
|
*/
|
||||||
|
// `hint` is passed through unchanged; the argument pack is perfectly forwarded.
template <class... Args>
|
||||||
|
iterator emplace_hint(const_iterator hint, Args&&... args) {
|
||||||
|
return m_ht.emplace_hint(hint, std::forward<Args>(args)...);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Forwards `k` and the perfectly-forwarded `args` to the underlying table's
 * try_emplace().
 *
 * NOTE(review): presumably follows std::unordered_map::try_emplace (nothing is
 * constructed when `k` already exists) — confirm against robin_hash.
 */
template <class... Args>
|
||||||
|
std::pair<iterator, bool> try_emplace(const key_type& k, Args&&... args) {
|
||||||
|
return m_ht.try_emplace(k, std::forward<Args>(args)...);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Rvalue-key overload of try_emplace(): `k` is moved, `args` are perfectly forwarded. */
template <class... Args>
|
||||||
|
std::pair<iterator, bool> try_emplace(key_type&& k, Args&&... args) {
|
||||||
|
return m_ht.try_emplace(std::move(k), std::forward<Args>(args)...);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Hinted try_emplace(): delegates to the table's try_emplace_hint() with `hint`, `k`, `args`. */
template <class... Args>
|
||||||
|
iterator try_emplace(const_iterator hint, const key_type& k, Args&&... args) {
|
||||||
|
return m_ht.try_emplace_hint(hint, k, std::forward<Args>(args)...);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Hinted, rvalue-key try_emplace(): `k` is moved into the table's try_emplace_hint(). */
template <class... Args>
|
||||||
|
iterator try_emplace(const_iterator hint, key_type&& k, Args&&... args) {
|
||||||
|
return m_ht.try_emplace_hint(hint, std::move(k),
|
||||||
|
std::forward<Args>(args)...);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Erases the element at `pos`; returns the iterator yielded by the underlying table's erase(). */
iterator erase(iterator pos) { return m_ht.erase(pos); }
|
||||||
|
/** const_iterator overload of erase(pos); still returns a mutable iterator. */
iterator erase(const_iterator pos) { return m_ht.erase(pos); }
|
||||||
|
/** Erases the range delimited by `first` and `last` via the underlying table's range erase(). */
iterator erase(const_iterator first, const_iterator last) {
|
||||||
|
return m_ht.erase(first, last);
|
||||||
|
}
|
||||||
|
/** Erases by key; returns the size_type count reported by the underlying table's erase(key). */
size_type erase(const key_type& key) { return m_ht.erase(key); }
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Erase the element at position 'pos'. In contrast to the regular erase()
|
||||||
|
* function, erase_fast() does not return an iterator. This allows it to be
|
||||||
|
* faster especially in hash tables with a low load factor, where finding the
|
||||||
|
* next nonempty bucket would be costly.
|
||||||
|
*/
|
||||||
|
// Nothing is returned: the call is made purely for its effect on the table.
void erase_fast(iterator pos) { m_ht.erase_fast(pos); }
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Use the hash value 'precalculated_hash' instead of hashing the key. The
|
||||||
|
* hash value should be the same as hash_function()(key). Useful to speed-up
|
||||||
|
* the lookup to the value if you already have the hash.
|
||||||
|
*/
|
||||||
|
// The caller-supplied hash is handed to the table together with the key.
size_type erase(const key_type& key, std::size_t precalculated_hash) {
|
||||||
|
return m_ht.erase(key, precalculated_hash);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This overload only participates in the overload resolution if the typedef
|
||||||
|
* KeyEqual::is_transparent exists. If so, K must be hashable and comparable
|
||||||
|
* to Key.
|
||||||
|
*/
|
||||||
|
// Heterogeneous-key erase: the defaulted enable_if pointer parameter removes
// this overload unless has_is_transparent<KE>::value is true.
template <
|
||||||
|
class K, class KE = KeyEqual,
|
||||||
|
typename std::enable_if<has_is_transparent<KE>::value>::type* = nullptr>
|
||||||
|
size_type erase(const K& key) {
|
||||||
|
return m_ht.erase(key);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @copydoc erase(const K& key)
|
||||||
|
*
|
||||||
|
* Use the hash value 'precalculated_hash' instead of hashing the key. The
|
||||||
|
* hash value should be the same as hash_function()(key). Useful to speed-up
|
||||||
|
* the lookup to the value if you already have the hash.
|
||||||
|
*/
|
||||||
|
// Heterogeneous-key erase with a caller-supplied hash; enabled only when
// has_is_transparent<KE>::value is true.
template <
|
||||||
|
class K, class KE = KeyEqual,
|
||||||
|
typename std::enable_if<has_is_transparent<KE>::value>::type* = nullptr>
|
||||||
|
size_type erase(const K& key, std::size_t precalculated_hash) {
|
||||||
|
return m_ht.erase(key, precalculated_hash);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Exchanges the underlying hash tables of `*this` and `other`. */
void swap(robin_map& other) { other.m_ht.swap(m_ht); }
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Lookup
|
||||||
|
*/
|
||||||
|
/**
 * Returns a mutable reference to the value mapped to `key`, as resolved by the
 * underlying table's at().
 * NOTE(review): behaviour for a missing key is defined in robin_hash
 * (std::unordered_map::at throws std::out_of_range) — confirm.
 */
T& at(const Key& key) { return m_ht.at(key); }
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Use the hash value 'precalculated_hash' instead of hashing the key. The
|
||||||
|
* hash value should be the same as hash_function()(key). Useful to speed-up
|
||||||
|
* the lookup if you already have the hash.
|
||||||
|
*/
|
||||||
|
// The caller-supplied hash is handed to the table together with the key.
T& at(const Key& key, std::size_t precalculated_hash) {
|
||||||
|
return m_ht.at(key, precalculated_hash);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Const overload of at(): returns a read-only reference to the mapped value. */
const T& at(const Key& key) const { return m_ht.at(key); }
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @copydoc at(const Key& key, std::size_t precalculated_hash)
|
||||||
|
*/
|
||||||
|
// Const overload; the caller-supplied hash is handed to the table with the key.
const T& at(const Key& key, std::size_t precalculated_hash) const {
|
||||||
|
return m_ht.at(key, precalculated_hash);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This overload only participates in the overload resolution if the typedef
|
||||||
|
* KeyEqual::is_transparent exists. If so, K must be hashable and comparable
|
||||||
|
* to Key.
|
||||||
|
*/
|
||||||
|
// Heterogeneous-key at(): the defaulted enable_if pointer parameter removes
// this overload unless has_is_transparent<KE>::value is true.
template <
|
||||||
|
class K, class KE = KeyEqual,
|
||||||
|
typename std::enable_if<has_is_transparent<KE>::value>::type* = nullptr>
|
||||||
|
T& at(const K& key) {
|
||||||
|
return m_ht.at(key);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @copydoc at(const K& key)
|
||||||
|
*
|
||||||
|
* Use the hash value 'precalculated_hash' instead of hashing the key. The
|
||||||
|
* hash value should be the same as hash_function()(key). Useful to speed-up
|
||||||
|
* the lookup if you already have the hash.
|
||||||
|
*/
|
||||||
|
// Heterogeneous-key at() with a caller-supplied hash; enabled only when
// has_is_transparent<KE>::value is true.
template <
|
||||||
|
class K, class KE = KeyEqual,
|
||||||
|
typename std::enable_if<has_is_transparent<KE>::value>::type* = nullptr>
|
||||||
|
T& at(const K& key, std::size_t precalculated_hash) {
|
||||||
|
return m_ht.at(key, precalculated_hash);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @copydoc at(const K& key)
|
||||||
|
*/
|
||||||
|
// Const heterogeneous-key at(); enabled only when
// has_is_transparent<KE>::value is true.
template <
|
||||||
|
class K, class KE = KeyEqual,
|
||||||
|
typename std::enable_if<has_is_transparent<KE>::value>::type* = nullptr>
|
||||||
|
const T& at(const K& key) const {
|
||||||
|
return m_ht.at(key);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @copydoc at(const K& key, std::size_t precalculated_hash)
|
||||||
|
*/
|
||||||
|
// Const heterogeneous-key at() with a caller-supplied hash; enabled only when
// has_is_transparent<KE>::value is true.
template <
|
||||||
|
class K, class KE = KeyEqual,
|
||||||
|
typename std::enable_if<has_is_transparent<KE>::value>::type* = nullptr>
|
||||||
|
const T& at(const K& key, std::size_t precalculated_hash) const {
|
||||||
|
return m_ht.at(key, precalculated_hash);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Returns the reference produced by the underlying table's operator[] for `key`.
 * NOTE(review): presumably inserts a default value when `key` is absent, as
 * std::unordered_map does — confirm against robin_hash.
 */
T& operator[](const Key& key) { return m_ht[key]; }
|
||||||
|
/** Rvalue-key overload of operator[]: `key` is moved into the underlying table's operator[]. */
T& operator[](Key&& key) { return m_ht[std::move(key)]; }
|
||||||
|
|
||||||
|
/** Returns the size_type count reported by the underlying table for `key`. */
size_type count(const Key& key) const { return m_ht.count(key); }
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Use the hash value 'precalculated_hash' instead of hashing the key. The
|
||||||
|
* hash value should be the same as hash_function()(key). Useful to speed-up
|
||||||
|
* the lookup if you already have the hash.
|
||||||
|
*/
|
||||||
|
// The caller-supplied hash is handed to the table together with the key.
size_type count(const Key& key, std::size_t precalculated_hash) const {
|
||||||
|
return m_ht.count(key, precalculated_hash);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This overload only participates in the overload resolution if the typedef
|
||||||
|
* KeyEqual::is_transparent exists. If so, K must be hashable and comparable
|
||||||
|
* to Key.
|
||||||
|
*/
|
||||||
|
// Heterogeneous-key count(): the defaulted enable_if pointer parameter removes
// this overload unless has_is_transparent<KE>::value is true.
template <
|
||||||
|
class K, class KE = KeyEqual,
|
||||||
|
typename std::enable_if<has_is_transparent<KE>::value>::type* = nullptr>
|
||||||
|
size_type count(const K& key) const {
|
||||||
|
return m_ht.count(key);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @copydoc count(const K& key) const
|
||||||
|
*
|
||||||
|
* Use the hash value 'precalculated_hash' instead of hashing the key. The
|
||||||
|
* hash value should be the same as hash_function()(key). Useful to speed-up
|
||||||
|
* the lookup if you already have the hash.
|
||||||
|
*/
|
||||||
|
// Heterogeneous-key count() with a caller-supplied hash; enabled only when
// has_is_transparent<KE>::value is true.
template <
|
||||||
|
class K, class KE = KeyEqual,
|
||||||
|
typename std::enable_if<has_is_transparent<KE>::value>::type* = nullptr>
|
||||||
|
size_type count(const K& key, std::size_t precalculated_hash) const {
|
||||||
|
return m_ht.count(key, precalculated_hash);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Looks up `key` in the underlying table and returns the resulting mutable
 * iterator. operator== in this class compares a find() result against cend()
 * to detect a missing key.
 */
iterator find(const Key& key) { return m_ht.find(key); }
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Use the hash value 'precalculated_hash' instead of hashing the key. The
|
||||||
|
* hash value should be the same as hash_function()(key). Useful to speed-up
|
||||||
|
* the lookup if you already have the hash.
|
||||||
|
*/
|
||||||
|
// The caller-supplied hash is handed to the table together with the key.
iterator find(const Key& key, std::size_t precalculated_hash) {
|
||||||
|
return m_ht.find(key, precalculated_hash);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Const overload of find(): returns a const_iterator from the underlying table. */
const_iterator find(const Key& key) const { return m_ht.find(key); }
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @copydoc find(const Key& key, std::size_t precalculated_hash)
|
||||||
|
*/
|
||||||
|
// Const overload; the caller-supplied hash is handed to the table with the key.
const_iterator find(const Key& key, std::size_t precalculated_hash) const {
|
||||||
|
return m_ht.find(key, precalculated_hash);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This overload only participates in the overload resolution if the typedef
|
||||||
|
* KeyEqual::is_transparent exists. If so, K must be hashable and comparable
|
||||||
|
* to Key.
|
||||||
|
*/
|
||||||
|
// Heterogeneous-key find(): the defaulted enable_if pointer parameter removes
// this overload unless has_is_transparent<KE>::value is true.
template <
|
||||||
|
class K, class KE = KeyEqual,
|
||||||
|
typename std::enable_if<has_is_transparent<KE>::value>::type* = nullptr>
|
||||||
|
iterator find(const K& key) {
|
||||||
|
return m_ht.find(key);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @copydoc find(const K& key)
|
||||||
|
*
|
||||||
|
* Use the hash value 'precalculated_hash' instead of hashing the key. The
|
||||||
|
* hash value should be the same as hash_function()(key). Useful to speed-up
|
||||||
|
* the lookup if you already have the hash.
|
||||||
|
*/
|
||||||
|
// Heterogeneous-key find() with a caller-supplied hash; enabled only when
// has_is_transparent<KE>::value is true.
template <
|
||||||
|
class K, class KE = KeyEqual,
|
||||||
|
typename std::enable_if<has_is_transparent<KE>::value>::type* = nullptr>
|
||||||
|
iterator find(const K& key, std::size_t precalculated_hash) {
|
||||||
|
return m_ht.find(key, precalculated_hash);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @copydoc find(const K& key)
|
||||||
|
*/
|
||||||
|
// Const heterogeneous-key find(); enabled only when
// has_is_transparent<KE>::value is true.
template <
|
||||||
|
class K, class KE = KeyEqual,
|
||||||
|
typename std::enable_if<has_is_transparent<KE>::value>::type* = nullptr>
|
||||||
|
const_iterator find(const K& key) const {
|
||||||
|
return m_ht.find(key);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @copydoc find(const K& key)
|
||||||
|
*
|
||||||
|
* Use the hash value 'precalculated_hash' instead of hashing the key. The
|
||||||
|
* hash value should be the same as hash_function()(key). Useful to speed-up
|
||||||
|
* the lookup if you already have the hash.
|
||||||
|
*/
|
||||||
|
// Const heterogeneous-key find() with a caller-supplied hash; enabled only
// when has_is_transparent<KE>::value is true.
template <
|
||||||
|
class K, class KE = KeyEqual,
|
||||||
|
typename std::enable_if<has_is_transparent<KE>::value>::type* = nullptr>
|
||||||
|
const_iterator find(const K& key, std::size_t precalculated_hash) const {
|
||||||
|
return m_ht.find(key, precalculated_hash);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Returns the boolean reported by the underlying table's contains() for `key`. */
bool contains(const Key& key) const { return m_ht.contains(key); }
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Use the hash value 'precalculated_hash' instead of hashing the key. The
|
||||||
|
* hash value should be the same as hash_function()(key). Useful to speed-up
|
||||||
|
* the lookup if you already have the hash.
|
||||||
|
*/
|
||||||
|
// The caller-supplied hash is handed to the table together with the key.
bool contains(const Key& key, std::size_t precalculated_hash) const {
|
||||||
|
return m_ht.contains(key, precalculated_hash);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This overload only participates in the overload resolution if the typedef
|
||||||
|
* KeyEqual::is_transparent exists. If so, K must be hashable and comparable
|
||||||
|
* to Key.
|
||||||
|
*/
|
||||||
|
// Heterogeneous-key contains(): the defaulted enable_if pointer parameter
// removes this overload unless has_is_transparent<KE>::value is true.
template <
|
||||||
|
class K, class KE = KeyEqual,
|
||||||
|
typename std::enable_if<has_is_transparent<KE>::value>::type* = nullptr>
|
||||||
|
bool contains(const K& key) const {
|
||||||
|
return m_ht.contains(key);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @copydoc contains(const K& key) const
|
||||||
|
*
|
||||||
|
* Use the hash value 'precalculated_hash' instead of hashing the key. The
|
||||||
|
* hash value should be the same as hash_function()(key). Useful to speed-up
|
||||||
|
* the lookup if you already have the hash.
|
||||||
|
*/
|
||||||
|
// Heterogeneous-key contains() with a caller-supplied hash; enabled only when
// has_is_transparent<KE>::value is true.
template <
|
||||||
|
class K, class KE = KeyEqual,
|
||||||
|
typename std::enable_if<has_is_transparent<KE>::value>::type* = nullptr>
|
||||||
|
bool contains(const K& key, std::size_t precalculated_hash) const {
|
||||||
|
return m_ht.contains(key, precalculated_hash);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Returns the pair of mutable iterators produced by the underlying table's equal_range(key). */
std::pair<iterator, iterator> equal_range(const Key& key) {
|
||||||
|
return m_ht.equal_range(key);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Use the hash value 'precalculated_hash' instead of hashing the key. The
|
||||||
|
* hash value should be the same as hash_function()(key). Useful to speed-up
|
||||||
|
* the lookup if you already have the hash.
|
||||||
|
*/
|
||||||
|
// The caller-supplied hash is handed to the table together with the key.
std::pair<iterator, iterator> equal_range(const Key& key,
|
||||||
|
std::size_t precalculated_hash) {
|
||||||
|
return m_ht.equal_range(key, precalculated_hash);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Const overload of equal_range(): returns a pair of const_iterators. */
std::pair<const_iterator, const_iterator> equal_range(const Key& key) const {
|
||||||
|
return m_ht.equal_range(key);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @copydoc equal_range(const Key& key, std::size_t precalculated_hash)
|
||||||
|
*/
|
||||||
|
// Const overload; the caller-supplied hash is handed to the table with the key.
std::pair<const_iterator, const_iterator> equal_range(
|
||||||
|
const Key& key, std::size_t precalculated_hash) const {
|
||||||
|
return m_ht.equal_range(key, precalculated_hash);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This overload only participates in the overload resolution if the typedef
|
||||||
|
* KeyEqual::is_transparent exists. If so, K must be hashable and comparable
|
||||||
|
* to Key.
|
||||||
|
*/
|
||||||
|
// Heterogeneous-key equal_range(): the defaulted enable_if pointer parameter
// removes this overload unless has_is_transparent<KE>::value is true.
template <
|
||||||
|
class K, class KE = KeyEqual,
|
||||||
|
typename std::enable_if<has_is_transparent<KE>::value>::type* = nullptr>
|
||||||
|
std::pair<iterator, iterator> equal_range(const K& key) {
|
||||||
|
return m_ht.equal_range(key);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @copydoc equal_range(const K& key)
|
||||||
|
*
|
||||||
|
* Use the hash value 'precalculated_hash' instead of hashing the key. The
|
||||||
|
* hash value should be the same as hash_function()(key). Useful to speed-up
|
||||||
|
* the lookup if you already have the hash.
|
||||||
|
*/
|
||||||
|
// Heterogeneous-key equal_range() with a caller-supplied hash; enabled only
// when has_is_transparent<KE>::value is true.
template <
|
||||||
|
class K, class KE = KeyEqual,
|
||||||
|
typename std::enable_if<has_is_transparent<KE>::value>::type* = nullptr>
|
||||||
|
std::pair<iterator, iterator> equal_range(const K& key,
|
||||||
|
std::size_t precalculated_hash) {
|
||||||
|
return m_ht.equal_range(key, precalculated_hash);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @copydoc equal_range(const K& key)
|
||||||
|
*/
|
||||||
|
// Const heterogeneous-key equal_range(); enabled only when
// has_is_transparent<KE>::value is true.
template <
|
||||||
|
class K, class KE = KeyEqual,
|
||||||
|
typename std::enable_if<has_is_transparent<KE>::value>::type* = nullptr>
|
||||||
|
std::pair<const_iterator, const_iterator> equal_range(const K& key) const {
|
||||||
|
return m_ht.equal_range(key);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @copydoc equal_range(const K& key, std::size_t precalculated_hash)
|
||||||
|
*/
|
||||||
|
// Const heterogeneous-key equal_range() with a caller-supplied hash; enabled
// only when has_is_transparent<KE>::value is true.
template <
|
||||||
|
class K, class KE = KeyEqual,
|
||||||
|
typename std::enable_if<has_is_transparent<KE>::value>::type* = nullptr>
|
||||||
|
std::pair<const_iterator, const_iterator> equal_range(
|
||||||
|
const K& key, std::size_t precalculated_hash) const {
|
||||||
|
return m_ht.equal_range(key, precalculated_hash);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Bucket interface
|
||||||
|
*/
|
||||||
|
/** Returns the bucket count reported by the underlying hash table. */
size_type bucket_count() const { return m_ht.bucket_count(); }
|
||||||
|
/** Returns the maximum bucket count reported by the underlying hash table. */
size_type max_bucket_count() const { return m_ht.max_bucket_count(); }
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Hash policy
|
||||||
|
*/
|
||||||
|
/** Returns the current load factor reported by the underlying hash table. */
float load_factor() const { return m_ht.load_factor(); }
|
||||||
|
|
||||||
|
/** Getter: returns the minimum load factor currently configured on the underlying table. */
float min_load_factor() const { return m_ht.min_load_factor(); }
|
||||||
|
/** Getter: returns the maximum load factor currently configured on the underlying table. */
float max_load_factor() const { return m_ht.max_load_factor(); }
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Set the `min_load_factor` to `ml`. When the `load_factor` of the map goes
|
||||||
|
* below `min_load_factor` after some erase operations, the map will be
|
||||||
|
* shrunk when an insertion occurs. The erase method itself never shrinks
|
||||||
|
* the map.
|
||||||
|
*
|
||||||
|
* The default value of `min_load_factor` is 0.0f, the map never shrinks by
|
||||||
|
* default.
|
||||||
|
*/
|
||||||
|
// Setter overload: passes `ml` to the underlying table's min_load_factor(float).
void min_load_factor(float ml) { m_ht.min_load_factor(ml); }
|
||||||
|
/** Setter overload: passes `ml` to the underlying table's max_load_factor(float). */
void max_load_factor(float ml) { m_ht.max_load_factor(ml); }
|
||||||
|
|
||||||
|
/**
 * Forwards `count_` to the underlying table's rehash().
 * The trailing underscore presumably avoids shadowing the count() member.
 */
void rehash(size_type count_) { m_ht.rehash(count_); }
|
||||||
|
/**
 * Forwards `count_` to the underlying table's reserve().
 * The trailing underscore presumably avoids shadowing the count() member.
 */
void reserve(size_type count_) { m_ht.reserve(count_); }
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Observers
|
||||||
|
*/
|
||||||
|
/** Returns, by value, the hasher reported by the underlying hash table. */
hasher hash_function() const { return m_ht.hash_function(); }
|
||||||
|
/** Returns, by value, the key-equality predicate reported by the underlying hash table. */
key_equal key_eq() const { return m_ht.key_eq(); }
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Other
|
||||||
|
*/
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Convert a const_iterator to an iterator.
|
||||||
|
*/
|
||||||
|
// Non-const member: the conversion is delegated to the underlying table.
iterator mutable_iterator(const_iterator pos) {
|
||||||
|
return m_ht.mutable_iterator(pos);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Serialize the map through the `serializer` parameter.
|
||||||
|
*
|
||||||
|
* The `serializer` parameter must be a function object that supports the
|
||||||
|
* following call:
|
||||||
|
* - `template<typename U> void operator()(const U& value);` where the types
|
||||||
|
* `std::int16_t`, `std::uint32_t`, `std::uint64_t`, `float` and
|
||||||
|
* `std::pair<Key, T>` must be supported for U.
|
||||||
|
*
|
||||||
|
* The implementation leaves binary compatibility (endianness, IEEE 754 for
|
||||||
|
* floats, ...) of the types it serializes in the hands of the `Serializer`
|
||||||
|
* function object if compatibility is required.
|
||||||
|
*/
|
||||||
|
// `serializer` is taken by non-const reference and handed unchanged to the
// underlying table's serialize(); the map itself is not modified (const member).
template <class Serializer>
|
||||||
|
void serialize(Serializer& serializer) const {
|
||||||
|
m_ht.serialize(serializer);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Deserialize a previously serialized map through the `deserializer`
|
||||||
|
* parameter.
|
||||||
|
*
|
||||||
|
* The `deserializer` parameter must be a function object that supports the
|
||||||
|
* following call:
|
||||||
|
* - `template<typename U> U operator()();` where the types `std::int16_t`,
|
||||||
|
* `std::uint32_t`, `std::uint64_t`, `float` and `std::pair<Key, T>` must be
|
||||||
|
* supported for U.
|
||||||
|
*
|
||||||
|
* If the deserialized hash map type is hash compatible with the serialized
|
||||||
|
* map, the deserialization process can be sped up by setting
|
||||||
|
* `hash_compatible` to true. To be hash compatible, the Hash, KeyEqual and
|
||||||
|
* GrowthPolicy must behave the same way as the ones used on the serialized
|
||||||
|
* map and the StoreHash must have the same value. The `std::size_t` must also
|
||||||
|
* be of the same size as the one on the platform used to serialize the map.
|
||||||
|
* If these criteria are not met, the behaviour is undefined with
|
||||||
|
* `hash_compatible` set to true.
|
||||||
|
*
|
||||||
|
* The behaviour is undefined if the type `Key` and `T` of the `robin_map` are
|
||||||
|
* not the same as the types used during serialization.
|
||||||
|
*
|
||||||
|
* The implementation leaves binary compatibility (endianness, IEEE 754 for
|
||||||
|
* floats, size of int, ...) of the types it deserializes in the hands of the
|
||||||
|
* `Deserializer` function object if compatibility is required.
|
||||||
|
*/
|
||||||
|
template <class Deserializer>
|
||||||
|
static robin_map deserialize(Deserializer& deserializer,
|
||||||
|
bool hash_compatible = false) {
|
||||||
|
robin_map map(0);
|
||||||
|
map.m_ht.deserialize(deserializer, hash_compatible);
|
||||||
|
|
||||||
|
return map;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Equality comparison.
 *
 * Two maps are equal when they hold the same number of elements and every
 * key of `lhs` is found in `rhs` with a mapped value that does not compare
 * unequal (`!=`) to the one in `lhs`.
 */
friend bool operator==(const robin_map& lhs, const robin_map& rhs) {
  // Different sizes can never be equal; skip the element-wise walk.
  if (lhs.size() != rhs.size()) {
    return false;
  }

  for (auto it = lhs.begin(), last = lhs.end(); it != last; ++it) {
    const auto match = rhs.find(it->first);
    if (match == rhs.cend()) {
      return false;  // key missing from rhs
    }
    if (it->second != match->second) {
      return false;  // same key, different mapped value
    }
  }

  return true;
}
|
||||||
|
|
||||||
|
/** Inequality comparison: the logical negation of operator==. */
friend bool operator!=(const robin_map& lhs, const robin_map& rhs) {
  return !(lhs == rhs);
}
|
||||||
|
|
||||||
|
/** Non-member swap: delegates to the member function lhs.swap(rhs). */
friend void swap(robin_map& lhs, robin_map& rhs) { lhs.swap(rhs); }
|
||||||
|
|
||||||
|
private:
|
||||||
|
ht m_ht;
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Same as `tsl::robin_map<Key, T, Hash, KeyEqual, Allocator, StoreHash,
|
||||||
|
* tsl::rh::prime_growth_policy>`.
|
||||||
|
*/
|
||||||
|
template <class Key, class T, class Hash = std::hash<Key>,
|
||||||
|
class KeyEqual = std::equal_to<Key>,
|
||||||
|
class Allocator = std::allocator<std::pair<Key, T>>,
|
||||||
|
bool StoreHash = false>
|
||||||
|
using robin_pg_map = robin_map<Key, T, Hash, KeyEqual, Allocator, StoreHash,
|
||||||
|
tsl::rh::prime_growth_policy>;
|
||||||
|
|
||||||
|
} // end namespace tsl
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
@ -0,0 +1,668 @@
|
||||||
|
/**
|
||||||
|
* MIT License
|
||||||
|
*
|
||||||
|
* Copyright (c) 2017 Thibaut Goetghebuer-Planchon <tessil@gmx.com>
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
* of this software and associated documentation files (the "Software"), to deal
|
||||||
|
* in the Software without restriction, including without limitation the rights
|
||||||
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the Software is
|
||||||
|
* furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in
|
||||||
|
* all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
|
* SOFTWARE.
|
||||||
|
*/
|
||||||
|
#ifndef TSL_ROBIN_SET_H
|
||||||
|
#define TSL_ROBIN_SET_H
|
||||||
|
|
||||||
|
#include <cstddef>
|
||||||
|
#include <functional>
|
||||||
|
#include <initializer_list>
|
||||||
|
#include <memory>
|
||||||
|
#include <type_traits>
|
||||||
|
#include <utility>
|
||||||
|
|
||||||
|
#include "robin_hash.h"
|
||||||
|
|
||||||
|
namespace tsl {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Implementation of a hash set using open-addressing and the robin hood hashing
|
||||||
|
* algorithm with backward shift deletion.
|
||||||
|
*
|
||||||
|
* For operations modifying the hash set (insert, erase, rehash, ...), the
|
||||||
|
* strong exception guarantee is only guaranteed when the expression
|
||||||
|
* `std::is_nothrow_swappable<Key>::value &&
|
||||||
|
* std::is_nothrow_move_constructible<Key>::value` is true, otherwise if an
|
||||||
|
* exception is thrown during the swap or the move, the hash set may end up in an
|
||||||
|
* undefined state. Per the standard a `Key` with a noexcept copy constructor
|
||||||
|
* and no move constructor also satisfies the
|
||||||
|
* `std::is_nothrow_move_constructible<Key>::value` criterion (and will thus
|
||||||
|
* provide the strong exception guarantee for the set).
|
||||||
|
*
|
||||||
|
* When `StoreHash` is true, 32 bits of the hash are stored alongside the
|
||||||
|
* values. It can improve the performance during lookups if the `KeyEqual`
|
||||||
|
* function takes time (or engenders a cache-miss for example) as we then
|
||||||
|
* compare the stored hashes before comparing the keys. When
|
||||||
|
* `tsl::rh::power_of_two_growth_policy` is used as `GrowthPolicy`, it may also
|
||||||
|
* speed-up the rehash process as we can avoid recalculating the hash. When it
|
||||||
|
* is detected that storing the hash will not incur any memory penalty due to
|
||||||
|
* alignment (i.e. `sizeof(tsl::detail_robin_hash::bucket_entry<ValueType,
|
||||||
|
* true>) == sizeof(tsl::detail_robin_hash::bucket_entry<ValueType, false>)`)
|
||||||
|
* and `tsl::rh::power_of_two_growth_policy` is used, the hash will be stored
|
||||||
|
* even if `StoreHash` is false so that we can speed-up the rehash (but it will
|
||||||
|
* not be used on lookups unless `StoreHash` is true).
|
||||||
|
*
|
||||||
|
* `GrowthPolicy` defines how the set grows and consequently how a hash value is
|
||||||
|
* mapped to a bucket. By default the set uses
|
||||||
|
* `tsl::rh::power_of_two_growth_policy`. This policy keeps the number of
|
||||||
|
* buckets to a power of two and uses a mask to set the hash to a bucket instead
|
||||||
|
* of the slow modulo. Other growth policies are available and you may define
|
||||||
|
* your own growth policy, check `tsl::rh::power_of_two_growth_policy` for the
|
||||||
|
* interface.
|
||||||
|
*
|
||||||
|
* `Key` must be swappable.
|
||||||
|
*
|
||||||
|
* `Key` must be copy and/or move constructible.
|
||||||
|
*
|
||||||
|
* If the destructor of `Key` throws an exception, the behaviour of the class is
|
||||||
|
* undefined.
|
||||||
|
*
|
||||||
|
* Iterators invalidation:
|
||||||
|
* - clear, operator=, reserve, rehash: always invalidate the iterators.
|
||||||
|
* - insert, emplace, emplace_hint, operator[]: if there is an effective
|
||||||
|
* insert, invalidate the iterators.
|
||||||
|
* - erase: always invalidate the iterators.
|
||||||
|
*/
|
||||||
|
template <class Key, class Hash = std::hash<Key>,
|
||||||
|
class KeyEqual = std::equal_to<Key>,
|
||||||
|
class Allocator = std::allocator<Key>, bool StoreHash = false,
|
||||||
|
class GrowthPolicy = tsl::rh::power_of_two_growth_policy<2>>
|
||||||
|
class robin_set {
|
||||||
|
private:
|
||||||
|
template <typename U>
|
||||||
|
using has_is_transparent = tsl::detail_robin_hash::has_is_transparent<U>;
|
||||||
|
|
||||||
|
class KeySelect {
|
||||||
|
public:
|
||||||
|
using key_type = Key;
|
||||||
|
|
||||||
|
const key_type& operator()(const Key& key) const noexcept { return key; }
|
||||||
|
|
||||||
|
key_type& operator()(Key& key) noexcept { return key; }
|
||||||
|
};
|
||||||
|
|
||||||
|
using ht = detail_robin_hash::robin_hash<Key, KeySelect, void, Hash, KeyEqual,
|
||||||
|
Allocator, StoreHash, GrowthPolicy>;
|
||||||
|
|
||||||
|
public:
|
||||||
|
using key_type = typename ht::key_type;
|
||||||
|
using value_type = typename ht::value_type;
|
||||||
|
using size_type = typename ht::size_type;
|
||||||
|
using difference_type = typename ht::difference_type;
|
||||||
|
using hasher = typename ht::hasher;
|
||||||
|
using key_equal = typename ht::key_equal;
|
||||||
|
using allocator_type = typename ht::allocator_type;
|
||||||
|
using reference = typename ht::reference;
|
||||||
|
using const_reference = typename ht::const_reference;
|
||||||
|
using pointer = typename ht::pointer;
|
||||||
|
using const_pointer = typename ht::const_pointer;
|
||||||
|
using iterator = typename ht::iterator;
|
||||||
|
using const_iterator = typename ht::const_iterator;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Constructors
|
||||||
|
*/
|
||||||
|
/** Default constructor: delegates to the bucket-count constructor with ht::DEFAULT_INIT_BUCKETS_SIZE. */
robin_set() : robin_set(ht::DEFAULT_INIT_BUCKETS_SIZE) {}
|
||||||
|
|
||||||
|
/**
 * Primary constructor: builds the underlying hash table from `bucket_count`,
 * `hash`, `equal` and `alloc`. The other constructors in view delegate here,
 * directly or indirectly.
 */
explicit robin_set(size_type bucket_count, const Hash& hash = Hash(),
|
||||||
|
const KeyEqual& equal = KeyEqual(),
|
||||||
|
const Allocator& alloc = Allocator())
|
||||||
|
: m_ht(bucket_count, hash, equal, alloc) {}
|
||||||
|
|
||||||
|
/** Bucket count + allocator; Hash and KeyEqual are default-constructed. */
robin_set(size_type bucket_count, const Allocator& alloc)
|
||||||
|
: robin_set(bucket_count, Hash(), KeyEqual(), alloc) {}
|
||||||
|
|
||||||
|
/** Bucket count + hasher + allocator; KeyEqual is default-constructed. */
robin_set(size_type bucket_count, const Hash& hash, const Allocator& alloc)
|
||||||
|
: robin_set(bucket_count, hash, KeyEqual(), alloc) {}
|
||||||
|
|
||||||
|
/** Allocator-only constructor: uses ht::DEFAULT_INIT_BUCKETS_SIZE as the bucket count. */
explicit robin_set(const Allocator& alloc)
|
||||||
|
: robin_set(ht::DEFAULT_INIT_BUCKETS_SIZE, alloc) {}
|
||||||
|
|
||||||
|
/**
 * Range constructor: delegates to the bucket-count constructor, then inserts
 * the iterator range [first, last).
 */
template <class InputIt>
|
||||||
|
robin_set(InputIt first, InputIt last,
|
||||||
|
size_type bucket_count = ht::DEFAULT_INIT_BUCKETS_SIZE,
|
||||||
|
const Hash& hash = Hash(), const KeyEqual& equal = KeyEqual(),
|
||||||
|
const Allocator& alloc = Allocator())
|
||||||
|
: robin_set(bucket_count, hash, equal, alloc) {
|
||||||
|
insert(first, last);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Range + bucket count + allocator; Hash and KeyEqual are default-constructed. */
template <class InputIt>
|
||||||
|
robin_set(InputIt first, InputIt last, size_type bucket_count,
|
||||||
|
const Allocator& alloc)
|
||||||
|
: robin_set(first, last, bucket_count, Hash(), KeyEqual(), alloc) {}
|
||||||
|
|
||||||
|
/** Range + bucket count + hasher + allocator; KeyEqual is default-constructed. */
template <class InputIt>
|
||||||
|
robin_set(InputIt first, InputIt last, size_type bucket_count,
|
||||||
|
const Hash& hash, const Allocator& alloc)
|
||||||
|
: robin_set(first, last, bucket_count, hash, KeyEqual(), alloc) {}
|
||||||
|
|
||||||
|
/** Initializer-list constructor: delegates to the range constructor with init.begin()/init.end(). */
robin_set(std::initializer_list<value_type> init,
|
||||||
|
size_type bucket_count = ht::DEFAULT_INIT_BUCKETS_SIZE,
|
||||||
|
const Hash& hash = Hash(), const KeyEqual& equal = KeyEqual(),
|
||||||
|
const Allocator& alloc = Allocator())
|
||||||
|
: robin_set(init.begin(), init.end(), bucket_count, hash, equal, alloc) {}
|
||||||
|
|
||||||
|
robin_set(std::initializer_list<value_type> init, size_type bucket_count,
|
||||||
|
const Allocator& alloc)
|
||||||
|
: robin_set(init.begin(), init.end(), bucket_count, Hash(), KeyEqual(),
|
||||||
|
alloc) {}
|
||||||
|
|
||||||
|
robin_set(std::initializer_list<value_type> init, size_type bucket_count,
|
||||||
|
const Hash& hash, const Allocator& alloc)
|
||||||
|
: robin_set(init.begin(), init.end(), bucket_count, hash, KeyEqual(),
|
||||||
|
alloc) {}
|
||||||
|
|
||||||
|
robin_set& operator=(std::initializer_list<value_type> ilist) {
|
||||||
|
m_ht.clear();
|
||||||
|
|
||||||
|
m_ht.reserve(ilist.size());
|
||||||
|
m_ht.insert(ilist.begin(), ilist.end());
|
||||||
|
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
|
||||||
|
allocator_type get_allocator() const { return m_ht.get_allocator(); }
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Iterators
|
||||||
|
*/
|
||||||
|
iterator begin() noexcept { return m_ht.begin(); }
|
||||||
|
const_iterator begin() const noexcept { return m_ht.begin(); }
|
||||||
|
const_iterator cbegin() const noexcept { return m_ht.cbegin(); }
|
||||||
|
|
||||||
|
iterator end() noexcept { return m_ht.end(); }
|
||||||
|
const_iterator end() const noexcept { return m_ht.end(); }
|
||||||
|
const_iterator cend() const noexcept { return m_ht.cend(); }
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Capacity
|
||||||
|
*/
|
||||||
|
bool empty() const noexcept { return m_ht.empty(); }
|
||||||
|
size_type size() const noexcept { return m_ht.size(); }
|
||||||
|
size_type max_size() const noexcept { return m_ht.max_size(); }
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Modifiers
|
||||||
|
*/
|
||||||
|
void clear() noexcept { m_ht.clear(); }
|
||||||
|
|
||||||
|
std::pair<iterator, bool> insert(const value_type& value) {
|
||||||
|
return m_ht.insert(value);
|
||||||
|
}
|
||||||
|
|
||||||
|
std::pair<iterator, bool> insert(value_type&& value) {
|
||||||
|
return m_ht.insert(std::move(value));
|
||||||
|
}
|
||||||
|
|
||||||
|
iterator insert(const_iterator hint, const value_type& value) {
|
||||||
|
return m_ht.insert_hint(hint, value);
|
||||||
|
}
|
||||||
|
|
||||||
|
iterator insert(const_iterator hint, value_type&& value) {
|
||||||
|
return m_ht.insert_hint(hint, std::move(value));
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class InputIt>
|
||||||
|
void insert(InputIt first, InputIt last) {
|
||||||
|
m_ht.insert(first, last);
|
||||||
|
}
|
||||||
|
|
||||||
|
void insert(std::initializer_list<value_type> ilist) {
|
||||||
|
m_ht.insert(ilist.begin(), ilist.end());
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Due to the way elements are stored, emplace will need to move or copy the
|
||||||
|
* key-value once. The method is equivalent to
|
||||||
|
* insert(value_type(std::forward<Args>(args)...));
|
||||||
|
*
|
||||||
|
* Mainly here for compatibility with the std::unordered_set interface.
|
||||||
|
*/
|
||||||
|
template <class... Args>
|
||||||
|
std::pair<iterator, bool> emplace(Args&&... args) {
|
||||||
|
return m_ht.emplace(std::forward<Args>(args)...);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Due to the way elements are stored, emplace_hint will need to move or copy
|
||||||
|
* the key-value once. The method is equivalent to insert(hint,
|
||||||
|
* value_type(std::forward<Args>(args)...));
|
||||||
|
*
|
||||||
|
* Mainly here for compatibility with the std::unordered_set interface.
|
||||||
|
*/
|
||||||
|
template <class... Args>
|
||||||
|
iterator emplace_hint(const_iterator hint, Args&&... args) {
|
||||||
|
return m_ht.emplace_hint(hint, std::forward<Args>(args)...);
|
||||||
|
}
|
||||||
|
|
||||||
|
iterator erase(iterator pos) { return m_ht.erase(pos); }
|
||||||
|
iterator erase(const_iterator pos) { return m_ht.erase(pos); }
|
||||||
|
iterator erase(const_iterator first, const_iterator last) {
|
||||||
|
return m_ht.erase(first, last);
|
||||||
|
}
|
||||||
|
size_type erase(const key_type& key) { return m_ht.erase(key); }
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Erase the element at position 'pos'. In contrast to the regular erase()
|
||||||
|
* function, erase_fast() does not return an iterator. This allows it to be
|
||||||
|
* faster especially in hash sets with a low load factor, where finding the
|
||||||
|
* next nonempty bucket would be costly.
|
||||||
|
*/
|
||||||
|
void erase_fast(iterator pos) { return m_ht.erase_fast(pos); }
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Use the hash value 'precalculated_hash' instead of hashing the key. The
|
||||||
|
* hash value should be the same as hash_function()(key). Useful to speed-up
|
||||||
|
* the lookup to the value if you already have the hash.
|
||||||
|
*/
|
||||||
|
size_type erase(const key_type& key, std::size_t precalculated_hash) {
|
||||||
|
return m_ht.erase(key, precalculated_hash);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This overload only participates in the overload resolution if the typedef
|
||||||
|
* KeyEqual::is_transparent exists. If so, K must be hashable and comparable
|
||||||
|
* to Key.
|
||||||
|
*/
|
||||||
|
template <
|
||||||
|
class K, class KE = KeyEqual,
|
||||||
|
typename std::enable_if<has_is_transparent<KE>::value>::type* = nullptr>
|
||||||
|
size_type erase(const K& key) {
|
||||||
|
return m_ht.erase(key);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @copydoc erase(const K& key)
|
||||||
|
*
|
||||||
|
* Use the hash value 'precalculated_hash' instead of hashing the key. The
|
||||||
|
* hash value should be the same as hash_function()(key). Useful to speed-up
|
||||||
|
* the lookup to the value if you already have the hash.
|
||||||
|
*/
|
||||||
|
template <
|
||||||
|
class K, class KE = KeyEqual,
|
||||||
|
typename std::enable_if<has_is_transparent<KE>::value>::type* = nullptr>
|
||||||
|
size_type erase(const K& key, std::size_t precalculated_hash) {
|
||||||
|
return m_ht.erase(key, precalculated_hash);
|
||||||
|
}
|
||||||
|
|
||||||
|
void swap(robin_set& other) { other.m_ht.swap(m_ht); }
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Lookup
|
||||||
|
*/
|
||||||
|
size_type count(const Key& key) const { return m_ht.count(key); }
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Use the hash value 'precalculated_hash' instead of hashing the key. The
|
||||||
|
* hash value should be the same as hash_function()(key). Useful to speed-up
|
||||||
|
* the lookup if you already have the hash.
|
||||||
|
*/
|
||||||
|
size_type count(const Key& key, std::size_t precalculated_hash) const {
|
||||||
|
return m_ht.count(key, precalculated_hash);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This overload only participates in the overload resolution if the typedef
|
||||||
|
* KeyEqual::is_transparent exists. If so, K must be hashable and comparable
|
||||||
|
* to Key.
|
||||||
|
*/
|
||||||
|
template <
|
||||||
|
class K, class KE = KeyEqual,
|
||||||
|
typename std::enable_if<has_is_transparent<KE>::value>::type* = nullptr>
|
||||||
|
size_type count(const K& key) const {
|
||||||
|
return m_ht.count(key);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @copydoc count(const K& key) const
|
||||||
|
*
|
||||||
|
* Use the hash value 'precalculated_hash' instead of hashing the key. The
|
||||||
|
* hash value should be the same as hash_function()(key). Useful to speed-up
|
||||||
|
* the lookup if you already have the hash.
|
||||||
|
*/
|
||||||
|
template <
|
||||||
|
class K, class KE = KeyEqual,
|
||||||
|
typename std::enable_if<has_is_transparent<KE>::value>::type* = nullptr>
|
||||||
|
size_type count(const K& key, std::size_t precalculated_hash) const {
|
||||||
|
return m_ht.count(key, precalculated_hash);
|
||||||
|
}
|
||||||
|
|
||||||
|
iterator find(const Key& key) { return m_ht.find(key); }
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Use the hash value 'precalculated_hash' instead of hashing the key. The
|
||||||
|
* hash value should be the same as hash_function()(key). Useful to speed-up
|
||||||
|
* the lookup if you already have the hash.
|
||||||
|
*/
|
||||||
|
iterator find(const Key& key, std::size_t precalculated_hash) {
|
||||||
|
return m_ht.find(key, precalculated_hash);
|
||||||
|
}
|
||||||
|
|
||||||
|
const_iterator find(const Key& key) const { return m_ht.find(key); }
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @copydoc find(const Key& key, std::size_t precalculated_hash)
|
||||||
|
*/
|
||||||
|
const_iterator find(const Key& key, std::size_t precalculated_hash) const {
|
||||||
|
return m_ht.find(key, precalculated_hash);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This overload only participates in the overload resolution if the typedef
|
||||||
|
* KeyEqual::is_transparent exists. If so, K must be hashable and comparable
|
||||||
|
* to Key.
|
||||||
|
*/
|
||||||
|
template <
|
||||||
|
class K, class KE = KeyEqual,
|
||||||
|
typename std::enable_if<has_is_transparent<KE>::value>::type* = nullptr>
|
||||||
|
iterator find(const K& key) {
|
||||||
|
return m_ht.find(key);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @copydoc find(const K& key)
|
||||||
|
*
|
||||||
|
* Use the hash value 'precalculated_hash' instead of hashing the key. The
|
||||||
|
* hash value should be the same as hash_function()(key). Useful to speed-up
|
||||||
|
* the lookup if you already have the hash.
|
||||||
|
*/
|
||||||
|
template <
|
||||||
|
class K, class KE = KeyEqual,
|
||||||
|
typename std::enable_if<has_is_transparent<KE>::value>::type* = nullptr>
|
||||||
|
iterator find(const K& key, std::size_t precalculated_hash) {
|
||||||
|
return m_ht.find(key, precalculated_hash);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @copydoc find(const K& key)
|
||||||
|
*/
|
||||||
|
template <
|
||||||
|
class K, class KE = KeyEqual,
|
||||||
|
typename std::enable_if<has_is_transparent<KE>::value>::type* = nullptr>
|
||||||
|
const_iterator find(const K& key) const {
|
||||||
|
return m_ht.find(key);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @copydoc find(const K& key)
|
||||||
|
*
|
||||||
|
* Use the hash value 'precalculated_hash' instead of hashing the key. The
|
||||||
|
* hash value should be the same as hash_function()(key). Useful to speed-up
|
||||||
|
* the lookup if you already have the hash.
|
||||||
|
*/
|
||||||
|
template <
|
||||||
|
class K, class KE = KeyEqual,
|
||||||
|
typename std::enable_if<has_is_transparent<KE>::value>::type* = nullptr>
|
||||||
|
const_iterator find(const K& key, std::size_t precalculated_hash) const {
|
||||||
|
return m_ht.find(key, precalculated_hash);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool contains(const Key& key) const { return m_ht.contains(key); }
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Use the hash value 'precalculated_hash' instead of hashing the key. The
|
||||||
|
* hash value should be the same as hash_function()(key). Useful to speed-up
|
||||||
|
* the lookup if you already have the hash.
|
||||||
|
*/
|
||||||
|
bool contains(const Key& key, std::size_t precalculated_hash) const {
|
||||||
|
return m_ht.contains(key, precalculated_hash);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This overload only participates in the overload resolution if the typedef
|
||||||
|
* KeyEqual::is_transparent exists. If so, K must be hashable and comparable
|
||||||
|
* to Key.
|
||||||
|
*/
|
||||||
|
template <
|
||||||
|
class K, class KE = KeyEqual,
|
||||||
|
typename std::enable_if<has_is_transparent<KE>::value>::type* = nullptr>
|
||||||
|
bool contains(const K& key) const {
|
||||||
|
return m_ht.contains(key);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @copydoc contains(const K& key) const
|
||||||
|
*
|
||||||
|
* Use the hash value 'precalculated_hash' instead of hashing the key. The
|
||||||
|
* hash value should be the same as hash_function()(key). Useful to speed-up
|
||||||
|
* the lookup if you already have the hash.
|
||||||
|
*/
|
||||||
|
template <
|
||||||
|
class K, class KE = KeyEqual,
|
||||||
|
typename std::enable_if<has_is_transparent<KE>::value>::type* = nullptr>
|
||||||
|
bool contains(const K& key, std::size_t precalculated_hash) const {
|
||||||
|
return m_ht.contains(key, precalculated_hash);
|
||||||
|
}
|
||||||
|
|
||||||
|
std::pair<iterator, iterator> equal_range(const Key& key) {
|
||||||
|
return m_ht.equal_range(key);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Use the hash value 'precalculated_hash' instead of hashing the key. The
|
||||||
|
* hash value should be the same as hash_function()(key). Useful to speed-up
|
||||||
|
* the lookup if you already have the hash.
|
||||||
|
*/
|
||||||
|
std::pair<iterator, iterator> equal_range(const Key& key,
|
||||||
|
std::size_t precalculated_hash) {
|
||||||
|
return m_ht.equal_range(key, precalculated_hash);
|
||||||
|
}
|
||||||
|
|
||||||
|
std::pair<const_iterator, const_iterator> equal_range(const Key& key) const {
|
||||||
|
return m_ht.equal_range(key);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @copydoc equal_range(const Key& key, std::size_t precalculated_hash)
|
||||||
|
*/
|
||||||
|
std::pair<const_iterator, const_iterator> equal_range(
|
||||||
|
const Key& key, std::size_t precalculated_hash) const {
|
||||||
|
return m_ht.equal_range(key, precalculated_hash);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This overload only participates in the overload resolution if the typedef
|
||||||
|
* KeyEqual::is_transparent exists. If so, K must be hashable and comparable
|
||||||
|
* to Key.
|
||||||
|
*/
|
||||||
|
template <
|
||||||
|
class K, class KE = KeyEqual,
|
||||||
|
typename std::enable_if<has_is_transparent<KE>::value>::type* = nullptr>
|
||||||
|
std::pair<iterator, iterator> equal_range(const K& key) {
|
||||||
|
return m_ht.equal_range(key);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @copydoc equal_range(const K& key)
|
||||||
|
*
|
||||||
|
* Use the hash value 'precalculated_hash' instead of hashing the key. The
|
||||||
|
* hash value should be the same as hash_function()(key). Useful to speed-up
|
||||||
|
* the lookup if you already have the hash.
|
||||||
|
*/
|
||||||
|
template <
|
||||||
|
class K, class KE = KeyEqual,
|
||||||
|
typename std::enable_if<has_is_transparent<KE>::value>::type* = nullptr>
|
||||||
|
std::pair<iterator, iterator> equal_range(const K& key,
|
||||||
|
std::size_t precalculated_hash) {
|
||||||
|
return m_ht.equal_range(key, precalculated_hash);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @copydoc equal_range(const K& key)
|
||||||
|
*/
|
||||||
|
template <
|
||||||
|
class K, class KE = KeyEqual,
|
||||||
|
typename std::enable_if<has_is_transparent<KE>::value>::type* = nullptr>
|
||||||
|
std::pair<const_iterator, const_iterator> equal_range(const K& key) const {
|
||||||
|
return m_ht.equal_range(key);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @copydoc equal_range(const K& key, std::size_t precalculated_hash)
|
||||||
|
*/
|
||||||
|
template <
|
||||||
|
class K, class KE = KeyEqual,
|
||||||
|
typename std::enable_if<has_is_transparent<KE>::value>::type* = nullptr>
|
||||||
|
std::pair<const_iterator, const_iterator> equal_range(
|
||||||
|
const K& key, std::size_t precalculated_hash) const {
|
||||||
|
return m_ht.equal_range(key, precalculated_hash);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Bucket interface
|
||||||
|
*/
|
||||||
|
size_type bucket_count() const { return m_ht.bucket_count(); }
|
||||||
|
size_type max_bucket_count() const { return m_ht.max_bucket_count(); }
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Hash policy
|
||||||
|
*/
|
||||||
|
float load_factor() const { return m_ht.load_factor(); }
|
||||||
|
|
||||||
|
float min_load_factor() const { return m_ht.min_load_factor(); }
|
||||||
|
float max_load_factor() const { return m_ht.max_load_factor(); }
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Set the `min_load_factor` to `ml`. When the `load_factor` of the set goes
|
||||||
|
* below `min_load_factor` after some erase operations, the set will be
|
||||||
|
* shrunk when an insertion occurs. The erase method itself never shrinks
|
||||||
|
* the set.
|
||||||
|
*
|
||||||
|
* The default value of `min_load_factor` is 0.0f, the set never shrinks by
|
||||||
|
* default.
|
||||||
|
*/
|
||||||
|
void min_load_factor(float ml) { m_ht.min_load_factor(ml); }
|
||||||
|
void max_load_factor(float ml) { m_ht.max_load_factor(ml); }
|
||||||
|
|
||||||
|
void rehash(size_type count_) { m_ht.rehash(count_); }
|
||||||
|
void reserve(size_type count_) { m_ht.reserve(count_); }
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Observers
|
||||||
|
*/
|
||||||
|
hasher hash_function() const { return m_ht.hash_function(); }
|
||||||
|
key_equal key_eq() const { return m_ht.key_eq(); }
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Other
|
||||||
|
*/
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Convert a const_iterator to an iterator.
|
||||||
|
*/
|
||||||
|
iterator mutable_iterator(const_iterator pos) {
|
||||||
|
return m_ht.mutable_iterator(pos);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Equality comparison: two sets compare equal when they hold the same number
 * of elements and every element of `lhs` can be found in `rhs`.
 */
friend bool operator==(const robin_set& lhs, const robin_set& rhs) {
  // Different sizes: the sets cannot be equal, skip the per-element lookups.
  if (lhs.size() != rhs.size()) {
    return false;
  }

  const auto lhs_end = lhs.end();
  const auto rhs_end = rhs.cend();
  for (auto it_lhs = lhs.begin(); it_lhs != lhs_end; ++it_lhs) {
    // The first element of lhs that is absent from rhs settles the answer.
    if (rhs.find(*it_lhs) == rhs_end) {
      return false;
    }
  }

  return true;
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Serialize the set through the `serializer` parameter.
|
||||||
|
*
|
||||||
|
* The `serializer` parameter must be a function object that supports the
|
||||||
|
* following call:
|
||||||
|
* - `template<typename U> void operator()(const U& value);` where the types
|
||||||
|
* `std::int16_t`, `std::uint32_t`, `std::uint64_t`, `float` and `Key` must be
|
||||||
|
* supported for U.
|
||||||
|
*
|
||||||
|
* The implementation leaves binary compatibility (endianness, IEEE 754 for
|
||||||
|
* floats, ...) of the types it serializes in the hands of the `Serializer`
|
||||||
|
* function object if compatibility is required.
|
||||||
|
*/
|
||||||
|
template <class Serializer>
|
||||||
|
void serialize(Serializer& serializer) const {
|
||||||
|
m_ht.serialize(serializer);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Deserialize a previously serialized set through the `deserializer`
|
||||||
|
* parameter.
|
||||||
|
*
|
||||||
|
* The `deserializer` parameter must be a function object that supports the
|
||||||
|
* following call:
|
||||||
|
* - `template<typename U> U operator()();` where the types `std::int16_t`,
|
||||||
|
* `std::uint32_t`, `std::uint64_t`, `float` and `Key` must be supported for
|
||||||
|
* U.
|
||||||
|
*
|
||||||
|
* If the deserialized hash set type is hash compatible with the serialized
|
||||||
|
* set, the deserialization process can be sped up by setting
|
||||||
|
* `hash_compatible` to true. To be hash compatible, the Hash, KeyEqual and
|
||||||
|
* GrowthPolicy must behave the same way as the ones used on the serialized
|
||||||
|
* set and the StoreHash must have the same value. The `std::size_t` must also
|
||||||
|
* be of the same size as the one on the platform used to serialize the set.
|
||||||
|
* If these criteria are not met, the behaviour is undefined with
|
||||||
|
* `hash_compatible` set to true.
|
||||||
|
*
|
||||||
|
* The behaviour is undefined if the type `Key` of the `robin_set` is not the
|
||||||
|
* same as the type used during serialization.
|
||||||
|
*
|
||||||
|
* The implementation leaves binary compatibility (endianness, IEEE 754 for
|
||||||
|
* floats, size of int, ...) of the types it deserializes in the hands of the
|
||||||
|
* `Deserializer` function object if compatibility is required.
|
||||||
|
*/
|
||||||
|
template <class Deserializer>
|
||||||
|
static robin_set deserialize(Deserializer& deserializer,
|
||||||
|
bool hash_compatible = false) {
|
||||||
|
robin_set set(0);
|
||||||
|
set.m_ht.deserialize(deserializer, hash_compatible);
|
||||||
|
|
||||||
|
return set;
|
||||||
|
}
|
||||||
|
|
||||||
|
friend bool operator!=(const robin_set& lhs, const robin_set& rhs) {
|
||||||
|
return !operator==(lhs, rhs);
|
||||||
|
}
|
||||||
|
|
||||||
|
friend void swap(robin_set& lhs, robin_set& rhs) { lhs.swap(rhs); }
|
||||||
|
|
||||||
|
private:
|
||||||
|
ht m_ht;
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Same as `tsl::robin_set<Key, Hash, KeyEqual, Allocator, StoreHash,
|
||||||
|
* tsl::rh::prime_growth_policy>`.
|
||||||
|
*/
|
||||||
|
template <class Key, class Hash = std::hash<Key>,
|
||||||
|
class KeyEqual = std::equal_to<Key>,
|
||||||
|
class Allocator = std::allocator<Key>, bool StoreHash = false>
|
||||||
|
using robin_pg_set = robin_set<Key, Hash, KeyEqual, Allocator, StoreHash,
|
||||||
|
tsl::rh::prime_growth_policy>;
|
||||||
|
|
||||||
|
} // end namespace tsl
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
@ -0,0 +1,100 @@
|
||||||
|
// Copyright(c) 2015-present, Gabi Melman & spdlog contributors.
|
||||||
|
// Distributed under the MIT License (http://opensource.org/licenses/MIT)
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
//
|
||||||
|
// Async logging using global thread pool
|
||||||
|
// All loggers created here share same global thread pool.
|
||||||
|
// Each log message is pushed to a queue along with a shared pointer to the
|
||||||
|
// logger.
|
||||||
|
// If a logger is deleted while having pending messages in the queue, its actual
|
||||||
|
// destruction is deferred
|
||||||
|
// until all its messages are processed by the thread pool.
|
||||||
|
// This is because each message in the queue holds a shared_ptr to the
|
||||||
|
// originating logger.
|
||||||
|
|
||||||
|
#include <spdlog/async_logger.h>
|
||||||
|
#include <spdlog/details/registry.h>
|
||||||
|
#include <spdlog/details/thread_pool.h>
|
||||||
|
|
||||||
|
#include <functional>
|
||||||
|
#include <memory>
|
||||||
|
#include <mutex>
|
||||||
|
|
||||||
|
namespace spdlog {
|
||||||
|
|
||||||
|
namespace details {
|
||||||
|
static const size_t default_async_q_size = 8192;
|
||||||
|
}
|
||||||
|
|
||||||
|
// async logger factory - creates async loggers backed with thread pool.
|
||||||
|
// if a global thread pool doesn't already exist, create it with default queue
|
||||||
|
// size of 8192 items and single thread.
|
||||||
|
template <async_overflow_policy OverflowPolicy = async_overflow_policy::block>
|
||||||
|
struct async_factory_impl {
|
||||||
|
template <typename Sink, typename... SinkArgs>
|
||||||
|
static std::shared_ptr<async_logger> create(std::string logger_name, SinkArgs &&...args) {
|
||||||
|
auto ®istry_inst = details::registry::instance();
|
||||||
|
|
||||||
|
// create global thread pool if not already exists..
|
||||||
|
|
||||||
|
auto &mutex = registry_inst.tp_mutex();
|
||||||
|
std::lock_guard<std::recursive_mutex> tp_lock(mutex);
|
||||||
|
auto tp = registry_inst.get_tp();
|
||||||
|
if (tp == nullptr) {
|
||||||
|
tp = std::make_shared<details::thread_pool>(details::default_async_q_size, 1U);
|
||||||
|
registry_inst.set_tp(tp);
|
||||||
|
}
|
||||||
|
|
||||||
|
auto sink = std::make_shared<Sink>(std::forward<SinkArgs>(args)...);
|
||||||
|
auto new_logger = std::make_shared<async_logger>(std::move(logger_name), std::move(sink),
|
||||||
|
std::move(tp), OverflowPolicy);
|
||||||
|
registry_inst.initialize_logger(new_logger);
|
||||||
|
return new_logger;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
using async_factory = async_factory_impl<async_overflow_policy::block>;
|
||||||
|
using async_factory_nonblock = async_factory_impl<async_overflow_policy::overrun_oldest>;
|
||||||
|
|
||||||
|
template <typename Sink, typename... SinkArgs>
|
||||||
|
inline std::shared_ptr<spdlog::logger> create_async(std::string logger_name,
|
||||||
|
SinkArgs &&...sink_args) {
|
||||||
|
return async_factory::create<Sink>(std::move(logger_name),
|
||||||
|
std::forward<SinkArgs>(sink_args)...);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename Sink, typename... SinkArgs>
|
||||||
|
inline std::shared_ptr<spdlog::logger> create_async_nb(std::string logger_name,
|
||||||
|
SinkArgs &&...sink_args) {
|
||||||
|
return async_factory_nonblock::create<Sink>(std::move(logger_name),
|
||||||
|
std::forward<SinkArgs>(sink_args)...);
|
||||||
|
}
|
||||||
|
|
||||||
|
// set global thread pool.
|
||||||
|
inline void init_thread_pool(size_t q_size,
|
||||||
|
size_t thread_count,
|
||||||
|
std::function<void()> on_thread_start,
|
||||||
|
std::function<void()> on_thread_stop) {
|
||||||
|
auto tp = std::make_shared<details::thread_pool>(q_size, thread_count, on_thread_start,
|
||||||
|
on_thread_stop);
|
||||||
|
details::registry::instance().set_tp(std::move(tp));
|
||||||
|
}
|
||||||
|
|
||||||
|
inline void init_thread_pool(size_t q_size,
|
||||||
|
size_t thread_count,
|
||||||
|
std::function<void()> on_thread_start) {
|
||||||
|
init_thread_pool(q_size, thread_count, on_thread_start, [] {});
|
||||||
|
}
|
||||||
|
|
||||||
|
inline void init_thread_pool(size_t q_size, size_t thread_count) {
|
||||||
|
init_thread_pool(
|
||||||
|
q_size, thread_count, [] {}, [] {});
|
||||||
|
}
|
||||||
|
|
||||||
|
// get the global thread pool.
|
||||||
|
inline std::shared_ptr<spdlog::details::thread_pool> thread_pool() {
|
||||||
|
return details::registry::instance().get_tp();
|
||||||
|
}
|
||||||
|
} // namespace spdlog
|
||||||
|
|
@ -0,0 +1,84 @@
|
||||||
|
// Copyright(c) 2015-present, Gabi Melman & spdlog contributors.
|
||||||
|
// Distributed under the MIT License (http://opensource.org/licenses/MIT)
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#ifndef SPDLOG_HEADER_ONLY
|
||||||
|
#include <spdlog/async_logger.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include <spdlog/details/thread_pool.h>
|
||||||
|
#include <spdlog/sinks/sink.h>
|
||||||
|
|
||||||
|
#include <memory>
|
||||||
|
#include <string>
|
||||||
|
|
||||||
|
SPDLOG_INLINE spdlog::async_logger::async_logger(std::string logger_name,
|
||||||
|
sinks_init_list sinks_list,
|
||||||
|
std::weak_ptr<details::thread_pool> tp,
|
||||||
|
async_overflow_policy overflow_policy)
|
||||||
|
: async_logger(std::move(logger_name),
|
||||||
|
sinks_list.begin(),
|
||||||
|
sinks_list.end(),
|
||||||
|
std::move(tp),
|
||||||
|
overflow_policy) {}
|
||||||
|
|
||||||
|
SPDLOG_INLINE spdlog::async_logger::async_logger(std::string logger_name,
|
||||||
|
sink_ptr single_sink,
|
||||||
|
std::weak_ptr<details::thread_pool> tp,
|
||||||
|
async_overflow_policy overflow_policy)
|
||||||
|
: async_logger(
|
||||||
|
std::move(logger_name), {std::move(single_sink)}, std::move(tp), overflow_policy) {}
|
||||||
|
|
||||||
|
// send the log message to the thread pool
|
||||||
|
SPDLOG_INLINE void spdlog::async_logger::sink_it_(const details::log_msg &msg) {
    // Everything runs inside SPDLOG_TRY / SPDLOG_LOGGER_CATCH(msg.source).
    SPDLOG_TRY {
        // thread_pool_ is a weak_ptr; lock() yields an empty pointer once the
        // pool has been destroyed.
        auto pool_ptr = thread_pool_.lock();
        if (!pool_ptr) {
            throw_spdlog_ex("async log: thread pool doesn't exist anymore");
        } else {
            // Post the message together with a shared_ptr to this logger.
            pool_ptr->post_log(shared_from_this(), msg, overflow_policy_);
        }
    }
    SPDLOG_LOGGER_CATCH(msg.source)
}
|
||||||
|
|
||||||
|
// send flush request to the thread pool
|
||||||
|
SPDLOG_INLINE void spdlog::async_logger::flush_() {
    // Everything runs inside SPDLOG_TRY / SPDLOG_LOGGER_CATCH(source_loc()).
    SPDLOG_TRY {
        // thread_pool_ is a weak_ptr; lock() yields an empty pointer once the
        // pool has been destroyed.
        auto pool_ptr = thread_pool_.lock();
        if (!pool_ptr) {
            throw_spdlog_ex("async flush: thread pool doesn't exist anymore");
        } else {
            // Post the flush request together with a shared_ptr to this logger.
            pool_ptr->post_flush(shared_from_this(), overflow_policy_);
        }
    }
    SPDLOG_LOGGER_CATCH(source_loc())
}
|
||||||
|
|
||||||
|
//
|
||||||
|
// backend functions - called from the thread pool to do the actual job
|
||||||
|
//
|
||||||
|
SPDLOG_INLINE void spdlog::async_logger::backend_sink_it_(const details::log_msg &msg) {
    // Hand the message to every sink whose should_log() accepts its level.
    // Each sink call has its own SPDLOG_TRY / SPDLOG_LOGGER_CATCH(msg.source).
    for (auto &sink : sinks_) {
        if (!sink->should_log(msg.level)) {
            continue;
        }
        SPDLOG_TRY { sink->log(msg); }
        SPDLOG_LOGGER_CATCH(msg.source)
    }

    // Flush afterwards when should_flush_() asks for it.
    if (should_flush_(msg)) {
        backend_flush_();
    }
}
|
||||||
|
|
||||||
|
SPDLOG_INLINE void spdlog::async_logger::backend_flush_() {
|
||||||
|
for (auto &sink : sinks_) {
|
||||||
|
SPDLOG_TRY { sink->flush(); }
|
||||||
|
SPDLOG_LOGGER_CATCH(source_loc())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
SPDLOG_INLINE std::shared_ptr<spdlog::logger> spdlog::async_logger::clone(std::string new_name) {
|
||||||
|
auto cloned = std::make_shared<spdlog::async_logger>(*this);
|
||||||
|
cloned->name_ = std::move(new_name);
|
||||||
|
return cloned;
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,74 @@
|
||||||
|
// Copyright(c) 2015-present, Gabi Melman & spdlog contributors.
|
||||||
|
// Distributed under the MIT License (http://opensource.org/licenses/MIT)
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
// Fast asynchronous logger.
|
||||||
|
// Uses pre allocated queue.
|
||||||
|
// Creates a single back thread to pop messages from the queue and log them.
|
||||||
|
//
|
||||||
|
// Upon each log write the logger:
|
||||||
|
// 1. Checks if its log level is enough to log the message
|
||||||
|
// 2. Push a new copy of the message to a queue (or block the caller until
|
||||||
|
// space is available in the queue)
|
||||||
|
// Upon destruction, logs all remaining messages in the queue before
|
||||||
|
// destructing.
|
||||||
|
|
||||||
|
#include <spdlog/logger.h>
|
||||||
|
|
||||||
|
namespace spdlog {
|
||||||
|
|
||||||
|
// Async overflow policy - block by default.
|
||||||
|
enum class async_overflow_policy {
|
||||||
|
block, // Block until message can be enqueued
|
||||||
|
overrun_oldest, // Discard oldest message in the queue if full when trying to
|
||||||
|
// add new item.
|
||||||
|
discard_new // Discard new message if the queue is full when trying to add new item.
|
||||||
|
};
|
||||||
|
|
||||||
|
namespace details {
|
||||||
|
class thread_pool;
|
||||||
|
}
|
||||||
|
|
||||||
|
class SPDLOG_API async_logger final : public std::enable_shared_from_this<async_logger>,
|
||||||
|
public logger {
|
||||||
|
friend class details::thread_pool;
|
||||||
|
|
||||||
|
public:
|
||||||
|
template <typename It>
|
||||||
|
async_logger(std::string logger_name,
|
||||||
|
It begin,
|
||||||
|
It end,
|
||||||
|
std::weak_ptr<details::thread_pool> tp,
|
||||||
|
async_overflow_policy overflow_policy = async_overflow_policy::block)
|
||||||
|
: logger(std::move(logger_name), begin, end),
|
||||||
|
thread_pool_(std::move(tp)),
|
||||||
|
overflow_policy_(overflow_policy) {}
|
||||||
|
|
||||||
|
async_logger(std::string logger_name,
|
||||||
|
sinks_init_list sinks_list,
|
||||||
|
std::weak_ptr<details::thread_pool> tp,
|
||||||
|
async_overflow_policy overflow_policy = async_overflow_policy::block);
|
||||||
|
|
||||||
|
async_logger(std::string logger_name,
|
||||||
|
sink_ptr single_sink,
|
||||||
|
std::weak_ptr<details::thread_pool> tp,
|
||||||
|
async_overflow_policy overflow_policy = async_overflow_policy::block);
|
||||||
|
|
||||||
|
std::shared_ptr<logger> clone(std::string new_name) override;
|
||||||
|
|
||||||
|
protected:
|
||||||
|
void sink_it_(const details::log_msg &msg) override;
|
||||||
|
void flush_() override;
|
||||||
|
void backend_sink_it_(const details::log_msg &incoming_log_msg);
|
||||||
|
void backend_flush_();
|
||||||
|
|
||||||
|
private:
|
||||||
|
std::weak_ptr<details::thread_pool> thread_pool_;
|
||||||
|
async_overflow_policy overflow_policy_;
|
||||||
|
};
|
||||||
|
} // namespace spdlog
|
||||||
|
|
||||||
|
#ifdef SPDLOG_HEADER_ONLY
|
||||||
|
#include "async_logger-inl.h"
|
||||||
|
#endif
|
||||||
|
|
@ -0,0 +1,40 @@
|
||||||
|
// Copyright(c) 2015-present, Gabi Melman & spdlog contributors.
|
||||||
|
// Distributed under the MIT License (http://opensource.org/licenses/MIT)
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
#include <spdlog/cfg/helpers.h>
|
||||||
|
#include <spdlog/details/registry.h>
|
||||||
|
|
||||||
|
//
|
||||||
|
// Init log levels using each argv entry that starts with "SPDLOG_LEVEL="
|
||||||
|
//
|
||||||
|
// set all loggers to debug level:
|
||||||
|
// example.exe "SPDLOG_LEVEL=debug"
|
||||||
|
|
||||||
|
// set logger1 to trace level
|
||||||
|
// example.exe "SPDLOG_LEVEL=logger1=trace"
|
||||||
|
|
||||||
|
// turn off all logging except for logger1 and logger2:
|
||||||
|
// example.exe "SPDLOG_LEVEL=off,logger1=debug,logger2=info"
|
||||||
|
|
||||||
|
namespace spdlog {
|
||||||
|
namespace cfg {
|
||||||
|
|
||||||
|
// search for SPDLOG_LEVEL= in the args and use it to init the levels
|
||||||
|
inline void load_argv_levels(int argc, const char **argv) {
|
||||||
|
const std::string spdlog_level_prefix = "SPDLOG_LEVEL=";
|
||||||
|
for (int i = 1; i < argc; i++) {
|
||||||
|
std::string arg = argv[i];
|
||||||
|
if (arg.find(spdlog_level_prefix) == 0) {
|
||||||
|
auto levels_string = arg.substr(spdlog_level_prefix.size());
|
||||||
|
helpers::load_levels(levels_string);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Convenience overload for the usual `char **argv` of main(): adds constness and forwards
// to the `const char **` overload.
inline void load_argv_levels(int argc, char **argv) {
    const char **const_argv = const_cast<const char **>(argv);
    load_argv_levels(argc, const_argv);
}
|
||||||
|
|
||||||
|
} // namespace cfg
|
||||||
|
} // namespace spdlog
|
||||||
|
|
@ -0,0 +1,36 @@
|
||||||
|
// Copyright(c) 2015-present, Gabi Melman & spdlog contributors.
|
||||||
|
// Distributed under the MIT License (http://opensource.org/licenses/MIT)
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
#include <spdlog/cfg/helpers.h>
|
||||||
|
#include <spdlog/details/os.h>
|
||||||
|
#include <spdlog/details/registry.h>
|
||||||
|
|
||||||
|
//
|
||||||
|
// Init log levels from the env variable SPDLOG_LEVEL
|
||||||
|
// Inspired from Rust's "env_logger" crate (https://crates.io/crates/env_logger).
|
||||||
|
// Note - entries with an unrecognized level name are ignored
|
||||||
|
//
|
||||||
|
// Examples:
|
||||||
|
//
|
||||||
|
// set global level to debug:
|
||||||
|
// export SPDLOG_LEVEL=debug
|
||||||
|
//
|
||||||
|
// turn off all logging except for logger1:
|
||||||
|
// export SPDLOG_LEVEL="*=off,logger1=debug"
|
||||||
|
//
|
||||||
|
|
||||||
|
// turn off all logging except for logger1 and logger2:
|
||||||
|
// export SPDLOG_LEVEL="off,logger1=debug,logger2=info"
|
||||||
|
|
||||||
|
namespace spdlog {
|
||||||
|
namespace cfg {
|
||||||
|
inline void load_env_levels() {
|
||||||
|
auto env_val = details::os::getenv("SPDLOG_LEVEL");
|
||||||
|
if (!env_val.empty()) {
|
||||||
|
helpers::load_levels(env_val);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace cfg
|
||||||
|
} // namespace spdlog
|
||||||
|
|
@ -0,0 +1,107 @@
|
||||||
|
// Copyright(c) 2015-present, Gabi Melman & spdlog contributors.
|
||||||
|
// Distributed under the MIT License (http://opensource.org/licenses/MIT)
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#ifndef SPDLOG_HEADER_ONLY
|
||||||
|
#include <spdlog/cfg/helpers.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include <spdlog/details/os.h>
|
||||||
|
#include <spdlog/details/registry.h>
|
||||||
|
#include <spdlog/spdlog.h>
|
||||||
|
|
||||||
|
#include <algorithm>
|
||||||
|
#include <sstream>
|
||||||
|
#include <string>
|
||||||
|
#include <utility>
|
||||||
|
|
||||||
|
namespace spdlog {
|
||||||
|
namespace cfg {
|
||||||
|
namespace helpers {
|
||||||
|
|
||||||
|
// Lowercase `str` in place and return a reference to it.
// Only the ASCII letters 'A'..'Z' are changed; every other byte is left as is.
inline std::string &to_lower_(std::string &str) {
    for (char &c : str) {
        const bool is_ascii_upper = (c >= 'A' && c <= 'Z');
        if (is_ascii_upper) {
            c = static_cast<char>(c + ('a' - 'A'));
        }
    }
    return str;
}
|
||||||
|
|
||||||
|
// Strip leading and trailing blanks (space, '\n', '\r', '\t') from `str` in place and
// return a reference to it. A string made only of blanks becomes empty.
inline std::string &trim_(std::string &str) {
    const char *blanks = " \n\r\t";
    const auto first_kept = str.find_first_not_of(blanks);
    if (first_kept == std::string::npos) {
        // nothing but blanks (or already empty)
        str.clear();
        return str;
    }
    const auto last_kept = str.find_last_not_of(blanks);
    str.erase(last_kept + 1);  // drop the tail first so the front offset stays valid
    str.erase(0, first_kept);
    return str;
}
|
||||||
|
|
||||||
|
// return (name,value) trimmed pair from given "name=value" string.
|
||||||
|
// return empty string on missing parts
|
||||||
|
// "key=val" => ("key", "val")
|
||||||
|
// " key = val " => ("key", "val")
|
||||||
|
// "key=" => ("key", "")
|
||||||
|
// "val" => ("", "val")
|
||||||
|
|
||||||
|
inline std::pair<std::string, std::string> extract_kv_(char sep, const std::string &str) {
|
||||||
|
auto n = str.find(sep);
|
||||||
|
std::string k, v;
|
||||||
|
if (n == std::string::npos) {
|
||||||
|
v = str;
|
||||||
|
} else {
|
||||||
|
k = str.substr(0, n);
|
||||||
|
v = str.substr(n + 1);
|
||||||
|
}
|
||||||
|
return std::make_pair(trim_(k), trim_(v));
|
||||||
|
}
|
||||||
|
|
||||||
|
// return vector of key/value pairs from sequence of "K1=V1,K2=V2,.."
|
||||||
|
// "a=AAA,b=BBB,c=CCC,.." => {("a","AAA"),("b","BBB"),("c", "CCC"),...}
|
||||||
|
inline std::unordered_map<std::string, std::string> extract_key_vals_(const std::string &str) {
|
||||||
|
std::string token;
|
||||||
|
std::istringstream token_stream(str);
|
||||||
|
std::unordered_map<std::string, std::string> rv{};
|
||||||
|
while (std::getline(token_stream, token, ',')) {
|
||||||
|
if (token.empty()) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
auto kv = extract_kv_('=', token);
|
||||||
|
rv[kv.first] = kv.second;
|
||||||
|
}
|
||||||
|
return rv;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Parse a level specification such as "off,logger1=debug,logger2=info" and apply it through
// details::registry::instance().set_levels().
// - an entry without a logger name sets the global level
// - entries whose level name is not recognized are skipped
// - an empty input, or one longer than 512 characters, is ignored altogether
SPDLOG_INLINE void load_levels(const std::string &input) {
    const bool acceptable = !input.empty() && input.size() <= 512;
    if (!acceptable) {
        return;
    }

    std::unordered_map<std::string, level::level_enum> per_logger;
    level::level_enum default_level = level::info;
    bool has_default = false;

    auto parsed = extract_key_vals_(input);
    for (auto &entry : parsed) {
        const auto &target = entry.first;
        const auto wanted = to_lower_(entry.second);
        const auto parsed_level = level::from_str(wanted);

        // from_str() answers level::off for unknown names too, so "off" is only trusted
        // when it was spelled out explicitly.
        const bool recognized = (parsed_level != level::off) || (wanted == "off");
        if (!recognized) {
            continue;
        }

        if (target.empty()) {
            // no logger name: this entry is the global level
            has_default = true;
            default_level = parsed_level;
        } else {
            per_logger[target] = parsed_level;
        }
    }

    details::registry::instance().set_levels(std::move(per_logger),
                                             has_default ? &default_level : nullptr);
}
|
||||||
|
|
||||||
|
} // namespace helpers
|
||||||
|
} // namespace cfg
|
||||||
|
} // namespace spdlog
|
||||||
|
|
@ -0,0 +1,29 @@
|
||||||
|
// Copyright(c) 2015-present, Gabi Melman & spdlog contributors.
|
||||||
|
// Distributed under the MIT License (http://opensource.org/licenses/MIT)
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <spdlog/common.h>
|
||||||
|
#include <unordered_map>
|
||||||
|
|
||||||
|
namespace spdlog {
|
||||||
|
namespace cfg {
|
||||||
|
namespace helpers {
|
||||||
|
//
|
||||||
|
// Init levels from given string
|
||||||
|
//
|
||||||
|
// Examples:
|
||||||
|
//
|
||||||
|
// set global level to debug: "debug"
|
||||||
|
// turn off all logging except for logger1: "off,logger1=debug"
|
||||||
|
// turn off all logging except for logger1 and logger2: "off,logger1=debug,logger2=info"
|
||||||
|
//
|
||||||
|
SPDLOG_API void load_levels(const std::string &txt);
|
||||||
|
} // namespace helpers
|
||||||
|
|
||||||
|
} // namespace cfg
|
||||||
|
} // namespace spdlog
|
||||||
|
|
||||||
|
#ifdef SPDLOG_HEADER_ONLY
|
||||||
|
#include "helpers-inl.h"
|
||||||
|
#endif // SPDLOG_HEADER_ONLY
|
||||||
|
|
@ -0,0 +1,68 @@
|
||||||
|
// Copyright(c) 2015-present, Gabi Melman & spdlog contributors.
|
||||||
|
// Distributed under the MIT License (http://opensource.org/licenses/MIT)
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#ifndef SPDLOG_HEADER_ONLY
|
||||||
|
#include <spdlog/common.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include <algorithm>
|
||||||
|
#include <iterator>
|
||||||
|
|
||||||
|
namespace spdlog {
|
||||||
|
namespace level {
|
||||||
|
|
||||||
|
#if __cplusplus >= 201703L
|
||||||
|
constexpr
|
||||||
|
#endif
|
||||||
|
static string_view_t level_string_views[] SPDLOG_LEVEL_NAMES;
|
||||||
|
|
||||||
|
static const char *short_level_names[] SPDLOG_SHORT_LEVEL_NAMES;
|
||||||
|
|
||||||
|
// Long name of level `l`, looked up in level_string_views.
// The table is indexed directly with `l`; no range check is performed.
SPDLOG_INLINE const string_view_t &to_string_view(spdlog::level::level_enum l) SPDLOG_NOEXCEPT {
    const string_view_t &long_name = level_string_views[l];
    return long_name;
}
|
||||||
|
|
||||||
|
// Short name of level `l`, looked up in short_level_names.
// The table is indexed directly with `l`; no range check is performed.
SPDLOG_INLINE const char *to_short_c_str(spdlog::level::level_enum l) SPDLOG_NOEXCEPT {
    const char *short_name = short_level_names[l];
    return short_name;
}
|
||||||
|
|
||||||
|
// Map a level name to its level_enum value.
// The name is matched against level_string_views first, then against the abbreviations
// "warn" and "err". Anything else yields level::off.
SPDLOG_INLINE spdlog::level::level_enum from_str(const std::string &name) SPDLOG_NOEXCEPT {
    int index = 0;
    for (const auto &level_name : level_string_views) {
        if (level_name == name) {
            // the position in the table is the numeric value of the level
            return static_cast<level::level_enum>(index);
        }
        ++index;
    }

    // check also for "warn" and "err" before giving up..
    if (name == "warn") {
        return level::warn;
    }
    if (name == "err") {
        return level::err;
    }
    return level::off;
}
|
||||||
|
} // namespace level
|
||||||
|
|
||||||
|
SPDLOG_INLINE spdlog_ex::spdlog_ex(std::string msg)
|
||||||
|
: msg_(std::move(msg)) {}
|
||||||
|
|
||||||
|
// Build an exception whose what() text combines `msg` with the system error described by
// `last_errno`. The text comes from std::system_error when SPDLOG_USE_STD_FORMAT is
// defined, and from fmt::format_system_error otherwise.
SPDLOG_INLINE spdlog_ex::spdlog_ex(const std::string &msg, int last_errno) {
#ifdef SPDLOG_USE_STD_FORMAT
    const std::error_code code(last_errno, std::generic_category());
    msg_ = std::system_error(code, msg).what();
#else
    memory_buf_t formatted;
    fmt::format_system_error(formatted, last_errno, msg.c_str());
    msg_ = fmt::to_string(formatted);
#endif
}
|
||||||
|
|
||||||
|
// Message text of this exception; the pointer refers to storage owned by the object.
SPDLOG_INLINE const char *spdlog_ex::what() const SPDLOG_NOEXCEPT {
    return msg_.c_str();
}
|
||||||
|
|
||||||
|
// Raise, via SPDLOG_THROW, a spdlog_ex built from `msg` and the errno value `last_errno`.
SPDLOG_INLINE void throw_spdlog_ex(const std::string &msg, int last_errno) {
    SPDLOG_THROW(spdlog_ex(msg, last_errno));
}
|
||||||
|
|
||||||
|
// Raise, via SPDLOG_THROW, a spdlog_ex carrying `msg`.
SPDLOG_INLINE void throw_spdlog_ex(std::string msg) {
    SPDLOG_THROW(spdlog_ex(std::move(msg)));
}
|
||||||
|
|
||||||
|
} // namespace spdlog
|
||||||
|
|
@ -0,0 +1,411 @@
|
||||||
|
// Copyright(c) 2015-present, Gabi Melman & spdlog contributors.
|
||||||
|
// Distributed under the MIT License (http://opensource.org/licenses/MIT)
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <spdlog/details/null_mutex.h>
|
||||||
|
#include <spdlog/tweakme.h>
|
||||||
|
|
||||||
|
#include <atomic>
|
||||||
|
#include <chrono>
|
||||||
|
#include <cstdio>
|
||||||
|
#include <exception>
|
||||||
|
#include <functional>
|
||||||
|
#include <initializer_list>
|
||||||
|
#include <memory>
|
||||||
|
#include <string>
|
||||||
|
#include <type_traits>
|
||||||
|
|
||||||
|
#ifdef SPDLOG_USE_STD_FORMAT
|
||||||
|
#include <version>
|
||||||
|
#if __cpp_lib_format >= 202207L
|
||||||
|
#include <format>
|
||||||
|
#else
|
||||||
|
#include <string_view>
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef SPDLOG_COMPILED_LIB
|
||||||
|
#undef SPDLOG_HEADER_ONLY
|
||||||
|
#if defined(SPDLOG_SHARED_LIB)
|
||||||
|
#if defined(_WIN32)
|
||||||
|
#ifdef spdlog_EXPORTS
|
||||||
|
#define SPDLOG_API __declspec(dllexport)
|
||||||
|
#else // !spdlog_EXPORTS
|
||||||
|
#define SPDLOG_API __declspec(dllimport)
|
||||||
|
#endif
|
||||||
|
#else // !defined(_WIN32)
|
||||||
|
#define SPDLOG_API __attribute__((visibility("default")))
|
||||||
|
#endif
|
||||||
|
#else // !defined(SPDLOG_SHARED_LIB)
|
||||||
|
#define SPDLOG_API
|
||||||
|
#endif
|
||||||
|
#define SPDLOG_INLINE
|
||||||
|
#else // !defined(SPDLOG_COMPILED_LIB)
|
||||||
|
#define SPDLOG_API
|
||||||
|
#define SPDLOG_HEADER_ONLY
|
||||||
|
#define SPDLOG_INLINE inline
|
||||||
|
#endif // #ifdef SPDLOG_COMPILED_LIB
|
||||||
|
|
||||||
|
#include <spdlog/fmt/fmt.h>
|
||||||
|
|
||||||
|
#if !defined(SPDLOG_USE_STD_FORMAT) && \
|
||||||
|
FMT_VERSION >= 80000 // backward compatibility with fmt versions older than 8
|
||||||
|
#define SPDLOG_FMT_RUNTIME(format_string) fmt::runtime(format_string)
|
||||||
|
#define SPDLOG_FMT_STRING(format_string) FMT_STRING(format_string)
|
||||||
|
#if defined(SPDLOG_WCHAR_FILENAMES) || defined(SPDLOG_WCHAR_TO_UTF8_SUPPORT)
|
||||||
|
#include <spdlog/fmt/xchar.h>
|
||||||
|
#endif
|
||||||
|
#else
|
||||||
|
#define SPDLOG_FMT_RUNTIME(format_string) format_string
|
||||||
|
#define SPDLOG_FMT_STRING(format_string) format_string
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// visual studio up to 2013 does not support noexcept nor constexpr
|
||||||
|
#if defined(_MSC_VER) && (_MSC_VER < 1900)
|
||||||
|
#define SPDLOG_NOEXCEPT _NOEXCEPT
|
||||||
|
#define SPDLOG_CONSTEXPR
|
||||||
|
#else
|
||||||
|
#define SPDLOG_NOEXCEPT noexcept
|
||||||
|
#define SPDLOG_CONSTEXPR constexpr
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// If building with std::format, can just use constexpr, otherwise if building with fmt
|
||||||
|
// SPDLOG_CONSTEXPR_FUNC needs to be set the same as FMT_CONSTEXPR to avoid situations where
|
||||||
|
// a constexpr function in spdlog could end up calling a non-constexpr function in fmt
|
||||||
|
// depending on the compiler
|
||||||
|
// If fmt determines it can't use constexpr, we should inline the function instead
|
||||||
|
#ifdef SPDLOG_USE_STD_FORMAT
|
||||||
|
#define SPDLOG_CONSTEXPR_FUNC constexpr
|
||||||
|
#else // Being built with fmt
|
||||||
|
#if FMT_USE_CONSTEXPR
|
||||||
|
#define SPDLOG_CONSTEXPR_FUNC FMT_CONSTEXPR
|
||||||
|
#else
|
||||||
|
#define SPDLOG_CONSTEXPR_FUNC inline
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined(__GNUC__) || defined(__clang__)
|
||||||
|
#define SPDLOG_DEPRECATED __attribute__((deprecated))
|
||||||
|
#elif defined(_MSC_VER)
|
||||||
|
#define SPDLOG_DEPRECATED __declspec(deprecated)
|
||||||
|
#else
|
||||||
|
#define SPDLOG_DEPRECATED
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// disable thread local on msvc 2013
|
||||||
|
#ifndef SPDLOG_NO_TLS
|
||||||
|
#if (defined(_MSC_VER) && (_MSC_VER < 1900)) || defined(__cplusplus_winrt)
|
||||||
|
#define SPDLOG_NO_TLS 1
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef SPDLOG_FUNCTION
|
||||||
|
#define SPDLOG_FUNCTION static_cast<const char *>(__FUNCTION__)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// Exception plumbing used throughout the library.
// With SPDLOG_NO_EXCEPTIONS defined, SPDLOG_TRY and SPDLOG_CATCH_STD expand to nothing and
// SPDLOG_THROW prints the exception text and aborts the process. Otherwise they map to
// `try`, `throw` and a handler that swallows any std::exception.
#ifdef SPDLOG_NO_EXCEPTIONS
    #define SPDLOG_TRY
    // `ex` is parenthesized so that a compound expression argument (e.g. a conditional)
    // still has .what() applied to the whole expression.
    #define SPDLOG_THROW(ex)                                 \
        do {                                                 \
            printf("spdlog fatal error: %s\n", (ex).what()); \
            std::abort();                                    \
        } while (0)
    #define SPDLOG_CATCH_STD
#else
    #define SPDLOG_TRY try
    #define SPDLOG_THROW(ex) throw(ex)
    #define SPDLOG_CATCH_STD             \
        catch (const std::exception &) { \
        }
#endif
|
||||||
|
|
||||||
|
namespace spdlog {
|
||||||
|
|
||||||
|
class formatter;
|
||||||
|
|
||||||
|
namespace sinks {
|
||||||
|
class sink;
|
||||||
|
}
|
||||||
|
|
||||||
|
#if defined(_WIN32) && defined(SPDLOG_WCHAR_FILENAMES)
|
||||||
|
using filename_t = std::wstring;
|
||||||
|
// allow macro expansion to occur in SPDLOG_FILENAME_T
|
||||||
|
#define SPDLOG_FILENAME_T_INNER(s) L##s
|
||||||
|
#define SPDLOG_FILENAME_T(s) SPDLOG_FILENAME_T_INNER(s)
|
||||||
|
#else
|
||||||
|
using filename_t = std::string;
|
||||||
|
#define SPDLOG_FILENAME_T(s) s
|
||||||
|
#endif
|
||||||
|
|
||||||
|
using log_clock = std::chrono::system_clock;
|
||||||
|
using sink_ptr = std::shared_ptr<sinks::sink>;
|
||||||
|
using sinks_init_list = std::initializer_list<sink_ptr>;
|
||||||
|
using err_handler = std::function<void(const std::string &err_msg)>;
|
||||||
|
#ifdef SPDLOG_USE_STD_FORMAT
|
||||||
|
namespace fmt_lib = std;
|
||||||
|
|
||||||
|
using string_view_t = std::string_view;
|
||||||
|
using memory_buf_t = std::string;
|
||||||
|
|
||||||
|
template <typename... Args>
|
||||||
|
#if __cpp_lib_format >= 202207L
|
||||||
|
using format_string_t = std::format_string<Args...>;
|
||||||
|
#else
|
||||||
|
using format_string_t = std::string_view;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
template <class T, class Char = char>
|
||||||
|
struct is_convertible_to_basic_format_string
|
||||||
|
: std::integral_constant<bool, std::is_convertible<T, std::basic_string_view<Char>>::value> {};
|
||||||
|
|
||||||
|
#if defined(SPDLOG_WCHAR_FILENAMES) || defined(SPDLOG_WCHAR_TO_UTF8_SUPPORT)
|
||||||
|
using wstring_view_t = std::wstring_view;
|
||||||
|
using wmemory_buf_t = std::wstring;
|
||||||
|
|
||||||
|
template <typename... Args>
|
||||||
|
#if __cpp_lib_format >= 202207L
|
||||||
|
using wformat_string_t = std::wformat_string<Args...>;
|
||||||
|
#else
|
||||||
|
using wformat_string_t = std::wstring_view;
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
#define SPDLOG_BUF_TO_STRING(x) x
|
||||||
|
#else // use fmt lib instead of std::format
|
||||||
|
namespace fmt_lib = fmt;
|
||||||
|
|
||||||
|
using string_view_t = fmt::basic_string_view<char>;
|
||||||
|
using memory_buf_t = fmt::basic_memory_buffer<char, 250>;
|
||||||
|
|
||||||
|
template <typename... Args>
|
||||||
|
using format_string_t = fmt::format_string<Args...>;
|
||||||
|
|
||||||
|
template <class T>
|
||||||
|
using remove_cvref_t = typename std::remove_cv<typename std::remove_reference<T>::type>::type;
|
||||||
|
|
||||||
|
template <typename Char>
|
||||||
|
#if FMT_VERSION >= 90101
|
||||||
|
using fmt_runtime_string = fmt::runtime_format_string<Char>;
|
||||||
|
#else
|
||||||
|
using fmt_runtime_string = fmt::basic_runtime<Char>;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// clang doesn't like SFINAE disabled constructor in std::is_convertible<> so have to repeat the
|
||||||
|
// condition from basic_format_string here, in addition, fmt::basic_runtime<Char> is only
|
||||||
|
// convertible to basic_format_string<Char> but not basic_string_view<Char>
|
||||||
|
template <class T, class Char = char>
|
||||||
|
struct is_convertible_to_basic_format_string
|
||||||
|
: std::integral_constant<bool,
|
||||||
|
std::is_convertible<T, fmt::basic_string_view<Char>>::value ||
|
||||||
|
std::is_same<remove_cvref_t<T>, fmt_runtime_string<Char>>::value> {
|
||||||
|
};
|
||||||
|
|
||||||
|
#if defined(SPDLOG_WCHAR_FILENAMES) || defined(SPDLOG_WCHAR_TO_UTF8_SUPPORT)
|
||||||
|
using wstring_view_t = fmt::basic_string_view<wchar_t>;
|
||||||
|
using wmemory_buf_t = fmt::basic_memory_buffer<wchar_t, 250>;
|
||||||
|
|
||||||
|
template <typename... Args>
|
||||||
|
using wformat_string_t = fmt::wformat_string<Args...>;
|
||||||
|
#endif
|
||||||
|
#define SPDLOG_BUF_TO_STRING(x) fmt::to_string(x)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef SPDLOG_WCHAR_TO_UTF8_SUPPORT
|
||||||
|
#ifndef _WIN32
|
||||||
|
#error SPDLOG_WCHAR_TO_UTF8_SUPPORT only supported on windows
|
||||||
|
#endif // _WIN32
|
||||||
|
#endif // SPDLOG_WCHAR_TO_UTF8_SUPPORT
|
||||||
|
|
||||||
|
template <class T>
|
||||||
|
struct is_convertible_to_any_format_string
|
||||||
|
: std::integral_constant<bool,
|
||||||
|
is_convertible_to_basic_format_string<T, char>::value ||
|
||||||
|
is_convertible_to_basic_format_string<T, wchar_t>::value> {};
|
||||||
|
|
||||||
|
#if defined(SPDLOG_NO_ATOMIC_LEVELS)
|
||||||
|
using level_t = details::null_atomic_int;
|
||||||
|
#else
|
||||||
|
using level_t = std::atomic<int>;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#define SPDLOG_LEVEL_TRACE 0
|
||||||
|
#define SPDLOG_LEVEL_DEBUG 1
|
||||||
|
#define SPDLOG_LEVEL_INFO 2
|
||||||
|
#define SPDLOG_LEVEL_WARN 3
|
||||||
|
#define SPDLOG_LEVEL_ERROR 4
|
||||||
|
#define SPDLOG_LEVEL_CRITICAL 5
|
||||||
|
#define SPDLOG_LEVEL_OFF 6
|
||||||
|
|
||||||
|
#if !defined(SPDLOG_ACTIVE_LEVEL)
|
||||||
|
#define SPDLOG_ACTIVE_LEVEL SPDLOG_LEVEL_INFO
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// Log level enum
|
||||||
|
namespace level {
|
||||||
|
enum level_enum : int {
|
||||||
|
trace = SPDLOG_LEVEL_TRACE,
|
||||||
|
debug = SPDLOG_LEVEL_DEBUG,
|
||||||
|
info = SPDLOG_LEVEL_INFO,
|
||||||
|
warn = SPDLOG_LEVEL_WARN,
|
||||||
|
err = SPDLOG_LEVEL_ERROR,
|
||||||
|
critical = SPDLOG_LEVEL_CRITICAL,
|
||||||
|
off = SPDLOG_LEVEL_OFF,
|
||||||
|
n_levels
|
||||||
|
};
|
||||||
|
|
||||||
|
#define SPDLOG_LEVEL_NAME_TRACE spdlog::string_view_t("trace", 5)
|
||||||
|
#define SPDLOG_LEVEL_NAME_DEBUG spdlog::string_view_t("debug", 5)
|
||||||
|
#define SPDLOG_LEVEL_NAME_INFO spdlog::string_view_t("info", 4)
|
||||||
|
#define SPDLOG_LEVEL_NAME_WARNING spdlog::string_view_t("warning", 7)
|
||||||
|
#define SPDLOG_LEVEL_NAME_ERROR spdlog::string_view_t("error", 5)
|
||||||
|
#define SPDLOG_LEVEL_NAME_CRITICAL spdlog::string_view_t("critical", 8)
|
||||||
|
#define SPDLOG_LEVEL_NAME_OFF spdlog::string_view_t("off", 3)
|
||||||
|
|
||||||
|
#if !defined(SPDLOG_LEVEL_NAMES)
|
||||||
|
#define SPDLOG_LEVEL_NAMES \
|
||||||
|
{ \
|
||||||
|
SPDLOG_LEVEL_NAME_TRACE, SPDLOG_LEVEL_NAME_DEBUG, SPDLOG_LEVEL_NAME_INFO, \
|
||||||
|
SPDLOG_LEVEL_NAME_WARNING, SPDLOG_LEVEL_NAME_ERROR, SPDLOG_LEVEL_NAME_CRITICAL, \
|
||||||
|
SPDLOG_LEVEL_NAME_OFF \
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if !defined(SPDLOG_SHORT_LEVEL_NAMES)
|
||||||
|
|
||||||
|
#define SPDLOG_SHORT_LEVEL_NAMES \
|
||||||
|
{ "T", "D", "I", "W", "E", "C", "O" }
|
||||||
|
#endif
|
||||||
|
|
||||||
|
SPDLOG_API const string_view_t &to_string_view(spdlog::level::level_enum l) SPDLOG_NOEXCEPT;
|
||||||
|
SPDLOG_API const char *to_short_c_str(spdlog::level::level_enum l) SPDLOG_NOEXCEPT;
|
||||||
|
SPDLOG_API spdlog::level::level_enum from_str(const std::string &name) SPDLOG_NOEXCEPT;
|
||||||
|
|
||||||
|
} // namespace level
|
||||||
|
|
||||||
|
//
|
||||||
|
// Color mode used by sinks with color support.
|
||||||
|
//
|
||||||
|
enum class color_mode { always, automatic, never };
|
||||||
|
|
||||||
|
//
|
||||||
|
// Pattern time - specific time getting to use for pattern_formatter.
|
||||||
|
// local time by default
|
||||||
|
//
|
||||||
|
enum class pattern_time_type {
|
||||||
|
local, // log localtime
|
||||||
|
utc // log utc
|
||||||
|
};
|
||||||
|
|
||||||
|
//
|
||||||
|
// Log exception
|
||||||
|
//
|
||||||
|
// Exception type of the library: a std::exception that carries its message in a std::string.
class SPDLOG_API spdlog_ex : public std::exception {
public:
    // Use `msg` as the what() text.
    explicit spdlog_ex(std::string msg);

    // Build the what() text from `msg` and the errno value `last_errno`.
    spdlog_ex(const std::string &msg, int last_errno);

    // Stored message.
    const char *what() const SPDLOG_NOEXCEPT override;

private:
    std::string msg_;  // text returned by what()
};
|
||||||
|
|
||||||
|
[[noreturn]] SPDLOG_API void throw_spdlog_ex(const std::string &msg, int last_errno);
|
||||||
|
[[noreturn]] SPDLOG_API void throw_spdlog_ex(std::string msg);
|
||||||
|
|
||||||
|
struct source_loc {
|
||||||
|
SPDLOG_CONSTEXPR source_loc() = default;
|
||||||
|
SPDLOG_CONSTEXPR source_loc(const char *filename_in, int line_in, const char *funcname_in)
|
||||||
|
: filename{filename_in},
|
||||||
|
line{line_in},
|
||||||
|
funcname{funcname_in} {}
|
||||||
|
|
||||||
|
SPDLOG_CONSTEXPR bool empty() const SPDLOG_NOEXCEPT { return line <= 0; }
|
||||||
|
const char *filename{nullptr};
|
||||||
|
int line{0};
|
||||||
|
const char *funcname{nullptr};
|
||||||
|
};
|
||||||
|
|
||||||
|
struct file_event_handlers {
|
||||||
|
file_event_handlers()
|
||||||
|
: before_open(nullptr),
|
||||||
|
after_open(nullptr),
|
||||||
|
before_close(nullptr),
|
||||||
|
after_close(nullptr) {}
|
||||||
|
|
||||||
|
std::function<void(const filename_t &filename)> before_open;
|
||||||
|
std::function<void(const filename_t &filename, std::FILE *file_stream)> after_open;
|
||||||
|
std::function<void(const filename_t &filename, std::FILE *file_stream)> before_close;
|
||||||
|
std::function<void(const filename_t &filename)> after_close;
|
||||||
|
};
|
||||||
|
|
||||||
|
namespace details {
|
||||||
|
|
||||||
|
// to_string_view
|
||||||
|
|
||||||
|
// View over the current contents of `buf`; the view does not own the characters.
SPDLOG_CONSTEXPR_FUNC spdlog::string_view_t to_string_view(const memory_buf_t &buf)
    SPDLOG_NOEXCEPT {
    return spdlog::string_view_t(buf.data(), buf.size());
}
|
||||||
|
|
||||||
|
// Identity overload: a string view is returned unchanged, so callers can use
// to_string_view() uniformly.
SPDLOG_CONSTEXPR_FUNC spdlog::string_view_t to_string_view(spdlog::string_view_t str)
    SPDLOG_NOEXCEPT {
    return str;
}
|
||||||
|
|
||||||
|
#if defined(SPDLOG_WCHAR_FILENAMES) || defined(SPDLOG_WCHAR_TO_UTF8_SUPPORT)
|
||||||
|
SPDLOG_CONSTEXPR_FUNC spdlog::wstring_view_t to_string_view(const wmemory_buf_t &buf)
|
||||||
|
SPDLOG_NOEXCEPT {
|
||||||
|
return spdlog::wstring_view_t{buf.data(), buf.size()};
|
||||||
|
}
|
||||||
|
|
||||||
|
SPDLOG_CONSTEXPR_FUNC spdlog::wstring_view_t to_string_view(spdlog::wstring_view_t str)
|
||||||
|
SPDLOG_NOEXCEPT {
|
||||||
|
return str;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef SPDLOG_USE_STD_FORMAT
|
||||||
|
template <typename T, typename... Args>
|
||||||
|
inline fmt::basic_string_view<T> to_string_view(fmt::basic_format_string<T, Args...> fmt) {
|
||||||
|
return fmt;
|
||||||
|
}
|
||||||
|
#elif __cpp_lib_format >= 202207L
|
||||||
|
template <typename T, typename... Args>
|
||||||
|
SPDLOG_CONSTEXPR_FUNC std::basic_string_view<T> to_string_view(
|
||||||
|
std::basic_format_string<T, Args...> fmt) SPDLOG_NOEXCEPT {
|
||||||
|
return fmt.get();
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// enable_if_t and make_unique: taken from the standard library when compiling as C++14 or
// later, otherwise provided here with the same spelling.
#if __cplusplus >= 201402L  // C++14 and beyond
using std::enable_if_t;
using std::make_unique;
#else
template <bool Condition, class Type = void>
using enable_if_t = typename std::enable_if<Condition, Type>::type;

// Non-array make_unique replacement: constructs T from the forwarded arguments.
template <typename T, typename... Args>
std::unique_ptr<T> make_unique(Args &&...args) {
    static_assert(!std::is_array<T>::value, "arrays not supported");
    std::unique_ptr<T> owner(new T(std::forward<Args>(args)...));
    return owner;
}
#endif
|
||||||
|
|
||||||
|
// Cast helper that avoids useless casts
// (see https://github.com/nlohmann/json/issues/2893#issuecomment-889152324).
// This overload takes part only when T and U differ, and performs the static_cast.
template <typename T,
          typename U,
          typename std::enable_if<!std::is_same<T, U>::value, int>::type = 0>
constexpr T conditional_static_cast(U value) {
    return static_cast<T>(value);
}
|
||||||
|
|
||||||
|
// Overload of conditional_static_cast for identical source and target types: the value is
// passed through without any cast.
template <typename T,
          typename U,
          typename std::enable_if<std::is_same<T, U>::value, int>::type = 0>
constexpr T conditional_static_cast(U value) {
    return value;
}
|
||||||
|
|
||||||
|
} // namespace details
|
||||||
|
} // namespace spdlog
|
||||||
|
|
||||||
|
#ifdef SPDLOG_HEADER_ONLY
|
||||||
|
#include "common-inl.h"
|
||||||
|
#endif
|
||||||
|
|
@ -0,0 +1,63 @@
|
||||||
|
// Copyright(c) 2015-present, Gabi Melman & spdlog contributors.
|
||||||
|
// Distributed under the MIT License (http://opensource.org/licenses/MIT)
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#ifndef SPDLOG_HEADER_ONLY
|
||||||
|
#include <spdlog/details/backtracer.h>
|
||||||
|
#endif
|
||||||
|
namespace spdlog {
|
||||||
|
namespace details {
|
||||||
|
// Copy constructor: other's mutex is held while its flag and messages are copied.
SPDLOG_INLINE backtracer::backtracer(const backtracer &other) {
    std::lock_guard<std::mutex> guard{other.mutex_};
    const bool other_enabled = other.enabled();
    enabled_ = other_enabled;
    messages_ = other.messages_;
}
|
||||||
|
|
||||||
|
// Move constructor: other's mutex is held while its flag is copied and its messages are
// moved out.
SPDLOG_INLINE backtracer::backtracer(backtracer &&other) SPDLOG_NOEXCEPT {
    std::lock_guard<std::mutex> guard{other.mutex_};
    const bool other_enabled = other.enabled();
    enabled_ = other_enabled;
    messages_ = std::move(other.messages_);
}
|
||||||
|
|
||||||
|
// Assignment from a by-value argument: the copy or move into `other` happens at the call
// site, then its state is taken over while this object's mutex is held.
SPDLOG_INLINE backtracer &backtracer::operator=(backtracer other) {
    std::lock_guard<std::mutex> guard{mutex_};
    const bool other_enabled = other.enabled();
    enabled_ = other_enabled;
    messages_ = std::move(other.messages_);
    return *this;
}
|
||||||
|
|
||||||
|
// Turn the backtracer on and replace the stored messages with a fresh queue constructed
// from `size`.
SPDLOG_INLINE void backtracer::enable(size_t size) {
    std::lock_guard<std::mutex> guard(mutex_);
    enabled_.store(true, std::memory_order_relaxed);
    messages_ = circular_q<log_msg_buffer>(size);
}
|
||||||
|
|
||||||
|
// Turn the backtracer off. The stored messages are left untouched.
SPDLOG_INLINE void backtracer::disable() {
    std::lock_guard<std::mutex> guard(mutex_);
    enabled_.store(false, std::memory_order_relaxed);
}
|
||||||
|
|
||||||
|
// Current value of the enabled flag (relaxed atomic load; the mutex is not taken).
SPDLOG_INLINE bool backtracer::enabled() const {
    return enabled_.load(std::memory_order_relaxed);
}
|
||||||
|
|
||||||
|
// Store `msg`, wrapped in a log_msg_buffer, at the back of the queue.
SPDLOG_INLINE void backtracer::push_back(const log_msg &msg) {
    std::lock_guard<std::mutex> guard(mutex_);
    messages_.push_back(log_msg_buffer{msg});
}
|
||||||
|
|
||||||
|
// True when no messages are currently stored (checked under the mutex).
SPDLOG_INLINE bool backtracer::empty() const {
    std::lock_guard<std::mutex> guard(mutex_);
    const bool no_messages = messages_.empty();
    return no_messages;
}
|
||||||
|
|
||||||
|
// pop all items in the q and apply the given fun on each of them.
|
||||||
|
SPDLOG_INLINE void backtracer::foreach_pop(std::function<void(const details::log_msg &)> fun) {
|
||||||
|
std::lock_guard<std::mutex> lock{mutex_};
|
||||||
|
while (!messages_.empty()) {
|
||||||
|
auto &front_msg = messages_.front();
|
||||||
|
fun(front_msg);
|
||||||
|
messages_.pop_front();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} // namespace details
|
||||||
|
} // namespace spdlog
|
||||||
|
|
@ -0,0 +1,45 @@
|
||||||
|
// Copyright(c) 2015-present, Gabi Melman & spdlog contributors.
|
||||||
|
// Distributed under the MIT License (http://opensource.org/licenses/MIT)
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <spdlog/details/circular_q.h>
|
||||||
|
#include <spdlog/details/log_msg_buffer.h>
|
||||||
|
|
||||||
|
#include <atomic>
|
||||||
|
#include <functional>
|
||||||
|
#include <mutex>
|
||||||
|
|
||||||
|
// Store log messages in circular buffer.
|
||||||
|
// Useful for storing debug data in case of error/warning happens.
|
||||||
|
|
||||||
|
namespace spdlog {
|
||||||
|
namespace details {
|
||||||
|
class SPDLOG_API backtracer {
|
||||||
|
mutable std::mutex mutex_;
|
||||||
|
std::atomic<bool> enabled_{false};
|
||||||
|
circular_q<log_msg_buffer> messages_;
|
||||||
|
|
||||||
|
public:
|
||||||
|
backtracer() = default;
|
||||||
|
backtracer(const backtracer &other);
|
||||||
|
|
||||||
|
backtracer(backtracer &&other) SPDLOG_NOEXCEPT;
|
||||||
|
backtracer &operator=(backtracer other);
|
||||||
|
|
||||||
|
void enable(size_t size);
|
||||||
|
void disable();
|
||||||
|
bool enabled() const;
|
||||||
|
void push_back(const log_msg &msg);
|
||||||
|
bool empty() const;
|
||||||
|
|
||||||
|
// pop all items in the q and apply the given fun on each of them.
|
||||||
|
void foreach_pop(std::function<void(const details::log_msg &)> fun);
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace details
|
||||||
|
} // namespace spdlog
|
||||||
|
|
||||||
|
#ifdef SPDLOG_HEADER_ONLY
|
||||||
|
#include "backtracer-inl.h"
|
||||||
|
#endif
|
||||||
|
|
@ -0,0 +1,115 @@
|
||||||
|
// Copyright(c) 2015-present, Gabi Melman & spdlog contributors.
|
||||||
|
// Distributed under the MIT License (http://opensource.org/licenses/MIT)
|
||||||
|
|
||||||
|
// circular q view of std::vector.
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <cassert>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
#include "spdlog/common.h"
|
||||||
|
|
||||||
|
namespace spdlog {
|
||||||
|
namespace details {
|
||||||
|
// Fixed-capacity ring buffer stored in a std::vector<T>.
//
// One extra slot is allocated as the "full" marker, so a queue constructed
// with max_items holds at most max_items elements. When the queue is full,
// push_back() overwrites the oldest element and increments the overrun counter.
// A default-constructed (or moved-from) queue is "disabled": max_items_ == 0
// and push_back() is a no-op.
template <typename T>
class circular_q {
    size_t max_items_ = 0;
    typename std::vector<T>::size_type head_ = 0;  // index of the oldest element
    typename std::vector<T>::size_type tail_ = 0;  // index of the next write slot
    size_t overrun_counter_ = 0;                   // number of overwritten elements
    std::vector<T> v_;

public:
    using value_type = T;

    // empty ctor - create a disabled queue with no elements allocated at all
    circular_q() = default;

    explicit circular_q(size_t max_items)
        : max_items_(max_items + 1)  // one item is reserved as marker for full q
          ,
          v_(max_items_) {}

    circular_q(const circular_q &) = default;
    circular_q &operator=(const circular_q &) = default;

    // move cannot be default,
    // since we need to reset head_, tail_, etc to zero in the moved object
    circular_q(circular_q &&other) SPDLOG_NOEXCEPT { copy_moveable(std::move(other)); }

    circular_q &operator=(circular_q &&other) SPDLOG_NOEXCEPT {
        // Self move-assignment must be a no-op: copy_moveable() resets its source
        // to the disabled state, which would otherwise wipe this queue.
        if (this != &other) {
            copy_moveable(std::move(other));
        }
        return *this;
    }

    // push back, overrun (oldest) item if no room left
    void push_back(T &&item) {
        if (max_items_ > 0) {
            v_[tail_] = std::move(item);
            tail_ = (tail_ + 1) % max_items_;

            if (tail_ == head_)  // overrun last item if full
            {
                head_ = (head_ + 1) % max_items_;
                ++overrun_counter_;
            }
        }
    }

    // Return reference to the front item.
    // If there are no elements in the container, the behavior is undefined.
    const T &front() const { return v_[head_]; }

    T &front() { return v_[head_]; }

    // Return number of elements actually stored
    size_t size() const {
        if (tail_ >= head_) {
            return tail_ - head_;
        } else {
            // tail_ has wrapped around behind head_
            return max_items_ - (head_ - tail_);
        }
    }

    // Return const reference to item by index.
    // If index is out of range 0…size()-1, the behavior is undefined.
    const T &at(size_t i) const {
        assert(i < size());
        return v_[(head_ + i) % max_items_];
    }

    // Pop item from front.
    // If there are no elements in the container, the behavior is undefined.
    void pop_front() { head_ = (head_ + 1) % max_items_; }

    bool empty() const { return tail_ == head_; }

    bool full() const {
        // head is ahead of the tail by 1
        if (max_items_ > 0) {
            return ((tail_ + 1) % max_items_) == head_;
        }
        return false;  // a disabled queue is never reported as full
    }

    size_t overrun_counter() const { return overrun_counter_; }

    void reset_overrun_counter() { overrun_counter_ = 0; }

private:
    // copy from other&& and reset it to disabled state
    // Precondition: &other != this (enforced by the callers).
    void copy_moveable(circular_q &&other) SPDLOG_NOEXCEPT {
        max_items_ = other.max_items_;
        head_ = other.head_;
        tail_ = other.tail_;
        overrun_counter_ = other.overrun_counter_;
        v_ = std::move(other.v_);

        // put &&other in disabled, but valid state
        other.max_items_ = 0;
        other.head_ = other.tail_ = 0;
        other.overrun_counter_ = 0;
    }
};
|
||||||
|
} // namespace details
|
||||||
|
} // namespace spdlog
|
||||||
|
|
@ -0,0 +1,28 @@
|
||||||
|
// Copyright(c) 2015-present, Gabi Melman & spdlog contributors.
|
||||||
|
// Distributed under the MIT License (http://opensource.org/licenses/MIT)
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <mutex>
|
||||||
|
#include <spdlog/details/null_mutex.h>
|
||||||
|
|
||||||
|
namespace spdlog {
|
||||||
|
namespace details {
|
||||||
|
|
||||||
|
// Provides a single std::mutex instance through a static accessor.
struct console_mutex {
    using mutex_t = std::mutex;

    // Returns a reference to a function-local static mutex, so every call
    // yields the same object.
    static mutex_t &mutex() {
        static mutex_t s_mutex;
        return s_mutex;
    }
};
|
||||||
|
|
||||||
|
struct console_nullmutex {
|
||||||
|
using mutex_t = null_mutex;
|
||||||
|
static mutex_t &mutex() {
|
||||||
|
static mutex_t s_mutex;
|
||||||
|
return s_mutex;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
} // namespace details
|
||||||
|
} // namespace spdlog
|
||||||
|
|
@ -0,0 +1,153 @@
|
||||||
|
// Copyright(c) 2015-present, Gabi Melman & spdlog contributors.
|
||||||
|
// Distributed under the MIT License (http://opensource.org/licenses/MIT)
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#ifndef SPDLOG_HEADER_ONLY
|
||||||
|
#include <spdlog/details/file_helper.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include <spdlog/common.h>
|
||||||
|
#include <spdlog/details/os.h>
|
||||||
|
|
||||||
|
#include <cerrno>
|
||||||
|
#include <chrono>
|
||||||
|
#include <cstdio>
|
||||||
|
#include <string>
|
||||||
|
#include <thread>
|
||||||
|
#include <tuple>
|
||||||
|
|
||||||
|
namespace spdlog {
|
||||||
|
namespace details {
|
||||||
|
|
||||||
|
// Stores a copy of the given event handlers; no file is opened here.
SPDLOG_INLINE file_helper::file_helper(const file_event_handlers &event_handlers)
    : event_handlers_(event_handlers) {}
|
||||||
|
|
||||||
|
// Closes the file (if one is open) on destruction.
SPDLOG_INLINE file_helper::~file_helper() { close(); }
|
||||||
|
|
||||||
|
// Open fname for appending, closing any previously opened file first.
//
// fname    - path of the file to open; remembered as filename_.
// truncate - when true, the file is emptied before being opened for append.
//
// Up to open_tries_ attempts are made, each failed attempt followed by a
// sleep of open_interval_ milliseconds. The before_open / after_open event
// handlers are invoked when set. Throws (via throw_spdlog_ex) if every
// attempt fails.
SPDLOG_INLINE void file_helper::open(const filename_t &fname, bool truncate) {
    close();
    filename_ = fname;

    auto *mode = SPDLOG_FILENAME_T("ab");
    auto *trunc_mode = SPDLOG_FILENAME_T("wb");

    if (event_handlers_.before_open) {
        event_handlers_.before_open(filename_);
    }
    for (int tries = 0; tries < open_tries_; ++tries) {
        // create containing folder if not exists already.
        os::create_dir(os::dir_name(fname));

        bool ready_to_open = true;
        if (truncate) {
            // Truncate by opening-and-closing a tmp file in "wb" mode, always
            // opening the actual log-we-write-to in "ab" mode, since that
            // interacts more politely with external processes that might
            // rotate/truncate the file underneath us.
            std::FILE *tmp = nullptr;
            if (os::fopen_s(&tmp, fname, trunc_mode)) {
                // Truncation failed: skip the append-open, but still fall
                // through to the sleep below so the retry is delayed like any
                // other failed attempt.
                ready_to_open = false;
            } else {
                std::fclose(tmp);
            }
        }
        if (ready_to_open && !os::fopen_s(&fd_, fname, mode)) {
            if (event_handlers_.after_open) {
                event_handlers_.after_open(filename_, fd_);
            }
            return;
        }

        details::os::sleep_for_millis(open_interval_);
    }

    throw_spdlog_ex("Failed opening file " + os::filename_to_str(filename_) + " for writing",
                    errno);
}
|
||||||
|
|
||||||
|
// Re-open the file named by filename_ (set by a previous open()).
// Throws if no filename has been recorded yet.
SPDLOG_INLINE void file_helper::reopen(bool truncate) {
    if (filename_.empty()) {
        throw_spdlog_ex("Failed re opening file - was not opened before");
    }
    this->open(filename_, truncate);
}
|
||||||
|
|
||||||
|
// Flush the stream with std::fflush; throws with errno if it reports failure.
SPDLOG_INLINE void file_helper::flush() {
    if (std::fflush(fd_) != 0) {
        throw_spdlog_ex("Failed flush to file " + os::filename_to_str(filename_), errno);
    }
}
|
||||||
|
|
||||||
|
// Call os::fsync on the file; throws with errno when it returns false.
SPDLOG_INLINE void file_helper::sync() {
    if (!os::fsync(fd_)) {
        throw_spdlog_ex("Failed to fsync file " + os::filename_to_str(filename_), errno);
    }
}
|
||||||
|
|
||||||
|
// Close the file if one is open; does nothing otherwise.
// The before_close / after_close event handlers are invoked around the
// fclose when they are set.
SPDLOG_INLINE void file_helper::close() {
    if (fd_ != nullptr) {
        if (event_handlers_.before_close) {
            event_handlers_.before_close(filename_, fd_);
        }

        std::fclose(fd_);
        fd_ = nullptr;  // mark as closed so a second close() is a no-op

        if (event_handlers_.after_close) {
            event_handlers_.after_close(filename_);
        }
    }
}
|
||||||
|
|
||||||
|
// Write the whole buffer to the file.
// Silently returns when no file is open; throws with errno when
// os::fwrite_bytes reports failure.
SPDLOG_INLINE void file_helper::write(const memory_buf_t &buf) {
    if (fd_ == nullptr) return;
    size_t msg_size = buf.size();
    auto data = buf.data();

    if (!details::os::fwrite_bytes(data, msg_size, fd_)) {
        throw_spdlog_ex("Failed writing to file " + os::filename_to_str(filename_), errno);
    }
}
|
||||||
|
|
||||||
|
// Return os::filesize() of the open file; throws when no file is open.
SPDLOG_INLINE size_t file_helper::size() const {
    if (fd_ == nullptr) {
        throw_spdlog_ex("Cannot use size() on closed file " + os::filename_to_str(filename_));
    }
    return os::filesize(fd_);
}
|
||||||
|
|
||||||
|
// Return the filename recorded by the last open() (empty if never opened).
SPDLOG_INLINE const filename_t &file_helper::filename() const { return filename_; }
|
||||||
|
|
||||||
|
//
|
||||||
|
// return file path and its extension:
|
||||||
|
//
|
||||||
|
// "mylog.txt" => ("mylog", ".txt")
|
||||||
|
// "mylog" => ("mylog", "")
|
||||||
|
// "mylog." => ("mylog.", "")
|
||||||
|
// "/dir1/dir2/mylog.txt" => ("/dir1/dir2/mylog", ".txt")
|
||||||
|
//
|
||||||
|
// the starting dot in filenames is ignored (hidden files):
|
||||||
|
//
|
||||||
|
// ".mylog" => (".mylog". "")
|
||||||
|
// "my_folder/.mylog" => ("my_folder/.mylog", "")
|
||||||
|
// "my_folder/.mylog.txt" => ("my_folder/.mylog", ".txt")
|
||||||
|
SPDLOG_INLINE std::tuple<filename_t, filename_t> file_helper::split_by_extension(
|
||||||
|
const filename_t &fname) {
|
||||||
|
auto ext_index = fname.rfind('.');
|
||||||
|
|
||||||
|
// no valid extension found - return whole path and empty string as
|
||||||
|
// extension
|
||||||
|
if (ext_index == filename_t::npos || ext_index == 0 || ext_index == fname.size() - 1) {
|
||||||
|
return std::make_tuple(fname, filename_t());
|
||||||
|
}
|
||||||
|
|
||||||
|
// treat cases like "/etc/rc.d/somelogfile or "/abc/.hiddenfile"
|
||||||
|
auto folder_index = fname.find_last_of(details::os::folder_seps_filename);
|
||||||
|
if (folder_index != filename_t::npos && folder_index >= ext_index - 1) {
|
||||||
|
return std::make_tuple(fname, filename_t());
|
||||||
|
}
|
||||||
|
|
||||||
|
// finally - return a valid base and extension tuple
|
||||||
|
return std::make_tuple(fname.substr(0, ext_index), fname.substr(ext_index));
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace details
|
||||||
|
} // namespace spdlog
|
||||||
|
|
@ -0,0 +1,61 @@
|
||||||
|
// Copyright(c) 2015-present, Gabi Melman & spdlog contributors.
|
||||||
|
// Distributed under the MIT License (http://opensource.org/licenses/MIT)
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <spdlog/common.h>
|
||||||
|
#include <tuple>
|
||||||
|
|
||||||
|
namespace spdlog {
|
||||||
|
namespace details {
|
||||||
|
|
||||||
|
// Helper class for file sinks.
|
||||||
|
// When failing to open a file, retry several times(5) with a delay interval(10 ms).
|
||||||
|
// Throw spdlog_ex exception on errors.
|
||||||
|
|
||||||
|
class SPDLOG_API file_helper {
|
||||||
|
public:
|
||||||
|
file_helper() = default;
|
||||||
|
explicit file_helper(const file_event_handlers &event_handlers);
|
||||||
|
|
||||||
|
file_helper(const file_helper &) = delete;
|
||||||
|
file_helper &operator=(const file_helper &) = delete;
|
||||||
|
~file_helper();
|
||||||
|
|
||||||
|
void open(const filename_t &fname, bool truncate = false);
|
||||||
|
void reopen(bool truncate);
|
||||||
|
void flush();
|
||||||
|
void sync();
|
||||||
|
void close();
|
||||||
|
void write(const memory_buf_t &buf);
|
||||||
|
size_t size() const;
|
||||||
|
const filename_t &filename() const;
|
||||||
|
|
||||||
|
//
|
||||||
|
// return file path and its extension:
|
||||||
|
//
|
||||||
|
// "mylog.txt" => ("mylog", ".txt")
|
||||||
|
// "mylog" => ("mylog", "")
|
||||||
|
// "mylog." => ("mylog.", "")
|
||||||
|
// "/dir1/dir2/mylog.txt" => ("/dir1/dir2/mylog", ".txt")
|
||||||
|
//
|
||||||
|
// the starting dot in filenames is ignored (hidden files):
|
||||||
|
//
|
||||||
|
// ".mylog" => (".mylog". "")
|
||||||
|
// "my_folder/.mylog" => ("my_folder/.mylog", "")
|
||||||
|
// "my_folder/.mylog.txt" => ("my_folder/.mylog", ".txt")
|
||||||
|
static std::tuple<filename_t, filename_t> split_by_extension(const filename_t &fname);
|
||||||
|
|
||||||
|
private:
|
||||||
|
const int open_tries_ = 5;
|
||||||
|
const unsigned int open_interval_ = 10;
|
||||||
|
std::FILE *fd_{nullptr};
|
||||||
|
filename_t filename_;
|
||||||
|
file_event_handlers event_handlers_;
|
||||||
|
};
|
||||||
|
} // namespace details
|
||||||
|
} // namespace spdlog
|
||||||
|
|
||||||
|
#ifdef SPDLOG_HEADER_ONLY
|
||||||
|
#include "file_helper-inl.h"
|
||||||
|
#endif
|
||||||
|
|
@ -0,0 +1,141 @@
|
||||||
|
// Copyright(c) 2015-present, Gabi Melman & spdlog contributors.
|
||||||
|
// Distributed under the MIT License (http://opensource.org/licenses/MIT)
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <chrono>
|
||||||
|
#include <iterator>
|
||||||
|
#include <spdlog/common.h>
|
||||||
|
#include <spdlog/fmt/fmt.h>
|
||||||
|
#include <type_traits>
|
||||||
|
|
||||||
|
#ifdef SPDLOG_USE_STD_FORMAT
|
||||||
|
#include <charconv>
|
||||||
|
#include <limits>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// Some fmt helpers to efficiently format and pad ints and strings
|
||||||
|
namespace spdlog {
|
||||||
|
namespace details {
|
||||||
|
namespace fmt_helper {
|
||||||
|
|
||||||
|
// Append the characters of view to dest.
inline void append_string_view(spdlog::string_view_t view, memory_buf_t &dest) {
    auto *buf_ptr = view.data();
    dest.append(buf_ptr, buf_ptr + view.size());
}
|
||||||
|
|
||||||
|
#ifdef SPDLOG_USE_STD_FORMAT
|
||||||
|
template <typename T>
|
||||||
|
inline void append_int(T n, memory_buf_t &dest) {
|
||||||
|
// Buffer should be large enough to hold all digits (digits10 + 1) and a sign
|
||||||
|
SPDLOG_CONSTEXPR const auto BUF_SIZE = std::numeric_limits<T>::digits10 + 2;
|
||||||
|
char buf[BUF_SIZE];
|
||||||
|
|
||||||
|
auto [ptr, ec] = std::to_chars(buf, buf + BUF_SIZE, n, 10);
|
||||||
|
if (ec == std::errc()) {
|
||||||
|
dest.append(buf, ptr);
|
||||||
|
} else {
|
||||||
|
throw_spdlog_ex("Failed to format int", static_cast<int>(ec));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
template <typename T>
|
||||||
|
inline void append_int(T n, memory_buf_t &dest) {
|
||||||
|
fmt::format_int i(n);
|
||||||
|
dest.append(i.data(), i.data() + i.size());
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// Return the number of decimal digits of n (1 for n == 0).
// The comparisons assume n is non-negative.
template <typename T>
SPDLOG_CONSTEXPR_FUNC unsigned int count_digits_fallback(T n) {
    // taken from fmt: https://github.com/fmtlib/fmt/blob/8.0.1/include/fmt/format.h#L899-L912
    unsigned int count = 1;
    for (;;) {
        // Integer division is slow so do it for a group of four digits instead
        // of for every digit. The idea comes from the talk by Alexandrescu
        // "Three Optimization Tips for C++". See speed-test for a comparison.
        if (n < 10) return count;
        if (n < 100) return count + 1;
        if (n < 1000) return count + 2;
        if (n < 10000) return count + 3;
        n /= 10000u;
        count += 4;
    }
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
inline unsigned int count_digits(T n) {
|
||||||
|
using count_type =
|
||||||
|
typename std::conditional<(sizeof(T) > sizeof(uint32_t)), uint64_t, uint32_t>::type;
|
||||||
|
#ifdef SPDLOG_USE_STD_FORMAT
|
||||||
|
return count_digits_fallback(static_cast<count_type>(n));
|
||||||
|
#else
|
||||||
|
return static_cast<unsigned int>(fmt::
|
||||||
|
// fmt 7.0.0 renamed the internal namespace to detail.
|
||||||
|
// See: https://github.com/fmtlib/fmt/issues/1538
|
||||||
|
#if FMT_VERSION < 70000
|
||||||
|
internal
|
||||||
|
#else
|
||||||
|
detail
|
||||||
|
#endif
|
||||||
|
::count_digits(static_cast<count_type>(n)));
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
inline void pad2(int n, memory_buf_t &dest) {
|
||||||
|
if (n >= 0 && n < 100) // 0-99
|
||||||
|
{
|
||||||
|
dest.push_back(static_cast<char>('0' + n / 10));
|
||||||
|
dest.push_back(static_cast<char>('0' + n % 10));
|
||||||
|
} else // unlikely, but just in case, let fmt deal with it
|
||||||
|
{
|
||||||
|
fmt_lib::format_to(std::back_inserter(dest), SPDLOG_FMT_STRING("{:02}"), n);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
inline void pad_uint(T n, unsigned int width, memory_buf_t &dest) {
|
||||||
|
static_assert(std::is_unsigned<T>::value, "pad_uint must get unsigned T");
|
||||||
|
for (auto digits = count_digits(n); digits < width; digits++) {
|
||||||
|
dest.push_back('0');
|
||||||
|
}
|
||||||
|
append_int(n, dest);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
inline void pad3(T n, memory_buf_t &dest) {
|
||||||
|
static_assert(std::is_unsigned<T>::value, "pad3 must get unsigned T");
|
||||||
|
if (n < 1000) {
|
||||||
|
dest.push_back(static_cast<char>(n / 100 + '0'));
|
||||||
|
n = n % 100;
|
||||||
|
dest.push_back(static_cast<char>((n / 10) + '0'));
|
||||||
|
dest.push_back(static_cast<char>((n % 10) + '0'));
|
||||||
|
} else {
|
||||||
|
append_int(n, dest);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
inline void pad6(T n, memory_buf_t &dest) {
|
||||||
|
pad_uint(n, 6, dest);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
inline void pad9(T n, memory_buf_t &dest) {
|
||||||
|
pad_uint(n, 9, dest);
|
||||||
|
}
|
||||||
|
|
||||||
|
// return fraction of a second of the given time_point.
|
||||||
|
// e.g.
|
||||||
|
// fraction<std::milliseconds>(tp) -> will return the millis part of the second
|
||||||
|
template <typename ToDuration>
|
||||||
|
inline ToDuration time_fraction(log_clock::time_point tp) {
|
||||||
|
using std::chrono::duration_cast;
|
||||||
|
using std::chrono::seconds;
|
||||||
|
auto duration = tp.time_since_epoch();
|
||||||
|
auto secs = duration_cast<seconds>(duration);
|
||||||
|
return duration_cast<ToDuration>(duration) - duration_cast<ToDuration>(secs);
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace fmt_helper
|
||||||
|
} // namespace details
|
||||||
|
} // namespace spdlog
|
||||||
|
|
@ -0,0 +1,44 @@
|
||||||
|
// Copyright(c) 2015-present, Gabi Melman & spdlog contributors.
|
||||||
|
// Distributed under the MIT License (http://opensource.org/licenses/MIT)
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#ifndef SPDLOG_HEADER_ONLY
|
||||||
|
#include <spdlog/details/log_msg.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include <spdlog/details/os.h>
|
||||||
|
|
||||||
|
namespace spdlog {
|
||||||
|
namespace details {
|
||||||
|
|
||||||
|
// Construct a log message from an explicit timestamp, source location,
// logger name, level and payload. thread_id is filled from os::thread_id()
// unless SPDLOG_NO_THREAD_ID is defined.
SPDLOG_INLINE log_msg::log_msg(spdlog::log_clock::time_point log_time,
                               spdlog::source_loc loc,
                               string_view_t a_logger_name,
                               spdlog::level::level_enum lvl,
                               spdlog::string_view_t msg)
    : logger_name(a_logger_name),
      level(lvl),
      time(log_time)
#ifndef SPDLOG_NO_THREAD_ID
      ,
      thread_id(os::thread_id())
#endif
      ,
      source(loc),
      payload(msg) {
}
|
||||||
|
|
||||||
|
// Same as the full constructor, with the timestamp taken from os::now().
SPDLOG_INLINE log_msg::log_msg(spdlog::source_loc loc,
                               string_view_t a_logger_name,
                               spdlog::level::level_enum lvl,
                               spdlog::string_view_t msg)
    : log_msg(os::now(), loc, a_logger_name, lvl, msg) {}
|
||||||
|
|
||||||
|
// Same as the full constructor, with the timestamp taken from os::now() and
// a default-constructed source location.
SPDLOG_INLINE log_msg::log_msg(string_view_t a_logger_name,
                               spdlog::level::level_enum lvl,
                               spdlog::string_view_t msg)
    : log_msg(os::now(), source_loc{}, a_logger_name, lvl, msg) {}
|
||||||
|
|
||||||
|
} // namespace details
|
||||||
|
} // namespace spdlog
|
||||||
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue