FastDup/ext/klib/cpp/khashl.hpp

259 lines
9.1 KiB
C++
Raw Permalink Normal View History

#ifndef __AC_KHASHL_HPP
#define __AC_KHASHL_HPP
#include <functional> // for std::equal_to
#include <cstdlib> // for malloc() etc
#include <cstring> // for memset()
#include <stdint.h> // for uint32_t
/* // ==> Code example <==
#include <cstdio>
#include "khashl.hpp"
int main(void)
{
klib::KHashMap<uint32_t, int, std::hash<uint32_t> > h; // NB: C++98 doesn't have std::hash
uint32_t k;
int absent;
h[43] = 1, h[53] = 2, h[63] = 3, h[73] = 4; // one way to insert
k = h.put(53, &absent), h.value(k) = -2; // another way to insert
if (!absent) printf("already in the table\n"); // which allows to test presence
if (h.get(33) == h.end()) printf("not found!\n"); // test presence without insertion
h.del(h.get(43)); // deletion
for (k = 0; k != h.end(); ++k) // traversal
if (h.occupied(k)) // some buckets are not occupied; skip them
printf("%u => %d\n", h.key(k), h.value(k));
return 0;
}
*/
namespace klib {
/***********
* HashSet *
***********/
template<class T, class Hash, class Eq = std::equal_to<T>, typename khint_t = uint32_t>
class KHashSet {
khint_t bits, count;
uint32_t *used;
T *keys;
static inline uint32_t __kh_used(const uint32_t *flag, khint_t i) { return flag[i>>5] >> (i&0x1fU) & 1U; };
static inline void __kh_set_used(uint32_t *flag, khint_t i) { flag[i>>5] |= 1U<<(i&0x1fU); };
static inline void __kh_set_unused(uint32_t *flag, khint_t i) { flag[i>>5] &= ~(1U<<(i&0x1fU)); };
static inline khint_t __kh_fsize(khint_t m) { return m<32? 1 : m>>5; }
static inline khint_t __kh_h2b(uint32_t hash, khint_t bits) { return hash * 2654435769U >> (32 - bits); }
static inline khint_t __kh_h2b(uint64_t hash, khint_t bits) { return hash * 11400714819323198485ULL >> (64 - bits); }
public:
KHashSet() : bits(0), count(0), used(0), keys(0) {};
~KHashSet() { std::free(used); std::free(keys); };
inline khint_t n_buckets() const { return used? khint_t(1) << bits : 0; }
inline khint_t end() const { return n_buckets(); }
inline khint_t size() const { return count; }
inline T &key(khint_t x) { return keys[x]; };
inline bool occupied(khint_t x) const { return (__kh_used(used, x) != 0); }
void clear(void) {
if (!used) return;
memset(used, 0, __kh_fsize(n_buckets()) * sizeof(uint32_t));
count = 0;
}
khint_t get(const T &key) const {
khint_t i, last, mask, nb;
if (keys == 0) return 0;
nb = n_buckets();
mask = nb - khint_t(1);
i = last = __kh_h2b(Hash()(key), bits);
while (__kh_used(used, i) && !Eq()(keys[i], key)) {
i = (i + khint_t(1)) & mask;
if (i == last) return nb;
}
return !__kh_used(used, i)? nb : i;
}
int resize(khint_t new_nb) {
uint32_t *new_used = 0;
khint_t j = 0, x = new_nb, nb, new_bits, new_mask;
while ((x >>= khint_t(1)) != 0) ++j;
if (new_nb & (new_nb - 1)) ++j;
new_bits = j > 2? j : 2;
new_nb = khint_t(1) << new_bits;
if (count > (new_nb>>1) + (new_nb>>2)) return 0; /* requested size is too small */
new_used = (uint32_t*)std::malloc(__kh_fsize(new_nb) * sizeof(uint32_t));
memset(new_used, 0, __kh_fsize(new_nb) * sizeof(uint32_t));
if (!new_used) return -1; /* not enough memory */
nb = n_buckets();
if (nb < new_nb) { /* expand */
T *new_keys = (T*)std::realloc(keys, new_nb * sizeof(T));
if (!new_keys) { std::free(new_used); return -1; }
keys = new_keys;
} /* otherwise shrink */
new_mask = new_nb - 1;
for (j = 0; j != nb; ++j) {
if (!__kh_used(used, j)) continue;
T key = keys[j];
__kh_set_unused(used, j);
while (1) { /* kick-out process; sort of like in Cuckoo hashing */
khint_t i;
i = __kh_h2b(Hash()(key), new_bits);
while (__kh_used(new_used, i)) i = (i + khint_t(1)) & new_mask;
__kh_set_used(new_used, i);
if (i < nb && __kh_used(used, i)) { /* kick out the existing element */
{ T tmp = keys[i]; keys[i] = key; key = tmp; }
__kh_set_unused(used, i); /* mark it as deleted in the old hash table */
} else { /* write the element and jump out of the loop */
keys[i] = key;
break;
}
}
}
if (nb > new_nb) /* shrink the hash table */
keys = (T*)std::realloc(keys, new_nb * sizeof(T));
std::free(used); /* free the working space */
used = new_used, bits = new_bits;
return 0;
}
khint_t put(const T &key, int *absent_ = 0) {
khint_t nb, i, last, mask;
int absent = -1;
nb = n_buckets();
if (count >= (nb>>1) + (nb>>2)) { /* rehashing */
if (resize(nb + khint_t(1)) < 0) {
if (absent_) *absent_ = -1;
return nb;
}
nb = n_buckets();
} /* TODO: to implement automatically shrinking; resize() already support shrinking */
mask = nb - 1;
i = last = __kh_h2b(Hash()(key), bits);
while (__kh_used(used, i) && !Eq()(keys[i], key)) {
i = (i + 1U) & mask;
if (i == last) break;
}
if (!__kh_used(used, i)) { /* not present at all */
keys[i] = key;
__kh_set_used(used, i);
++count, absent = 1;
} else absent = 0; /* Don't touch keys[i] if present */
if (absent_) *absent_ = absent;
return i;
}
int del(khint_t i) {
khint_t j = i, k, mask, nb = n_buckets();
if (keys == 0 || i >= nb) return 0;
mask = nb - khint_t(1);
while (1) {
j = (j + khint_t(1)) & mask;
if (j == i || !__kh_used(used, j)) break; /* j==i only when the table is completely full */
k = __kh_h2b(Hash()(keys[j]), bits);
if ((j > i && (k <= i || k > j)) || (j < i && (k <= i && k > j)))
keys[i] = keys[j], i = j;
}
__kh_set_unused(used, i);
--count;
return 1;
}
};
/***********
* HashMap *
***********/
template<class KType, class VType>
struct KHashMapBucket { KType key; VType val; };
template<class T, class Hash, typename khint_t>
struct KHashMapHash { khint_t operator() (const T &a) const { return Hash()(a.key); } };
template<class T, class Eq>
struct KHashMapEq { bool operator() (const T &a, const T &b) const { return Eq()(a.key, b.key); } };
template<class KType, class VType, class Hash, class Eq=std::equal_to<KType>, typename khint_t=uint32_t>
class KHashMap : public KHashSet<KHashMapBucket<KType, VType>,
KHashMapHash<KHashMapBucket<KType, VType>, Hash, khint_t>,
KHashMapEq<KHashMapBucket<KType, VType>, Eq>, khint_t>
{
typedef KHashMapBucket<KType, VType> bucket_t;
typedef KHashSet<bucket_t, KHashMapHash<bucket_t, Hash, khint_t>, KHashMapEq<bucket_t, Eq>, khint_t> hashset_t;
public:
khint_t get(const KType &key) const {
bucket_t t = { key, VType() };
return hashset_t::get(t);
}
khint_t put(const KType &key, int *absent) {
bucket_t t = { key, VType() };
return hashset_t::put(t, absent);
}
inline KType &key(khint_t i) { return hashset_t::key(i).key; }
inline VType &value(khint_t i) { return hashset_t::key(i).val; }
inline VType &operator[] (const KType &key) {
bucket_t t = { key, VType() };
return value(hashset_t::put(t));
}
};
/****************************
* HashSet with cached hash *
****************************/
template<class KType, typename khint_t>
struct KHashSetCachedBucket { KType key; khint_t hash; };
template<class T, typename khint_t>
struct KHashCachedHash { khint_t operator() (const T &a) const { return a.hash; } };
template<class T, class Eq>
struct KHashCachedEq { bool operator() (const T &a, const T &b) const { return a.hash == b.hash && Eq()(a.key, b.key); } };
template<class KType, class Hash, class Eq = std::equal_to<KType>, typename khint_t = uint32_t>
class KHashSetCached : public KHashSet<KHashSetCachedBucket<KType, khint_t>,
KHashCachedHash<KHashSetCachedBucket<KType, khint_t>, khint_t>,
KHashCachedEq<KHashSetCachedBucket<KType, khint_t>, Eq>, khint_t>
{
typedef KHashSetCachedBucket<KType, khint_t> bucket_t;
typedef KHashSet<bucket_t, KHashCachedHash<bucket_t, khint_t>, KHashCachedEq<bucket_t, Eq>, khint_t> hashset_t;
public:
khint_t get(const KType &key) const {
bucket_t t = { key, Hash()(key) };
return hashset_t::get(t);
}
khint_t put(const KType &key, int *absent) {
bucket_t t = { key, Hash()(key) };
return hashset_t::put(t, absent);
}
inline KType &key(khint_t i) { return hashset_t::key(i).key; }
};
/****************************
* HashMap with cached hash *
****************************/
template<class KType, class VType, typename khint_t>
struct KHashMapCachedBucket { KType key; VType val; khint_t hash; };
template<class KType, class VType, class Hash, class Eq = std::equal_to<KType>, typename khint_t = uint32_t>
class KHashMapCached : public KHashSet<KHashMapCachedBucket<KType, VType, khint_t>,
KHashCachedHash<KHashMapCachedBucket<KType, VType, khint_t>, khint_t>,
KHashCachedEq<KHashMapCachedBucket<KType, VType, khint_t>, Eq>, khint_t>
{
typedef KHashMapCachedBucket<KType, VType, khint_t> bucket_t;
typedef KHashSet<bucket_t, KHashCachedHash<bucket_t, khint_t>, KHashCachedEq<bucket_t, Eq>, khint_t> hashset_t;
public:
khint_t get(const KType &key) const {
bucket_t t = { key, VType(), Hash()(key) };
return hashset_t::get(t);
}
khint_t put(const KType &key, int *absent) {
bucket_t t = { key, VType(), Hash()(key) };
return hashset_t::put(t, absent);
}
inline KType &key(khint_t i) { return hashset_t::key(i).key; }
inline VType &value(khint_t i) { return hashset_t::key(i).val; }
inline VType &operator[] (const KType &key) {
bucket_t t = { key, VType(), Hash()(key) };
return value(hashset_t::put(t));
}
};
}
#endif /* __AC_KHASHL_HPP */