|
- // Reference:
- // https://abseil.io/about/design/swisstables#swiss-tables-design-notes
- // https://github.com/gtmshrm/Swissmap
- // https://github.com/Tessil/robin-map
- // https://github.com/google/highwayhash
- // https://peps.python.org/pep-0456
- #include "hash_map.h"
-
- #include <string.h>
-
- #if defined(_WIN32)
- #include <xmmintrin.h>
- #define ALIGNED_ALLOC(size_in_bytes, aligment) _mm_malloc(size_in_bytes, aligment)
- #else
- #include <stdlib.h>
- #define ALIGNED_ALLOC(size_in_bytes, aligment) aligned_alloc(aligment, size_in_bytes)
- #endif /* _WIN32 */
-
- #if defined(__GNUC__) || defined(__clang__)
- #define LIKELY(x) __builtin_expect(!!(x), 1)
- #define UNLIKELY(x) __builtin_expect(!!(x), 0)
- #else
- #define LIKELY(x) (x)
- #define UNLIKELY(x) (x)
- #endif /* __GNUC__ || __clang__ */
-
- // Based on XXHASH
- #include "xxhash.h"
-
- //static inline T getTombstone() noexcept;
- //static inline T getEmpty() noexcept;
- //static inline uint64_t hash(const T& key) noexcept;
- //static inline bool isEqual(const T& lhs, const T& rhs) noexcept;
- //static inline bool isValid(const T& key) noexcept;
-
- struct BucketEntry
- {
-
- };
-
- #define POW2UINT(v) do { \
- (v)--; \
- (v) |= (v) >> 1; \
- (v) |= (v) >> 2; \
- (v) |= (v) >> 4; \
- (v) |= (v) >> 8; \
- (v) |= (v) >> 16; \
- (v)++; \
- } while (0)
-
- #define CACHE_LINE_SIZE ((size_t)(64))
-
- #define ALIAN(cursor, aligment) (((size_t)(cursor) + ((size_t)(alignment) - 1)) & ~((size_t)(alignment) - 1))
- #define ALIANOF(s, aligment) (((size_t)(s) + ((size_t)(alignment) - 1)) & ~((size_t)(alignment) - 1))
- #define IS_POINTER_ALIGNED(cursor, alignment) (((uintpr_t)(cursor) & ((alignment) - 1)) == 0)
-
- #define MAX(a,b) (a) > (b) ? (a) : (b)
-
- static const size_t kPrimes[] = {
- 1u,
- 5u,
- 17u,
- 29u,
- 37u,
- 53u,
- 67u,
- 79u,
- 97u,
- 131u,
- 193u,
- 257u,
- 389u,
- 521u,
- 769u,
- 1031u,
- 1543u,
- 2053u,
- 3079u,
- 6151u,
- 12289u,
- 24593u,
- 49157u,
- #if SIZE_MAX >= ULONG_MAX
- 98317ul,
- 196613ul,
- 393241ul,
- 786433ul,
- 1572869ul,
- 3145739ul,
- 6291469ul,
- 12582917ul,
- 25165843ul,
- 50331653ul,
- 100663319ul,
- 201326611ul,
- 402653189ul,
- 805306457ul,
- 1610612741ul,
- 3221225473ul,
- 4294967291ul,
- #endif
- #if SIZE_MAX >= ULLONG_MAX
- 6442450939ull,
- 12884901893ull,
- 25769803751ull,
- 51539607551ull,
- 103079215111ull,
- 206158430209ull,
- 412316860441ull,
- 824633720831ull,
- 1649267441651ull,
- 3298534883309ull,
- 6597069766657ull,
- #endif
- };
-
- const uint32_t kMinNumberOfBuckets = 16;
- const uint32_t kPrimeMinNumberOfBuckets = 5;
-
- // Open Addressing Hash Table
- struct HashMap
- {
- HashMapGrowthPolicy policy;
- void *storage;
- uint32_t num_of_buckets;
- uint32_t num_of_elements;
- int is_internal_storage;
- };
-
- /*
- * SipHash-2-4
- */
- #define ROTATE(x, b) (uint64_t)( ((x) << (b)) | ( (x) >> (64 - (b))) )
-
- #define HALF_ROUND(a,b,c,d,s,t) \
- do { \
- a += b; c += d; \
- b = ROTATE(b, s) ^ a; \
- d = ROTATE(d, t) ^ c; \
- a = ROTATE(a, 32); \
- } while (0)
-
- #define DOUBLE_ROUND(v0,v1,v2,v3) \
- do { \
- HALF_ROUND(v0,v1,v2,v3,13,16); \
- HALF_ROUND(v2,v1,v0,v3,17,21); \
- HALF_ROUND(v0,v1,v2,v3,13,16); \
- HALF_ROUND(v2,v1,v0,v3,17,21); \
- } while (0)
-
-
- uint64_t SipHash24(const void *src, unsigned long src_sz, const char key[16])
- {
- const uint64_t *_key = (uint64_t *)key;
- uint64_t k0 = (uint64_t)(_key[0]);
- uint64_t k1 = (uint64_t)(_key[1]);
- uint64_t b = (uint64_t)src_sz << 56;
- const uint64_t *in = (uint64_t*)src;
-
- uint64_t v0 = k0 ^ 0x736f6d6570736575ULL;
- uint64_t v1 = k1 ^ 0x646f72616e646f6dULL;
- uint64_t v2 = k0 ^ 0x6c7967656e657261ULL;
- uint64_t v3 = k1 ^ 0x7465646279746573ULL;
-
- while (src_sz >= 8) {
- uint64_t mi = (uint64_t)(*in);
- in += 1;
- src_sz -= 8;
- v3 ^= mi;
- DOUBLE_ROUND(v0,v1,v2,v3);
- v0 ^= mi;
- }
-
- uint64_t t = 0;
- uint8_t *pt = (uint8_t *)&t;
- uint8_t *m = (uint8_t *)in;
-
- switch (src_sz) {
- case 7: pt[6] = m[6];
- case 6: pt[5] = m[5];
- case 5: pt[4] = m[4];
- case 4: *((uint32_t*)&pt[0]) = *((uint32_t*)&m[0]); break;
- case 3: pt[2] = m[2];
- case 2: pt[1] = m[1];
- case 1: pt[0] = m[0];
- }
-
- b |= (uint64_t)(t);
-
- v3 ^= b;
- DOUBLE_ROUND(v0,v1,v2,v3);
- v0 ^= b;
- v2 ^= 0xff;
- DOUBLE_ROUND(v0,v1,v2,v3);
- DOUBLE_ROUND(v0,v1,v2,v3);
-
- return (v0 ^ v1) ^ (v2 ^ v3);
- }
-
- /*
- * DJB2 Hash Function.
- */
- size_t BytesHasher(unsigned char *str)
- {
- size_t hash = 5381;
- int c;
-
- while (c = *str++)
- {
- hash = ((hash << 5) + hash) + c;
- }
- return hash;
- }
-
- size_t UInt32Hasher(uint32_t v)
- {
- // fmix32 from MurmurHash by Austin Appleby (public domain)
- v ^= v >> 16;
- v *= 0x85ebca6b;
- v ^= v >> 13;
- v *= 0xc2b2ae35;
- v ^= v >> 16;
- return (size_t) v;
- }
-
- size_t UInt64Hasher(uint64_t v)
- {
- // fmix32 from MurmurHash by Austin Appleby (public domain)
- v ^= v >> 33;
- v *= (uint64_t) 0xff51afd7ed558ccdull;
- v ^= v >> 33;
- v *= (uint64_t) 0xc4ceb9fe1a85ec53ull;
- v ^= v >> 33;
- return (size_t) v;
- }
-
- size_t XXH128Cmp(const XXH128_hash_t *h1, const XXH128_hash_t *h2)
- {
- if (h1->high64 < h2->high64)
- {
- return 1;
- }
- else if (h1->high64 > h2->high64)
- {
- return 0;
- }
- else if (h1->low64 < h2->low64)
- {
- return 1;
- }
-
- return 0;
- }
-
- void HashCombine(size_t seed, size_t value)
- {
- /// From CityHash (https://github.com/google/cityhash)
- const size_t mult = 0x9ddfea08eb382d69ull;
- size_t a = (value ^ seed) * mult;
- a ^= (a >> 47);
- size_t b = (seed ^ a) * mult;
- b ^= (b >> 47);
- seed = b * mult;
- }
-
- size_t Hash64WithSeed(const void *ptr, size_t size, size_t seed)
- {
- return (size_t)XXH3_64bits_withSeed(ptr, size, (unsigned long long)seed);
- }
-
- size_t Hash64(const void *ptr, size_t size)
- {
- return (size_t)XXH3_64bits(ptr, size);
- }
-
- size_t HashStrWithSeed(const char *str, size_t seed)
- {
- return Hash64WithSeed(str, strlen(str), seed);
- }
-
- size_t HashStr(const char *str)
- {
- return Hash64(str, strlen(str));
- }
-
- // XXH128_hash_t HashKernel(const char *str)
- // {
- // const char *offset = strstr(str, "body:");
- // if (UNLIKELY(!offset))
- // {
- // offset = strchr(str, '{');
- // if (UNLIKELY(!offset))
- // assert(!"hash_kernel(): invalid input!");
- // }
- //
- // return XXH128(offset, strlen(offset), 0);
- // }
-
- HashMap *HashMap_Create(uint32_t num)
- {
- HashMap *o = (HashMap *)malloc(sizeof(HashMap));
-
- o->storage = NULL;
-
- num = (num < kMinNumberOfBuckets) ? kMinNumberOfBuckets : num;
-
- size_t num_bytes = sizeof(TItem) * num;
- // note: 64 to match CPU cache line size
- size_t alignment = MAX(ALIANOF(TItem, CACHE_LINE_SIZE), CACHE_LINE_SIZE);
- num_bytes = ALIGN(num_bytes, alignment);
- assert((num_bytes % alignment) == 0);
-
- void* raw = ALIGNED_ALLOC(num_bytes, alignment);
- assert(raw);
-
- o->storage = raw;
- EXLBR_ASSERT(IS_POINTER_ALIGNED(o->storage, ALIGNOF(TItem)));
-
- o->num_of_buckets = num;
- o->num_of_elements = 0;
-
- TItem* item = o->storage;
- const TItem* end_item = item + num;
-
- for (; item != end_item; item++)
- {
- (item, TKeyInfo::getEmpty());
- }
-
- return o;
- }
-
- void *HashMap_Find(HashMap *o, void *key)
- {
- const uint32_t num = o->num_of_buckets;
- TItem* const first = o->storage;
- TItem* const end = first + num;
- const uint64_t hash_value = TKeyInfo::hash(key);
- uint32_t bucketIndex = uint32_t(hashValue) & (numBuckets - 1);
- TItem* startItem = firstItem + bucketIndex;
- TItem* EXLBR_RESTRICT currentItem = startItem;
- do
- {
- if (EXLBR_LIKELY(currentItem->isEqual(key)))
- {
- return currentItem;
- }
-
- if (currentItem->isEmpty())
- {
- return endItem;
- }
-
- currentItem++;
- currentItem = (currentItem == endItem) ? firstItem : currentItem;
- } while (currentItem != startItem);
- return endItem;
- }
-
- uint32_t HashMap_Size(HashMap *o)
- {
- return o->num_of_elements;
- }
-
- uint32_t HashMap_Capacity(HashMap *o)
- {
- return o->num_of_buckets;
- }
-
- int HashMap_Empty(HashMap *o)
- {
- return o->num_of_elements == 0 ? 1 : 0;
- }
-
- int HashMap_Reserve(HashMap *o, size_t num)
- {
- if (num == 0 || num < o->num_of_buckets)
- {
- return 0;
- }
-
- POW2INT(num);
-
- size_t num_buckets = num_of_buckets;
- TItem* storage = o->storage;
- TItem* item = storage;
- const TItem* enditem = item + num_buckets;
- bool isInlineStorage = isUsingInlineStorage();
-
- num = HashMap_Create(num);
-
- HashMap_Reinsert(o, num, item, enditem);
-
- if (!o->is_internal_storage)
- {
- EXLBR_FREE(storage);
- }
-
- return 1;
- }
|