rcx

library of miscellaneous bits of C code
git clone git://git.rr3.xyz/rcx
Log | Files | Refs | README | LICENSE

commit 51071af719559df83b990c518e076a269dbae576
parent 5a3a0f524139f5819b5791e7bf750a1ba37691f3
Author: Robert Russell <robertrussell.72001@gmail.com>
Date:   Thu,  8 Jun 2023 19:00:50 -0700

Cleanup dict

Diffstat:
Minc/dict.h | 73++++++++++++++++++++++++++++++++++++++++++-------------------------------
Msrc/dict.c | 143+++++++++++++++++++++++++++++++++++++++----------------------------------------
2 files changed, 112 insertions(+), 104 deletions(-)

diff --git a/inc/dict.h b/inc/dict.h @@ -4,11 +4,11 @@ #include "debug.h" #include "def.h" -#include "log.h" #include "rand.h" #include "string.h" typedef struct r_dict_spec RDictSpec; +typedef struct r_dict RDict; typedef bool (*RDictEqFunc)(void *a, void *b, RDictSpec *spec); typedef u64 (*RDictHashFunc)(void *k, u64 seed, RDictSpec *spec); typedef void *(*RDictAlloczFunc)(usize size); @@ -23,7 +23,15 @@ struct r_dict_spec { RDictFreeFunc free; }; -struct r_dict; /* opaque */ +struct r_dict { + u8 b; /* Log2 of number of slots */ + u32 niter; + u64 seed; + usize count; + usize ntombs; + void *mem; + u8 *data; /* Cache line-aligned pointer into mem */ +}; static inline bool r_dict_mem_eq(void *a, void *b, RDictSpec *spec) { return memcmp(a, b, spec->ksize) == 0; } static inline u64 r_dict_mem_hash(void *k, u64 seed, RDictSpec *spec) { return r_hash(k, spec->ksize, seed); } @@ -31,15 +39,16 @@ static inline u64 r_dict_mem_hash(void *k, u64 seed, RDictSpec *spec) { return r static inline bool r_dict_cstr_eq(char **a, char **b, RDictSpec *spec) { return strcmp(*a, *b) == 0; } static inline u64 r_dict_cstr_hash(char **k, u64 seed, RDictSpec *spec) { return r_hash(*k, strlen(*k), seed); } -static inline bool r_dict_str_eq(Str *a, Str *b, RDictSpec *spec) { return str_eq(*a, *b); } -static inline u64 r_dict_str_hash(Str *k, u64 seed, RDictSpec *spec) { return r_hash(str_bytes(*k), str_len(*k), seed); } +static inline bool r_dict_rstr_eq(Str *a, Str *b, RDictSpec *spec) { return str_eq(*a, *b); } +static inline u64 r_dict_rstr_hash(Str *k, u64 seed, RDictSpec *spec) { return r_hash(str_bytes(*k), str_len(*k), seed); } /* TODO: add functions for shrinking dict and reserving space? */ -int r_dict_init(struct r_dict *d, usize hint, RDictSpec *spec); -int r_dict_access(struct r_dict *d, void **v, void *k, u64 h, RDictSpec *spec); -int r_dict_create(struct r_dict *d, void **v, void *k, u64 h, RDictSpec *spec); -int r_dict_delete(struct r_dict *d, void **v, void *k, u64 h, RDictSpec *spec); -void r_dict_clear(struct r_dict *d, RDictSpec *spec); +int r_dict_init(RDict *d, usize hint, RDictSpec *spec); +void r_dict_free(RDict *d, RDictSpec *spec); +int r_dict_access(RDict *d, void **v, void *k, u64 h, RDictSpec *spec); +int r_dict_create(RDict *d, void **v, void *k, u64 h, RDictSpec *spec); +int r_dict_delete(RDict *d, void **v, void *k, u64 h, RDictSpec *spec); +void r_dict_clear(RDict *d, RDictSpec *spec); /* Defaults */ #define R_DICT_STATIC @@ -51,13 +60,7 @@ void r_dict_clear(struct r_dict *d, RDictSpec *spec); #define R_DICT_TYPEDEF(D, K, V)\ typedef struct D { \ - u8 b; \ - u32 niter; \ - u64 seed; \ - usize count; \ - usize ntombs; \ - void *mem; \ - u8 *data; \ + RDict d; \ V default_val; \ } D; @@ -72,37 +75,44 @@ typedef struct D { \ /* TODO: function for assigning without allocating */ #define R_DICT_DECLARE(D, K, V, ...)\ -static inline UNUSED usize R_DICT_METHOD(len,##__VA_ARGS__)(D *d) { return d->count; } \ +static inline UNUSED usize R_DICT_METHOD(len,##__VA_ARGS__)(D *d) { return d->d.count; } \ static inline UNUSED void R_DICT_METHOD(set_default,##__VA_ARGS__)(D *d, V v) { d->default_val = v; } \ static inline UNUSED V R_DICT_METHOD(get_default,##__VA_ARGS__)(D *d) { return d->default_val; } \ static inline UNUSED int R_DICT_METHOD(init,##__VA_ARGS__)(D *d, usize hint) { \ memset(&d->default_val, 0, sizeof(V)); \ - return r_dict_init((void *)d, hint, R_DICT_SPEC_(K,V)); \ + return r_dict_init(&d->d, hint, R_DICT_SPEC_(K,V)); \ } \ -static inline UNUSED void R_DICT_METHOD(free,##__VA_ARGS__)(D *d) { \ - if (d->niter > 0) \ - r_warnf("dict freed with %"PRId32" iterators alive", d->niter); \ - R_DICT_FREE(d->mem); \ - d->data = 0; /* Ensure hard error on use after free. */ \ +static inline UNUSED void R_DICT_METHOD(free,##__VA_ARGS__)(D *d) { r_dict_free(&d->d, R_DICT_SPEC_(K,V)); } \ +static inline UNUSED V *R_DICT_METHOD(ptr,##__VA_ARGS__)(D *d, K k) { \ + u64 h = R_DICT_HASH(&k, d->d.seed, R_DICT_SPEC_(K,V)); /* Allow hash to be inlined. */ \ + void *vp; r_dict_access(&d->d, &vp, &k, h, R_DICT_SPEC_(K,V)); \ + return vp; \ } \ static inline UNUSED bool R_DICT_METHOD(get,##__VA_ARGS__)(D *d, V *v, K k) { \ - u64 h = R_DICT_HASH(&k, d->seed, R_DICT_SPEC_(K,V)); /* Allow hash to be inlined. */ \ - void *vp; int r = r_dict_access((void *)d, &vp, &k, h, R_DICT_SPEC_(K,V)); \ + u64 h = R_DICT_HASH(&k, d->d.seed, R_DICT_SPEC_(K,V)); /* Allow hash to be inlined. */ \ + void *vp; int r = r_dict_access(&d->d, &vp, &k, h, R_DICT_SPEC_(K,V)); \ if (v) *v = vp ? *(V *)vp : d->default_val; \ return r > 0; \ } \ -static inline UNUSED int R_DICT_METHOD(set,##__VA_ARGS__)(D *d, K k, V v) { \ - u64 h = R_DICT_HASH(&k, d->seed, R_DICT_SPEC_(K,V)); /* Allow hash to be inlined. */ \ - void *vp; int r = r_dict_create((void *)d, &vp, &k, h, R_DICT_SPEC_(K,V)); \ +static inline UNUSED bool R_DICT_METHOD(set,##__VA_ARGS__)(D *d, K k, V v) { \ + u64 h = R_DICT_HASH(&k, d->d.seed, R_DICT_SPEC_(K,V)); /* Allow hash to be inlined. */ \ + void *vp; int r = r_dict_access(&d->d, &vp, &k, h, R_DICT_SPEC_(K,V)); \ + if (vp) *(V *)vp = v; \ + return r > 0; \ +} \ +static inline UNUSED int R_DICT_METHOD(put,##__VA_ARGS__)(D *d, K k, V v) { \ + assert(d->d.niter == 0, "can not create new dict entry while iterating"); \ + u64 h = R_DICT_HASH(&k, d->d.seed, R_DICT_SPEC_(K,V)); /* Allow hash to be inlined. */ \ + void *vp; int r = r_dict_create(&d->d, &vp, &k, h, R_DICT_SPEC_(K,V)); \ if (vp) *(V *)vp = v; \ return r; \ } \ static inline UNUSED bool R_DICT_METHOD(del,##__VA_ARGS__)(D *d, V *v, K k) { \ - u64 h = R_DICT_HASH(&k, d->seed, R_DICT_SPEC_(K,V)); /* Allow hash to be inlined. */ \ - void *vp; int r = r_dict_delete((void *)d, &vp, &k, h, R_DICT_SPEC_(K,V)); \ + u64 h = R_DICT_HASH(&k, d->d.seed, R_DICT_SPEC_(K,V)); /* Allow hash to be inlined. */ \ + void *vp; int r = r_dict_delete(&d->d, &vp, &k, h, R_DICT_SPEC_(K,V)); \ if (v) *v = vp ? *(V *)vp : d->default_val; \ return r > 0; \ } \ static inline UNUSED void R_DICT_METHOD(clr,##__VA_ARGS__)(D *d) { r_dict_clear((void *)d, R_DICT_SPEC_(K,V)); } -#define R_DICT_DEFINE(D, K, V, ...) /* No op. Eveything is static inline. */ +#define R_DICT_DEFINE(D, K, V, ...) /* No op. Everything is static inline. */ +\ No newline at end of file diff --git a/src/dict.c b/src/dict.c @@ -4,15 +4,13 @@ #include "bits.h" #include "dict.h" #include "internal/util.h" +#include "log.h" #include "rand.h" #include "rcx.h" -#if defined(__GNUC__) && defined(R_HAVE_AVX2) -#include "simd.h" -#define PROBE_LEN 32u -#define PROBE_LEN_BITS 5u -#elif defined(__GNUC__) && defined(R_HAVE_SSE2) +#if defined(__GNUC__) && defined(R_HAVE_SSE2) #include "simd.h" +#define USE_SIMD 1 #define PROBE_LEN 16u #define PROBE_LEN_BITS 4u #else @@ -21,15 +19,18 @@ #endif /* PAGE_LEN should be a multiple of 8 for packing and alignment. */ +/* TODO: make these parameters programmer-controllable. PAGE_LEN == 8 is + * a conservative choice. In most cases (i.e., ksize a multiple of 8), + * PAGE_LEN == 1 causes no packing issues and would improve caching + * performance with very large keys or values. */ #define PAGE_LEN 8u #define LF_NUM 3u #define LF_DEN 4u -#define EMPTY 0u + +#define EMPTY 0u /* Must be 0. */ #define TOMBSTONE 1u #define MIN_HASH1 2u -typedef struct r_dict_spec Spec; -typedef struct r_dict Dict; typedef u8 Page; /* For readability only; a page isn't really a u8. */ enum scan_mode { @@ -38,22 +39,6 @@ enum scan_mode { DELETE = 2, }; -struct r_dict { - u8 b; /* Log2 of number of slots */ - u32 niter; - u64 seed; - usize count; - usize ntombs; - void *mem; - u8 *data; /* Cache line-aligned pointer into mem */ - /* V default_val; */ -}; - -static inline usize -hash0(u64 h, u8 b) { - return h & ((U64_C(1) << (b - PROBE_LEN_BITS)) - 1u); -} - static inline u8 hash1(u64 h, u8 b) { u64 t = h >> 56; @@ -68,7 +53,7 @@ exceeds_lf(u64 n, u8 b) { } static void -insert(Page *pages, u8 b, void *k, void *v, u64 h, Spec *spec) { +insert(Page *pages, u8 b, void *k, void *v, u64 h, RDictSpec *spec) { assert(false); #if 0 usize h0 = hash0(h, b); @@ -99,7 +84,7 @@ insert(Page *pages, u8 b, void *k, void *v, u64 h, Spec *spec) { } static int -grow(Dict *d, Spec *spec) { +grow(RDict *d, RDictSpec *spec) { assert(false); #if 0 u8 oldb = d->b; @@ -139,10 +124,7 @@ grow(Dict *d, Spec *spec) { } static inline int -scan(Dict *d, void **v, void *k, u64 h, int mode, Spec *spec) { - assert(mode != CREATE || d->niter == 0, - "can not create new dict entry while iterating"); - +scan(RDict *d, void **v, void *k, u64 h, int mode, RDictSpec *spec) { *v = 0; if (mode == CREATE && exceeds_lf(d->count + d->ntombs, d->b)) { @@ -150,7 +132,12 @@ scan(Dict *d, void **v, void *k, u64 h, int mode, Spec *spec) { return -1; } - usize h0 = hash0(h, d->b); + /* meta_mask is a mask for reducing modulo the table size. */ + usize meta_mask = (USIZE_C(1) << (d->b - PROBE_LEN_BITS)) - 1u; + u8 *meta = d->data; + Page *pages = d->data + (USIZE_C(1) << d->b); + + usize h0 = h & meta_mask; u8 h1 = hash1(h, d->b); #if PROBE_LEN == 32 @@ -163,59 +150,62 @@ scan(Dict *d, void **v, void *k, u64 h, int mode, Spec *spec) { u64 vh1; memset(&vh1, h1, sizeof vh1); #endif - Page *pages = d->data + (USIZE_C(1) << d->b); bool create_slot_found = false; usize create_idx; bool create_replacing_tomb; - /* i is the index of a metadata vector (i.e., a u8[PROBE_LEN]). - * We increment it quadratically. */ - usize meta_mask = (USIZE_C(1) << (d->b - PROBE_LEN_BITS)) - 1u; - for (usize i = h0, jump = 1;; i = (i + jump) & meta_mask, jump++) { - u8 *meta = d->data + i * PROBE_LEN; + /* mvi is the metadata vector index (where each metadata vector is a + * u8[PROBE_LEN]), and i is the corresponding index in the meta table. + * Note that mvi is increased quadratically. */ + for (usize mvi = h0, jump = 1;; mvi = (mvi + jump) & meta_mask, jump++) { + usize i = mvi * PROBE_LEN; /* We must load the metadata vector as little endian, so that low bits * in the vector represent earlier slots in the table and hence ctz * gives us earlier slots first. In the SIMD cases, we know we're on * x86, so a memcpy suffices. In the general fallback case, we use - * readl64, which does a byte swap on big endian machines. */ + * readl64, which does a byte swap on big endian machines. + * + * In the SIMD cases, m is a bit array indicating where vmeta equals + * h1. Thus, ctz(m) is the least j such that meta[j] == h1 (provided + * m != 0). + * + * In the portable case, z initially has 0s exactly where vmeta has h1. + * Thus, we can use r8search64 on z to find the indices j of h1 in + * vmeta. After each iteration, we mask in an arbitrary bit into the + * jth byte so that the next iteration gets a different j. */ #if PROBE_LEN == 32 - v32u8 vmeta; memcpy(&vmeta, meta, sizeof vmeta); - /* m is a bit array indicating where vmeta equals h1. Thus, ctz(m) - * is the least j such that meta[j] == h1 (provided m != 0). */ + v32u8 vmeta; memcpy(&vmeta, meta+i, sizeof vmeta); for (u32 m = v32u8_msb(v32u8_cmpeq(vmeta, vh1)); m != 0; m &= m - 1) { int j = r_ctz32(m); #elif PROBE_LEN == 16 - v16u8 vmeta; memcpy(&vmeta, meta, sizeof vmeta); + v16u8 vmeta; memcpy(&vmeta, meta+i, sizeof vmeta); for (u16 m = v16u8_msb(v16u8_cmpeq(vmeta, vh1)); m != 0; m &= m - 1) { int j = r_ctz16(m); #else - u64 vmeta = r_readl64(meta); - u64 z = vh1 ^ vmeta; - /* z now has 0s exactly where vmeta had h1. Thus, we can use r8search64 - * on z to find the indices j of h1 in vmeta. After each iteration, we - * mask in an arbitrary bit into the jth byte so that the next - * iteration gets a different j. */ - for (int j = r_r8search64(z); j < 8; z |= U64_C(1)<<(8*j), j = r_r8search64(z)) { + u64 vmeta = r_readl64(meta+i); + u64 z = vmeta ^ vh1; + for (int j = r_r8search64(z); j != 8; z |= U64_C(1)<<(8*j), j = r_r8search64(z)) { #endif - usize e = i * PROBE_LEN + j; - Page *p = pages + (e - e%PAGE_LEN) * (spec->ksize + spec->vsize); - if likely (spec->eq(p + (e%PAGE_LEN) * spec->ksize, k, spec)) { + usize match_idx = i + j; + usize page_idx = match_idx % PAGE_LEN; + Page *p = pages + (match_idx - page_idx) * (spec->ksize + spec->vsize); + if likely (spec->eq(p + page_idx * spec->ksize, k, spec)) { if (mode == DELETE) { #if PROBE_LEN == 32 if (v32u8_msb(v32u8_cmpeq(vmeta, vempty)) != 0) { #elif PROBE_LEN == 16 if (v16u8_msb(v16u8_cmpeq(vmeta, vempty)) != 0) { #else - if (r_r8search64(vmeta) < 8) { + if (r_r8search64(vmeta) != 8) { #endif - meta[j] = EMPTY; + meta[match_idx] = EMPTY; } else { - meta[j] = TOMBSTONE; + meta[match_idx] = TOMBSTONE; d->ntombs += 1; } d->count -= 1; } - *v = p + PAGE_LEN * spec->ksize + (e%PAGE_LEN) * spec->vsize; + *v = p + PAGE_LEN * spec->ksize + page_idx * spec->vsize; return 1; } } @@ -238,11 +228,11 @@ scan(Dict *d, void **v, void *k, u64 h, int mode, Spec *spec) { u32 m1 = v32u8_msb(v32u8_cmpeq(vmeta, vtomb)); if (m1 != 0) { create_slot_found = true; - create_idx = i * PROBE_LEN + r_ctz32(m1); + create_idx = i + r_ctz32(m1); create_replacing_tomb = true; } else if (m0 != 0) { create_slot_found = true; - create_idx = i * PROBE_LEN + r_ctz32(m0); + create_idx = i + r_ctz32(m0); create_replacing_tomb = false; break; } @@ -251,11 +241,11 @@ scan(Dict *d, void **v, void *k, u64 h, int mode, Spec *spec) { u16 m1 = v16u8_msb(v16u8_cmpeq(vmeta, vtomb)); if (m1 != 0) { create_slot_found = true; - create_idx = i * PROBE_LEN + r_ctz16(m1); + create_idx = i + r_ctz16(m1); create_replacing_tomb = true; } else if (m0 != 0) { create_slot_found = true; - create_idx = i * PROBE_LEN + r_ctz16(m0); + create_idx = i + r_ctz16(m0); create_replacing_tomb = false; break; } @@ -264,11 +254,11 @@ scan(Dict *d, void **v, void *k, u64 h, int mode, Spec *spec) { int j1 = r_r8search64(vmeta ^ vtomb); if (j1 != 8) { create_slot_found = true; - create_idx = j1; + create_idx = i + j1; create_replacing_tomb = true; } else if (j0 != 8) { create_slot_found = true; - create_idx = j0; + create_idx = i + j0; create_replacing_tomb = false; break; } @@ -286,19 +276,18 @@ scan(Dict *d, void **v, void *k, u64 h, int mode, Spec *spec) { if (mode == CREATE) { d->count += 1; if (create_replacing_tomb) d->ntombs -= 1; - d->data[create_idx] = h1; - Page *p = pages + (create_idx - create_idx%PAGE_LEN) * (spec->ksize + spec->vsize); - /* TODO: try returning a pointer to the key slot instead, so that the - * compiler can generate copy code when ksize is a constant */ - memcpy(p + (create_idx%PAGE_LEN) * spec->ksize, k, spec->ksize); - *v = p + PAGE_LEN * spec->ksize + (create_idx%PAGE_LEN) * spec->vsize; + meta[create_idx] = h1; + usize page_idx = create_idx % PAGE_LEN; + Page *p = pages + (create_idx - page_idx) * (spec->ksize + spec->vsize); + memcpy(p + page_idx * spec->ksize, k, spec->ksize); + *v = p + PAGE_LEN * spec->ksize + page_idx * spec->vsize; } return 0; } int -r_dict_init(Dict *d, usize hint, Spec *spec) { +r_dict_init(RDict *d, usize hint, RDictSpec *spec) { memset(d, 0, sizeof *d); /* Impose maximum on hint to prevent overflow and the following while @@ -329,23 +318,31 @@ alloc_error: return -1; } +void +r_dict_free(RDict *d, RDictSpec *spec) { + if (d->niter > 0) + r_warnf("dict freed with %"PRId32" iterators alive", d->niter); + spec->free(d->mem); + d->data = 0; /* Ensure hard error on use after free. */ +} + int -r_dict_access(Dict *d, void **v, void *k, u64 h, Spec *spec) { +r_dict_access(RDict *d, void **v, void *k, u64 h, RDictSpec *spec) { return scan(d, v, k, h, ACCESS, spec); } int -r_dict_create(Dict *d, void **v, void *k, u64 h, Spec *spec) { +r_dict_create(RDict *d, void **v, void *k, u64 h, RDictSpec *spec) { return scan(d, v, k, h, CREATE, spec); } int -r_dict_delete(Dict *d, void **v, void *k, u64 h, Spec *spec) { +r_dict_delete(RDict *d, void **v, void *k, u64 h, RDictSpec *spec) { return scan(d, v, k, h, DELETE, spec); } void -r_dict_clear(Dict *d, Spec *spec) { +r_dict_clear(RDict *d, RDictSpec *spec) { memset(d->data, EMPTY, USIZE_C(1) << d->b); d->count = 0; d->ntombs = 0;