commit 51071af719559df83b990c518e076a269dbae576
parent 5a3a0f524139f5819b5791e7bf750a1ba37691f3
Author: Robert Russell <robertrussell.72001@gmail.com>
Date: Thu, 8 Jun 2023 19:00:50 -0700
Cleanup dict
Diffstat:
| M | inc/dict.h | | | 73 | ++++++++++++++++++++++++++++++++++++++++++------------------------------- |
| M | src/dict.c | | | 143 | +++++++++++++++++++++++++++++++++++++++---------------------------------------- |
2 files changed, 112 insertions(+), 104 deletions(-)
diff --git a/inc/dict.h b/inc/dict.h
@@ -4,11 +4,11 @@
#include "debug.h"
#include "def.h"
-#include "log.h"
#include "rand.h"
#include "string.h"
typedef struct r_dict_spec RDictSpec;
+typedef struct r_dict RDict;
typedef bool (*RDictEqFunc)(void *a, void *b, RDictSpec *spec);
typedef u64 (*RDictHashFunc)(void *k, u64 seed, RDictSpec *spec);
typedef void *(*RDictAlloczFunc)(usize size);
@@ -23,7 +23,15 @@ struct r_dict_spec {
RDictFreeFunc free;
};
-struct r_dict; /* opaque */
+struct r_dict {
+ u8 b; /* Log2 of number of slots */
+ u32 niter;
+ u64 seed;
+ usize count;
+ usize ntombs;
+ void *mem;
+ u8 *data; /* Cache line-aligned pointer into mem */
+};
static inline bool r_dict_mem_eq(void *a, void *b, RDictSpec *spec) { return memcmp(a, b, spec->ksize) == 0; }
static inline u64 r_dict_mem_hash(void *k, u64 seed, RDictSpec *spec) { return r_hash(k, spec->ksize, seed); }
@@ -31,15 +39,16 @@ static inline u64 r_dict_mem_hash(void *k, u64 seed, RDictSpec *spec) { return r
static inline bool r_dict_cstr_eq(char **a, char **b, RDictSpec *spec) { return strcmp(*a, *b) == 0; }
static inline u64 r_dict_cstr_hash(char **k, u64 seed, RDictSpec *spec) { return r_hash(*k, strlen(*k), seed); }
-static inline bool r_dict_str_eq(Str *a, Str *b, RDictSpec *spec) { return str_eq(*a, *b); }
-static inline u64 r_dict_str_hash(Str *k, u64 seed, RDictSpec *spec) { return r_hash(str_bytes(*k), str_len(*k), seed); }
+static inline bool r_dict_rstr_eq(Str *a, Str *b, RDictSpec *spec) { return str_eq(*a, *b); }
+static inline u64 r_dict_rstr_hash(Str *k, u64 seed, RDictSpec *spec) { return r_hash(str_bytes(*k), str_len(*k), seed); }
/* TODO: add functions for shrinking dict and reserving space? */
-int r_dict_init(struct r_dict *d, usize hint, RDictSpec *spec);
-int r_dict_access(struct r_dict *d, void **v, void *k, u64 h, RDictSpec *spec);
-int r_dict_create(struct r_dict *d, void **v, void *k, u64 h, RDictSpec *spec);
-int r_dict_delete(struct r_dict *d, void **v, void *k, u64 h, RDictSpec *spec);
-void r_dict_clear(struct r_dict *d, RDictSpec *spec);
+int r_dict_init(RDict *d, usize hint, RDictSpec *spec);
+void r_dict_free(RDict *d, RDictSpec *spec);
+int r_dict_access(RDict *d, void **v, void *k, u64 h, RDictSpec *spec);
+int r_dict_create(RDict *d, void **v, void *k, u64 h, RDictSpec *spec);
+int r_dict_delete(RDict *d, void **v, void *k, u64 h, RDictSpec *spec);
+void r_dict_clear(RDict *d, RDictSpec *spec);
/* Defaults */
#define R_DICT_STATIC
@@ -51,13 +60,7 @@ void r_dict_clear(struct r_dict *d, RDictSpec *spec);
#define R_DICT_TYPEDEF(D, K, V)\
typedef struct D { \
- u8 b; \
- u32 niter; \
- u64 seed; \
- usize count; \
- usize ntombs; \
- void *mem; \
- u8 *data; \
+ RDict d; \
V default_val; \
} D;
@@ -72,37 +75,44 @@ typedef struct D { \
/* TODO: function for assigning without allocating */
#define R_DICT_DECLARE(D, K, V, ...)\
-static inline UNUSED usize R_DICT_METHOD(len,##__VA_ARGS__)(D *d) { return d->count; } \
+static inline UNUSED usize R_DICT_METHOD(len,##__VA_ARGS__)(D *d) { return d->d.count; } \
static inline UNUSED void R_DICT_METHOD(set_default,##__VA_ARGS__)(D *d, V v) { d->default_val = v; } \
static inline UNUSED V R_DICT_METHOD(get_default,##__VA_ARGS__)(D *d) { return d->default_val; } \
static inline UNUSED int R_DICT_METHOD(init,##__VA_ARGS__)(D *d, usize hint) { \
memset(&d->default_val, 0, sizeof(V)); \
- return r_dict_init((void *)d, hint, R_DICT_SPEC_(K,V)); \
+ return r_dict_init(&d->d, hint, R_DICT_SPEC_(K,V)); \
} \
-static inline UNUSED void R_DICT_METHOD(free,##__VA_ARGS__)(D *d) { \
- if (d->niter > 0) \
- r_warnf("dict freed with %"PRId32" iterators alive", d->niter); \
- R_DICT_FREE(d->mem); \
- d->data = 0; /* Ensure hard error on use after free. */ \
+static inline UNUSED void R_DICT_METHOD(free,##__VA_ARGS__)(D *d) { r_dict_free(&d->d, R_DICT_SPEC_(K,V)); } \
+static inline UNUSED V *R_DICT_METHOD(ptr,##__VA_ARGS__)(D *d, K k) { \
+ u64 h = R_DICT_HASH(&k, d->d.seed, R_DICT_SPEC_(K,V)); /* Allow hash to be inlined. */ \
+ void *vp; r_dict_access(&d->d, &vp, &k, h, R_DICT_SPEC_(K,V)); \
+ return vp; \
} \
static inline UNUSED bool R_DICT_METHOD(get,##__VA_ARGS__)(D *d, V *v, K k) { \
- u64 h = R_DICT_HASH(&k, d->seed, R_DICT_SPEC_(K,V)); /* Allow hash to be inlined. */ \
- void *vp; int r = r_dict_access((void *)d, &vp, &k, h, R_DICT_SPEC_(K,V)); \
+ u64 h = R_DICT_HASH(&k, d->d.seed, R_DICT_SPEC_(K,V)); /* Allow hash to be inlined. */ \
+ void *vp; int r = r_dict_access(&d->d, &vp, &k, h, R_DICT_SPEC_(K,V)); \
if (v) *v = vp ? *(V *)vp : d->default_val; \
return r > 0; \
} \
-static inline UNUSED int R_DICT_METHOD(set,##__VA_ARGS__)(D *d, K k, V v) { \
- u64 h = R_DICT_HASH(&k, d->seed, R_DICT_SPEC_(K,V)); /* Allow hash to be inlined. */ \
- void *vp; int r = r_dict_create((void *)d, &vp, &k, h, R_DICT_SPEC_(K,V)); \
+static inline UNUSED bool R_DICT_METHOD(set,##__VA_ARGS__)(D *d, K k, V v) { \
+ u64 h = R_DICT_HASH(&k, d->d.seed, R_DICT_SPEC_(K,V)); /* Allow hash to be inlined. */ \
+ void *vp; int r = r_dict_access(&d->d, &vp, &k, h, R_DICT_SPEC_(K,V)); \
+ if (vp) *(V *)vp = v; \
+ return r > 0; \
+} \
+static inline UNUSED int R_DICT_METHOD(put,##__VA_ARGS__)(D *d, K k, V v) { \
+ assert(d->d.niter == 0, "can not create new dict entry while iterating"); \
+ u64 h = R_DICT_HASH(&k, d->d.seed, R_DICT_SPEC_(K,V)); /* Allow hash to be inlined. */ \
+ void *vp; int r = r_dict_create(&d->d, &vp, &k, h, R_DICT_SPEC_(K,V)); \
if (vp) *(V *)vp = v; \
return r; \
} \
static inline UNUSED bool R_DICT_METHOD(del,##__VA_ARGS__)(D *d, V *v, K k) { \
- u64 h = R_DICT_HASH(&k, d->seed, R_DICT_SPEC_(K,V)); /* Allow hash to be inlined. */ \
- void *vp; int r = r_dict_delete((void *)d, &vp, &k, h, R_DICT_SPEC_(K,V)); \
+ u64 h = R_DICT_HASH(&k, d->d.seed, R_DICT_SPEC_(K,V)); /* Allow hash to be inlined. */ \
+ void *vp; int r = r_dict_delete(&d->d, &vp, &k, h, R_DICT_SPEC_(K,V)); \
if (v) *v = vp ? *(V *)vp : d->default_val; \
return r > 0; \
} \
static inline UNUSED void R_DICT_METHOD(clr,##__VA_ARGS__)(D *d) { r_dict_clear((void *)d, R_DICT_SPEC_(K,V)); }
-#define R_DICT_DEFINE(D, K, V, ...) /* No op. Eveything is static inline. */
+#define R_DICT_DEFINE(D, K, V, ...) /* No op. Everything is static inline. */
+\ No newline at end of file
diff --git a/src/dict.c b/src/dict.c
@@ -4,15 +4,13 @@
#include "bits.h"
#include "dict.h"
#include "internal/util.h"
+#include "log.h"
#include "rand.h"
#include "rcx.h"
-#if defined(__GNUC__) && defined(R_HAVE_AVX2)
-#include "simd.h"
-#define PROBE_LEN 32u
-#define PROBE_LEN_BITS 5u
-#elif defined(__GNUC__) && defined(R_HAVE_SSE2)
+#if defined(__GNUC__) && defined(R_HAVE_SSE2)
#include "simd.h"
+#define USE_SIMD 1
#define PROBE_LEN 16u
#define PROBE_LEN_BITS 4u
#else
@@ -21,15 +19,18 @@
#endif
/* PAGE_LEN should be a multiple of 8 for packing and alignment. */
+/* TODO: make these parameters programmer-controllable. PAGE_LEN == 8 is
+ * a conservative choice. In most cases (i.e., ksize a multiple of 8),
+ * PAGE_LEN == 1 causes no packing issues and would improve caching
+ * performance with very large keys or values. */
#define PAGE_LEN 8u
#define LF_NUM 3u
#define LF_DEN 4u
-#define EMPTY 0u
+
+#define EMPTY 0u /* Must be 0. */
#define TOMBSTONE 1u
#define MIN_HASH1 2u
-typedef struct r_dict_spec Spec;
-typedef struct r_dict Dict;
typedef u8 Page; /* For readability only; a page isn't really a u8. */
enum scan_mode {
@@ -38,22 +39,6 @@ enum scan_mode {
DELETE = 2,
};
-struct r_dict {
- u8 b; /* Log2 of number of slots */
- u32 niter;
- u64 seed;
- usize count;
- usize ntombs;
- void *mem;
- u8 *data; /* Cache line-aligned pointer into mem */
- /* V default_val; */
-};
-
-static inline usize
-hash0(u64 h, u8 b) {
- return h & ((U64_C(1) << (b - PROBE_LEN_BITS)) - 1u);
-}
-
static inline u8
hash1(u64 h, u8 b) {
u64 t = h >> 56;
@@ -68,7 +53,7 @@ exceeds_lf(u64 n, u8 b) {
}
static void
-insert(Page *pages, u8 b, void *k, void *v, u64 h, Spec *spec) {
+insert(Page *pages, u8 b, void *k, void *v, u64 h, RDictSpec *spec) {
assert(false);
#if 0
usize h0 = hash0(h, b);
@@ -99,7 +84,7 @@ insert(Page *pages, u8 b, void *k, void *v, u64 h, Spec *spec) {
}
static int
-grow(Dict *d, Spec *spec) {
+grow(RDict *d, RDictSpec *spec) {
assert(false);
#if 0
u8 oldb = d->b;
@@ -139,10 +124,7 @@ grow(Dict *d, Spec *spec) {
}
static inline int
-scan(Dict *d, void **v, void *k, u64 h, int mode, Spec *spec) {
- assert(mode != CREATE || d->niter == 0,
- "can not create new dict entry while iterating");
-
+scan(RDict *d, void **v, void *k, u64 h, int mode, RDictSpec *spec) {
*v = 0;
if (mode == CREATE && exceeds_lf(d->count + d->ntombs, d->b)) {
@@ -150,7 +132,12 @@ scan(Dict *d, void **v, void *k, u64 h, int mode, Spec *spec) {
return -1;
}
- usize h0 = hash0(h, d->b);
+ /* meta_mask is a mask for reducing modulo the table size. */
+ usize meta_mask = (USIZE_C(1) << (d->b - PROBE_LEN_BITS)) - 1u;
+ u8 *meta = d->data;
+ Page *pages = d->data + (USIZE_C(1) << d->b);
+
+ usize h0 = h & meta_mask;
u8 h1 = hash1(h, d->b);
#if PROBE_LEN == 32
@@ -163,59 +150,62 @@ scan(Dict *d, void **v, void *k, u64 h, int mode, Spec *spec) {
u64 vh1; memset(&vh1, h1, sizeof vh1);
#endif
- Page *pages = d->data + (USIZE_C(1) << d->b);
bool create_slot_found = false;
usize create_idx;
bool create_replacing_tomb;
- /* i is the index of a metadata vector (i.e., a u8[PROBE_LEN]).
- * We increment it quadratically. */
- usize meta_mask = (USIZE_C(1) << (d->b - PROBE_LEN_BITS)) - 1u;
- for (usize i = h0, jump = 1;; i = (i + jump) & meta_mask, jump++) {
- u8 *meta = d->data + i * PROBE_LEN;
+ /* mvi is the metadata vector index (where each metadata vector is a
+ * u8[PROBE_LEN]), and i is the corresponding index in the meta table.
+ * Note that mvi is increased quadratically. */
+ for (usize mvi = h0, jump = 1;; mvi = (mvi + jump) & meta_mask, jump++) {
+ usize i = mvi * PROBE_LEN;
/* We must load the metadata vector as little endian, so that low bits
* in the vector represent earlier slots in the table and hence ctz
* gives us earlier slots first. In the SIMD cases, we know we're on
* x86, so a memcpy suffices. In the general fallback case, we use
- * readl64, which does a byte swap on big endian machines. */
+ * readl64, which does a byte swap on big endian machines.
+ *
+ * In the SIMD cases, m is a bit array indicating where vmeta equals
+ * h1. Thus, ctz(m) is the least j such that meta[j] == h1 (provided
+ * m != 0).
+ *
+ * In the portable case, z initially has 0s exactly where vmeta has h1.
+ * Thus, we can use r8search64 on z to find the indices j of h1 in
+ * vmeta. After each iteration, we mask in an arbitrary bit into the
+ * jth byte so that the next iteration gets a different j. */
#if PROBE_LEN == 32
- v32u8 vmeta; memcpy(&vmeta, meta, sizeof vmeta);
- /* m is a bit array indicating where vmeta equals h1. Thus, ctz(m)
- * is the least j such that meta[j] == h1 (provided m != 0). */
+ v32u8 vmeta; memcpy(&vmeta, meta+i, sizeof vmeta);
for (u32 m = v32u8_msb(v32u8_cmpeq(vmeta, vh1)); m != 0; m &= m - 1) {
int j = r_ctz32(m);
#elif PROBE_LEN == 16
- v16u8 vmeta; memcpy(&vmeta, meta, sizeof vmeta);
+ v16u8 vmeta; memcpy(&vmeta, meta+i, sizeof vmeta);
for (u16 m = v16u8_msb(v16u8_cmpeq(vmeta, vh1)); m != 0; m &= m - 1) {
int j = r_ctz16(m);
#else
- u64 vmeta = r_readl64(meta);
- u64 z = vh1 ^ vmeta;
- /* z now has 0s exactly where vmeta had h1. Thus, we can use r8search64
- * on z to find the indices j of h1 in vmeta. After each iteration, we
- * mask in an arbitrary bit into the jth byte so that the next
- * iteration gets a different j. */
- for (int j = r_r8search64(z); j < 8; z |= U64_C(1)<<(8*j), j = r_r8search64(z)) {
+ u64 vmeta = r_readl64(meta+i);
+ u64 z = vmeta ^ vh1;
+ for (int j = r_r8search64(z); j != 8; z |= U64_C(1)<<(8*j), j = r_r8search64(z)) {
#endif
- usize e = i * PROBE_LEN + j;
- Page *p = pages + (e - e%PAGE_LEN) * (spec->ksize + spec->vsize);
- if likely (spec->eq(p + (e%PAGE_LEN) * spec->ksize, k, spec)) {
+ usize match_idx = i + j;
+ usize page_idx = match_idx % PAGE_LEN;
+ Page *p = pages + (match_idx - page_idx) * (spec->ksize + spec->vsize);
+ if likely (spec->eq(p + page_idx * spec->ksize, k, spec)) {
if (mode == DELETE) {
#if PROBE_LEN == 32
if (v32u8_msb(v32u8_cmpeq(vmeta, vempty)) != 0) {
#elif PROBE_LEN == 16
if (v16u8_msb(v16u8_cmpeq(vmeta, vempty)) != 0) {
#else
- if (r_r8search64(vmeta) < 8) {
+ if (r_r8search64(vmeta) != 8) {
#endif
- meta[j] = EMPTY;
+ meta[match_idx] = EMPTY;
} else {
- meta[j] = TOMBSTONE;
+ meta[match_idx] = TOMBSTONE;
d->ntombs += 1;
}
d->count -= 1;
}
- *v = p + PAGE_LEN * spec->ksize + (e%PAGE_LEN) * spec->vsize;
+ *v = p + PAGE_LEN * spec->ksize + page_idx * spec->vsize;
return 1;
}
}
@@ -238,11 +228,11 @@ scan(Dict *d, void **v, void *k, u64 h, int mode, Spec *spec) {
u32 m1 = v32u8_msb(v32u8_cmpeq(vmeta, vtomb));
if (m1 != 0) {
create_slot_found = true;
- create_idx = i * PROBE_LEN + r_ctz32(m1);
+ create_idx = i + r_ctz32(m1);
create_replacing_tomb = true;
} else if (m0 != 0) {
create_slot_found = true;
- create_idx = i * PROBE_LEN + r_ctz32(m0);
+ create_idx = i + r_ctz32(m0);
create_replacing_tomb = false;
break;
}
@@ -251,11 +241,11 @@ scan(Dict *d, void **v, void *k, u64 h, int mode, Spec *spec) {
u16 m1 = v16u8_msb(v16u8_cmpeq(vmeta, vtomb));
if (m1 != 0) {
create_slot_found = true;
- create_idx = i * PROBE_LEN + r_ctz16(m1);
+ create_idx = i + r_ctz16(m1);
create_replacing_tomb = true;
} else if (m0 != 0) {
create_slot_found = true;
- create_idx = i * PROBE_LEN + r_ctz16(m0);
+ create_idx = i + r_ctz16(m0);
create_replacing_tomb = false;
break;
}
@@ -264,11 +254,11 @@ scan(Dict *d, void **v, void *k, u64 h, int mode, Spec *spec) {
int j1 = r_r8search64(vmeta ^ vtomb);
if (j1 != 8) {
create_slot_found = true;
- create_idx = j1;
+ create_idx = i + j1;
create_replacing_tomb = true;
} else if (j0 != 8) {
create_slot_found = true;
- create_idx = j0;
+ create_idx = i + j0;
create_replacing_tomb = false;
break;
}
@@ -286,19 +276,18 @@ scan(Dict *d, void **v, void *k, u64 h, int mode, Spec *spec) {
if (mode == CREATE) {
d->count += 1;
if (create_replacing_tomb) d->ntombs -= 1;
- d->data[create_idx] = h1;
- Page *p = pages + (create_idx - create_idx%PAGE_LEN) * (spec->ksize + spec->vsize);
- /* TODO: try returning a pointer to the key slot instead, so that the
- * compiler can generate copy code when ksize is a constant */
- memcpy(p + (create_idx%PAGE_LEN) * spec->ksize, k, spec->ksize);
- *v = p + PAGE_LEN * spec->ksize + (create_idx%PAGE_LEN) * spec->vsize;
+ meta[create_idx] = h1;
+ usize page_idx = create_idx % PAGE_LEN;
+ Page *p = pages + (create_idx - page_idx) * (spec->ksize + spec->vsize);
+ memcpy(p + page_idx * spec->ksize, k, spec->ksize);
+ *v = p + PAGE_LEN * spec->ksize + page_idx * spec->vsize;
}
return 0;
}
int
-r_dict_init(Dict *d, usize hint, Spec *spec) {
+r_dict_init(RDict *d, usize hint, RDictSpec *spec) {
memset(d, 0, sizeof *d);
/* Impose maximum on hint to prevent overflow and the following while
@@ -329,23 +318,31 @@ alloc_error:
return -1;
}
+void
+r_dict_free(RDict *d, RDictSpec *spec) {
+ if (d->niter > 0)
+ r_warnf("dict freed with %"PRId32" iterators alive", d->niter);
+ spec->free(d->mem);
+ d->data = 0; /* Ensure hard error on use after free. */
+}
+
int
-r_dict_access(Dict *d, void **v, void *k, u64 h, Spec *spec) {
+r_dict_access(RDict *d, void **v, void *k, u64 h, RDictSpec *spec) {
return scan(d, v, k, h, ACCESS, spec);
}
int
-r_dict_create(Dict *d, void **v, void *k, u64 h, Spec *spec) {
+r_dict_create(RDict *d, void **v, void *k, u64 h, RDictSpec *spec) {
return scan(d, v, k, h, CREATE, spec);
}
int
-r_dict_delete(Dict *d, void **v, void *k, u64 h, Spec *spec) {
+r_dict_delete(RDict *d, void **v, void *k, u64 h, RDictSpec *spec) {
return scan(d, v, k, h, DELETE, spec);
}
void
-r_dict_clear(Dict *d, Spec *spec) {
+r_dict_clear(RDict *d, RDictSpec *spec) {
memset(d->data, EMPTY, USIZE_C(1) << d->b);
d->count = 0;
d->ntombs = 0;