Cleanup dict - rcx - library of miscellaneous bits of C code

commit 51071af719559df83b990c518e076a269dbae576
parent 5a3a0f524139f5819b5791e7bf750a1ba37691f3
Author: Robert Russell <robertrussell.72001@gmail.com>
Date:   Thu,  8 Jun 2023 19:00:50 -0700

Cleanup dict

Diffstat:
M inc/dict.h  | 73 ++++++++++++++++++++++++++++++++++++++++++-------------------------------
M src/dict.c  | 143 +++++++++++++++++++++++++++++++++++++++----------------------------------------

2 files changed, 112 insertions(+), 104 deletions(-)
diff --git a/inc/dict.h b/inc/dict.h
@@ -4,11 +4,11 @@
 
 #include "debug.h"
 #include "def.h"
-#include "log.h"
 #include "rand.h"
 #include "string.h"
 
 typedef struct r_dict_spec RDictSpec;
+typedef struct r_dict RDict;
 typedef bool (*RDictEqFunc)(void *a, void *b, RDictSpec *spec);
 typedef u64 (*RDictHashFunc)(void *k, u64 seed, RDictSpec *spec);
 typedef void *(*RDictAlloczFunc)(usize size);
@@ -23,7 +23,15 @@ struct r_dict_spec {
 	RDictFreeFunc free;
 };
 
-struct r_dict; /* opaque */
+struct r_dict {
+	u8 b; /* Log2 of number of slots */
+	u32 niter;
+	u64 seed;
+	usize count;
+	usize ntombs;
+	void *mem;
+	u8 *data; /* Cache line-aligned pointer into mem */
+};
 
 static inline bool r_dict_mem_eq(void *a, void *b, RDictSpec *spec) { return memcmp(a, b, spec->ksize) == 0; }
 static inline u64 r_dict_mem_hash(void *k, u64 seed, RDictSpec *spec) { return r_hash(k, spec->ksize, seed); }
@@ -31,15 +39,16 @@ static inline u64 r_dict_mem_hash(void *k, u64 seed, RDictSpec *spec) { return r
 static inline bool r_dict_cstr_eq(char **a, char **b, RDictSpec *spec) { return strcmp(*a, *b) == 0; }
 static inline u64 r_dict_cstr_hash(char **k, u64 seed, RDictSpec *spec) { return r_hash(*k, strlen(*k), seed); }
 
-static inline bool r_dict_str_eq(Str *a, Str *b, RDictSpec *spec) { return str_eq(*a, *b); }
-static inline u64 r_dict_str_hash(Str *k, u64 seed, RDictSpec *spec) { return r_hash(str_bytes(*k), str_len(*k), seed); }
+static inline bool r_dict_rstr_eq(Str *a, Str *b, RDictSpec *spec) { return str_eq(*a, *b); }
+static inline u64 r_dict_rstr_hash(Str *k, u64 seed, RDictSpec *spec) { return r_hash(str_bytes(*k), str_len(*k), seed); }
 
 /* TODO: add functions for shrinking dict and reserving space? */
-int r_dict_init(struct r_dict *d, usize hint, RDictSpec *spec);
-int r_dict_access(struct r_dict *d, void **v, void *k, u64 h, RDictSpec *spec);
-int r_dict_create(struct r_dict *d, void **v, void *k, u64 h, RDictSpec *spec);
-int r_dict_delete(struct r_dict *d, void **v, void *k, u64 h, RDictSpec *spec);
-void r_dict_clear(struct r_dict *d, RDictSpec *spec);
+int r_dict_init(RDict *d, usize hint, RDictSpec *spec);
+void r_dict_free(RDict *d, RDictSpec *spec);
+int r_dict_access(RDict *d, void **v, void *k, u64 h, RDictSpec *spec);
+int r_dict_create(RDict *d, void **v, void *k, u64 h, RDictSpec *spec);
+int r_dict_delete(RDict *d, void **v, void *k, u64 h, RDictSpec *spec);
+void r_dict_clear(RDict *d, RDictSpec *spec);
 
 /* Defaults */
 #define R_DICT_STATIC
@@ -51,13 +60,7 @@ void r_dict_clear(struct r_dict *d, RDictSpec *spec);
 
 #define R_DICT_TYPEDEF(D, K, V)\
 typedef struct D { \
-	u8 b; \
-	u32 niter; \
-	u64 seed; \
-	usize count; \
-	usize ntombs; \
-	void *mem; \
-	u8 *data; \
+	RDict d; \
 	V default_val; \
 } D;
 
@@ -72,37 +75,44 @@ typedef struct D { \
 
 /* TODO: function for assigning without allocating */
 #define R_DICT_DECLARE(D, K, V, ...)\
-static inline UNUSED usize R_DICT_METHOD(len,##__VA_ARGS__)(D *d) { return d->count; } \
+static inline UNUSED usize R_DICT_METHOD(len,##__VA_ARGS__)(D *d) { return d->d.count; } \
 static inline UNUSED void R_DICT_METHOD(set_default,##__VA_ARGS__)(D *d, V v) { d->default_val = v; } \
 static inline UNUSED V R_DICT_METHOD(get_default,##__VA_ARGS__)(D *d) { return d->default_val; } \
 static inline UNUSED int R_DICT_METHOD(init,##__VA_ARGS__)(D *d, usize hint) { \
 	memset(&d->default_val, 0, sizeof(V)); \
-	return r_dict_init((void *)d, hint, R_DICT_SPEC_(K,V)); \
+	return r_dict_init(&d->d, hint, R_DICT_SPEC_(K,V)); \
 } \
-static inline UNUSED void R_DICT_METHOD(free,##__VA_ARGS__)(D *d) { \
-	if (d->niter > 0) \
-		r_warnf("dict freed with %"PRId32" iterators alive", d->niter); \
-	R_DICT_FREE(d->mem); \
-	d->data = 0; /* Ensure hard error on use after free. */ \
+static inline UNUSED void R_DICT_METHOD(free,##__VA_ARGS__)(D *d) { r_dict_free(&d->d, R_DICT_SPEC_(K,V)); } \
+static inline UNUSED V *R_DICT_METHOD(ptr,##__VA_ARGS__)(D *d, K k) { \
+	u64 h = R_DICT_HASH(&k, d->d.seed, R_DICT_SPEC_(K,V)); /* Allow hash to be inlined. */ \
+	void *vp; r_dict_access(&d->d, &vp, &k, h, R_DICT_SPEC_(K,V)); \
+	return vp; \
 } \
 static inline UNUSED bool R_DICT_METHOD(get,##__VA_ARGS__)(D *d, V *v, K k) { \
-	u64 h = R_DICT_HASH(&k, d->seed, R_DICT_SPEC_(K,V)); /* Allow hash to be inlined. */ \
-	void *vp; int r = r_dict_access((void *)d, &vp, &k, h, R_DICT_SPEC_(K,V)); \
+	u64 h = R_DICT_HASH(&k, d->d.seed, R_DICT_SPEC_(K,V)); /* Allow hash to be inlined. */ \
+	void *vp; int r = r_dict_access(&d->d, &vp, &k, h, R_DICT_SPEC_(K,V)); \
 	if (v) *v = vp ? *(V *)vp : d->default_val; \
 	return r > 0; \
 } \
-static inline UNUSED int R_DICT_METHOD(set,##__VA_ARGS__)(D *d, K k, V v) { \
-	u64 h = R_DICT_HASH(&k, d->seed, R_DICT_SPEC_(K,V)); /* Allow hash to be inlined. */ \
-	void *vp; int r = r_dict_create((void *)d, &vp, &k, h, R_DICT_SPEC_(K,V)); \
+static inline UNUSED bool R_DICT_METHOD(set,##__VA_ARGS__)(D *d, K k, V v) { \
+	u64 h = R_DICT_HASH(&k, d->d.seed, R_DICT_SPEC_(K,V)); /* Allow hash to be inlined. */ \
+	void *vp; int r = r_dict_access(&d->d, &vp, &k, h, R_DICT_SPEC_(K,V)); \
+	if (vp) *(V *)vp = v; \
+	return r > 0; \
+} \
+static inline UNUSED int R_DICT_METHOD(put,##__VA_ARGS__)(D *d, K k, V v) { \
+	assert(d->d.niter == 0, "can not create new dict entry while iterating"); \
+	u64 h = R_DICT_HASH(&k, d->d.seed, R_DICT_SPEC_(K,V)); /* Allow hash to be inlined. */ \
+	void *vp; int r = r_dict_create(&d->d, &vp, &k, h, R_DICT_SPEC_(K,V)); \
 	if (vp) *(V *)vp = v; \
 	return r; \
 } \
 static inline UNUSED bool R_DICT_METHOD(del,##__VA_ARGS__)(D *d, V *v, K k) { \
-	u64 h = R_DICT_HASH(&k, d->seed, R_DICT_SPEC_(K,V)); /* Allow hash to be inlined. */ \
-	void *vp; int r = r_dict_delete((void *)d, &vp, &k, h, R_DICT_SPEC_(K,V)); \
+	u64 h = R_DICT_HASH(&k, d->d.seed, R_DICT_SPEC_(K,V)); /* Allow hash to be inlined. */ \
+	void *vp; int r = r_dict_delete(&d->d, &vp, &k, h, R_DICT_SPEC_(K,V)); \
 	if (v) *v = vp ? *(V *)vp : d->default_val; \
 	return r > 0; \
 } \
 static inline UNUSED void R_DICT_METHOD(clr,##__VA_ARGS__)(D *d) { r_dict_clear((void *)d, R_DICT_SPEC_(K,V)); }
 
-#define R_DICT_DEFINE(D, K, V, ...) /* No op. Eveything is static inline. */
+#define R_DICT_DEFINE(D, K, V, ...) /* No op. Everything is static inline. */
+\ No newline at end of file
diff --git a/src/dict.c b/src/dict.c
@@ -4,15 +4,13 @@
 #include "bits.h"
 #include "dict.h"
 #include "internal/util.h"
+#include "log.h"
 #include "rand.h"
 #include "rcx.h"
 
-#if defined(__GNUC__) && defined(R_HAVE_AVX2)
-#include "simd.h"
-#define PROBE_LEN 32u
-#define PROBE_LEN_BITS 5u
-#elif defined(__GNUC__) && defined(R_HAVE_SSE2)
+#if defined(__GNUC__) && defined(R_HAVE_SSE2)
 #include "simd.h"
+#define USE_SIMD 1
 #define PROBE_LEN 16u
 #define PROBE_LEN_BITS 4u
 #else
@@ -21,15 +19,18 @@
 #endif
 
 /* PAGE_LEN should be a multiple of 8 for packing and alignment. */
+/* TODO: make these parameters programmer-controllable. PAGE_LEN == 8 is
+ * a conservative choice. In most cases (i.e., ksize a multiple of 8),
+ * PAGE_LEN == 1 causes no packing issues and would improve caching
+ * performance with very large keys or values. */
 #define PAGE_LEN 8u
 #define LF_NUM 3u
 #define LF_DEN 4u
-#define EMPTY 0u
+
+#define EMPTY 0u /* Must be 0. */
 #define TOMBSTONE 1u
 #define MIN_HASH1 2u
 
-typedef struct r_dict_spec Spec;
-typedef struct r_dict Dict;
 typedef u8 Page; /* For readability only; a page isn't really a u8. */
 
 enum scan_mode {
@@ -38,22 +39,6 @@ enum scan_mode {
 	DELETE = 2,
 };
 
-struct r_dict {
-	u8 b; /* Log2 of number of slots */
-	u32 niter;
-	u64 seed;
-	usize count;
-	usize ntombs;
-	void *mem;
-	u8 *data; /* Cache line-aligned pointer into mem */
-	/* V default_val; */
-};
-
-static inline usize
-hash0(u64 h, u8 b) {
-	return h & ((U64_C(1) << (b - PROBE_LEN_BITS)) - 1u);
-}
-
 static inline u8
 hash1(u64 h, u8 b) {
 	u64 t = h >> 56;
@@ -68,7 +53,7 @@ exceeds_lf(u64 n, u8 b) {
 }
 
 static void
-insert(Page *pages, u8 b, void *k, void *v, u64 h, Spec *spec) {
+insert(Page *pages, u8 b, void *k, void *v, u64 h, RDictSpec *spec) {
 	assert(false);
 #if 0
 	usize h0 = hash0(h, b);
@@ -99,7 +84,7 @@ insert(Page *pages, u8 b, void *k, void *v, u64 h, Spec *spec) {
 }
 
 static int
-grow(Dict *d, Spec *spec) {
+grow(RDict *d, RDictSpec *spec) {
 	assert(false);
 #if 0
 	u8 oldb = d->b;
@@ -139,10 +124,7 @@ grow(Dict *d, Spec *spec) {
 }
 
 static inline int
-scan(Dict *d, void **v, void *k, u64 h, int mode, Spec *spec) {
-	assert(mode != CREATE || d->niter == 0,
-		"can not create new dict entry while iterating");
-
+scan(RDict *d, void **v, void *k, u64 h, int mode, RDictSpec *spec) {
 	*v = 0;
 
 	if (mode == CREATE && exceeds_lf(d->count + d->ntombs, d->b)) {
@@ -150,7 +132,12 @@ scan(Dict *d, void **v, void *k, u64 h, int mode, Spec *spec) {
 			return -1;
 	}
 
-	usize h0 = hash0(h, d->b);
+	/* meta_mask is a mask for reducing modulo the table size. */
+	usize meta_mask = (USIZE_C(1) << (d->b - PROBE_LEN_BITS)) - 1u;
+	u8 *meta = d->data;
+	Page *pages = d->data + (USIZE_C(1) << d->b);
+
+	usize h0 = h & meta_mask;
 	u8 h1 = hash1(h, d->b);
 
 #if PROBE_LEN == 32
@@ -163,59 +150,62 @@ scan(Dict *d, void **v, void *k, u64 h, int mode, Spec *spec) {
 	u64 vh1; memset(&vh1, h1, sizeof vh1);
 #endif
 
-	Page *pages = d->data + (USIZE_C(1) << d->b);
 	bool create_slot_found = false;
 	usize create_idx;
 	bool create_replacing_tomb;
-	/* i is the index of a metadata vector (i.e., a u8[PROBE_LEN]).
-	 * We increment it quadratically. */
-	usize meta_mask = (USIZE_C(1) << (d->b - PROBE_LEN_BITS)) - 1u;
-	for (usize i = h0, jump = 1;; i = (i + jump) & meta_mask, jump++) {
-		u8 *meta = d->data + i * PROBE_LEN;
+	/* mvi is the metadata vector index (where each metadata vector is a
+	 * u8[PROBE_LEN]), and i is the corresponding index in the meta table.
+	 * Note that mvi is increased quadratically. */
+	for (usize mvi = h0, jump = 1;; mvi = (mvi + jump) & meta_mask, jump++) {
+		usize i = mvi * PROBE_LEN;
 
 		/* We must load the metadata vector as little endian, so that low bits
 		 * in the vector represent earlier slots in the table and hence ctz
 		 * gives us earlier slots first. In the SIMD cases, we know we're on
 		 * x86, so a memcpy suffices. In the general fallback case, we use
-		 * readl64, which does a byte swap on big endian machines. */
+		 * readl64, which does a byte swap on big endian machines.
+		 *
+		 * In the SIMD cases, m is a bit array indicating where vmeta equals
+		 * h1. Thus, ctz(m) is the least j such that meta[j] == h1 (provided
+		 * m != 0).
+		 * 
+		 * In the portable case, z initially has 0s exactly where vmeta has h1.
+		 * Thus, we can use r8search64 on z to find the indices j of h1 in
+		 * vmeta. After each iteration, we mask in an arbitrary bit into the
+		 * jth byte so that the next iteration gets a different j. */
 #if PROBE_LEN == 32
-		v32u8 vmeta; memcpy(&vmeta, meta, sizeof vmeta);
-		/* m is a bit array indicating where vmeta equals h1. Thus, ctz(m)
-		 * is the least j such that meta[j] == h1 (provided m != 0). */
+		v32u8 vmeta; memcpy(&vmeta, meta+i, sizeof vmeta);
 		for (u32 m = v32u8_msb(v32u8_cmpeq(vmeta, vh1)); m != 0; m &= m - 1) {
 			int j = r_ctz32(m);
 #elif PROBE_LEN == 16
-		v16u8 vmeta; memcpy(&vmeta, meta, sizeof vmeta);
+		v16u8 vmeta; memcpy(&vmeta, meta+i, sizeof vmeta);
 		for (u16 m = v16u8_msb(v16u8_cmpeq(vmeta, vh1)); m != 0; m &= m - 1) {
 			int j = r_ctz16(m);
 #else
-		u64 vmeta = r_readl64(meta);
-		u64 z = vh1 ^ vmeta;
-		/* z now has 0s exactly where vmeta had h1. Thus, we can use r8search64
-		 * on z to find the indices j of h1 in vmeta. After each iteration, we
-		 * mask in an arbitrary bit into the jth byte so that the next
-		 * iteration gets a different j. */
-		for (int j = r_r8search64(z); j < 8; z |= U64_C(1)<<(8*j), j = r_r8search64(z)) {
+		u64 vmeta = r_readl64(meta+i);
+		u64 z = vmeta ^ vh1;
+		for (int j = r_r8search64(z); j != 8; z |= U64_C(1)<<(8*j), j = r_r8search64(z)) {
 #endif
-			usize e = i * PROBE_LEN + j;
-			Page *p = pages + (e - e%PAGE_LEN) * (spec->ksize + spec->vsize);
-			if likely (spec->eq(p + (e%PAGE_LEN) * spec->ksize, k, spec)) {
+			usize match_idx = i + j;
+			usize page_idx = match_idx % PAGE_LEN;
+			Page *p = pages + (match_idx - page_idx) * (spec->ksize + spec->vsize);
+			if likely (spec->eq(p + page_idx * spec->ksize, k, spec)) {
 				if (mode == DELETE) {
 #if PROBE_LEN == 32
 					if (v32u8_msb(v32u8_cmpeq(vmeta, vempty)) != 0) {
 #elif PROBE_LEN == 16
 					if (v16u8_msb(v16u8_cmpeq(vmeta, vempty)) != 0) {
 #else
-					if (r_r8search64(vmeta) < 8) {
+					if (r_r8search64(vmeta) != 8) {
 #endif
-						meta[j] = EMPTY;
+						meta[match_idx] = EMPTY;
 					} else {
-						meta[j] = TOMBSTONE;
+						meta[match_idx] = TOMBSTONE;
 						d->ntombs += 1;
 					}
 					d->count -= 1;
 				}
-				*v = p + PAGE_LEN * spec->ksize + (e%PAGE_LEN) * spec->vsize;
+				*v = p + PAGE_LEN * spec->ksize + page_idx * spec->vsize;
 				return 1;
 			}
 		}
@@ -238,11 +228,11 @@ scan(Dict *d, void **v, void *k, u64 h, int mode, Spec *spec) {
 			u32 m1 = v32u8_msb(v32u8_cmpeq(vmeta, vtomb));
 			if (m1 != 0) {
 				create_slot_found = true;
-				create_idx = i * PROBE_LEN + r_ctz32(m1);
+				create_idx = i + r_ctz32(m1);
 				create_replacing_tomb = true;
 			} else if (m0 != 0) {
 				create_slot_found = true;
-				create_idx = i * PROBE_LEN + r_ctz32(m0);
+				create_idx = i + r_ctz32(m0);
 				create_replacing_tomb = false;
 				break;
 			}
@@ -251,11 +241,11 @@ scan(Dict *d, void **v, void *k, u64 h, int mode, Spec *spec) {
 			u16 m1 = v16u8_msb(v16u8_cmpeq(vmeta, vtomb));
 			if (m1 != 0) {
 				create_slot_found = true;
-				create_idx = i * PROBE_LEN + r_ctz16(m1);
+				create_idx = i + r_ctz16(m1);
 				create_replacing_tomb = true;
 			} else if (m0 != 0) {
 				create_slot_found = true;
-				create_idx = i * PROBE_LEN + r_ctz16(m0);
+				create_idx = i + r_ctz16(m0);
 				create_replacing_tomb = false;
 				break;
 			}
@@ -264,11 +254,11 @@ scan(Dict *d, void **v, void *k, u64 h, int mode, Spec *spec) {
 			int j1 = r_r8search64(vmeta ^ vtomb);
 			if (j1 != 8) {
 				create_slot_found = true;
-				create_idx = j1;
+				create_idx = i + j1;
 				create_replacing_tomb = true;
 			} else if (j0 != 8) {
 				create_slot_found = true;
-				create_idx = j0;
+				create_idx = i + j0;
 				create_replacing_tomb = false;
 				break;
 			}
@@ -286,19 +276,18 @@ scan(Dict *d, void **v, void *k, u64 h, int mode, Spec *spec) {
 	if (mode == CREATE) {
 		d->count += 1;
 		if (create_replacing_tomb) d->ntombs -= 1;
-		d->data[create_idx] = h1;
-		Page *p = pages + (create_idx - create_idx%PAGE_LEN) * (spec->ksize + spec->vsize);
-		/* TODO: try returning a pointer to the key slot instead, so that the
-		 * compiler can generate copy code when ksize is a constant */
-		memcpy(p + (create_idx%PAGE_LEN) * spec->ksize, k, spec->ksize);
-		*v = p + PAGE_LEN * spec->ksize + (create_idx%PAGE_LEN) * spec->vsize;
+		meta[create_idx] = h1;
+		usize page_idx = create_idx % PAGE_LEN;
+		Page *p = pages + (create_idx - page_idx) * (spec->ksize + spec->vsize);
+		memcpy(p + page_idx * spec->ksize, k, spec->ksize);
+		*v = p + PAGE_LEN * spec->ksize + page_idx * spec->vsize;
 	}
 
 	return 0;
 }
 
 int
-r_dict_init(Dict *d, usize hint, Spec *spec) {
+r_dict_init(RDict *d, usize hint, RDictSpec *spec) {
 	memset(d, 0, sizeof *d);
 
 	/* Impose maximum on hint to prevent overflow and the following while
@@ -329,23 +318,31 @@ alloc_error:
 	return -1;
 }
 
+void
+r_dict_free(RDict *d, RDictSpec *spec) {
+	if (d->niter > 0)
+		r_warnf("dict freed with %"PRId32" iterators alive", d->niter);
+	spec->free(d->mem);
+	d->data = 0; /* Ensure hard error on use after free. */
+}
+
 int
-r_dict_access(Dict *d, void **v, void *k, u64 h, Spec *spec) {
+r_dict_access(RDict *d, void **v, void *k, u64 h, RDictSpec *spec) {
 	return scan(d, v, k, h, ACCESS, spec);
 }
 
 int
-r_dict_create(Dict *d, void **v, void *k, u64 h, Spec *spec) {
+r_dict_create(RDict *d, void **v, void *k, u64 h, RDictSpec *spec) {
 	return scan(d, v, k, h, CREATE, spec);
 }
 
 int
-r_dict_delete(Dict *d, void **v, void *k, u64 h, Spec *spec) {
+r_dict_delete(RDict *d, void **v, void *k, u64 h, RDictSpec *spec) {
 	return scan(d, v, k, h, DELETE, spec);
 }
 
 void
-r_dict_clear(Dict *d, Spec *spec) {
+r_dict_clear(RDict *d, RDictSpec *spec) {
 	memset(d->data, EMPTY, USIZE_C(1) << d->b);
 	d->count = 0;
 	d->ntombs = 0;

	rcx library of miscellaneous bits of C code
	git clone git://git.rr3.xyz/rcx
	Log \| Files \| Refs \| README \| LICENSE

M	inc/dict.h	\|	73	++++++++++++++++++++++++++++++++++++++++++-------------------------------
M	src/dict.c	\|	143	+++++++++++++++++++++++++++++++++++++++----------------------------------------