commit 335a2967fe5e27d1fe92d75dbcb4828c8467e61d
parent 6e3d696e92f9c04a13224de6429e98c5b6d3b05b
Author: Robert Russell <robertrussell.72001@gmail.com>
Date: Sun, 21 May 2023 18:31:54 -0700
Implement wyhash and wyrand
Diffstat:
| M | Makefile | | | 2 | ++ |
| A | inc/hash.h | | | 26 | ++++++++++++++++++++++++++ |
| A | src/hash.c | | | 85 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
3 files changed, 113 insertions(+), 0 deletions(-)
diff --git a/Makefile b/Makefile
@@ -7,6 +7,7 @@ SRC =\
src/bench.c\
src/buffer.c\
src/debug.c\
+ src/hash.c\
src/log.c\
src/str.c\
src/string.c\
@@ -26,6 +27,7 @@ src/alloc.o: src/alloc.c inc/alloc.h inc/def.h inc/log.h inc/rcx.h inc/internal/
src/bench.o: src/bench.c inc/bench.h inc/def.h inc/log.h inc/rcx.h config.mk
src/buffer.o: src/buffer.c inc/alloc.h inc/buffer.h inc/debug.h inc/def.h inc/log.h inc/rcx.h inc/string.h config.mk
src/debug.o: src/debug.c inc/debug.h inc/def.h inc/rcx.h config.mk
+src/hash.o: src/hash.c inc/bits.h inc/def.h inc/hash.h inc/rcx.h config.mk
src/log.o: src/log.c inc/def.h inc/log.h inc/rcx.h config.mk
src/opt.o: src/opt.c inc/def.h inc/opt.h inc/rcx.h config.mk
src/str.o: src/str.c inc/alloc.h inc/debug.h inc/def.h inc/log.h inc/rcx.h inc/str.h config.mk
diff --git a/inc/hash.h b/inc/hash.h
@@ -0,0 +1,26 @@
+#include "bits.h"
+#include "def.h"
+
+static inline u64
+r_wymix_(u64 x, u64 y) {
+ u64 h, l;
+ r_mul64(&h, &l, x, y);
+ return h ^ l;
+}
+
+u64 r_hash(void *data, u64 len, u64 seed, u64 (*key)[4]);
+
+/* TODO: optimize these common cases */
+static inline u64 r_hash8 (u8 data, u64 seed, u64 (*key)[4]) { return r_hash(&data, 1, seed, key); }
+static inline u64 r_hash16(u16 data, u64 seed, u64 (*key)[4]) { return r_hash(&data, 2, seed, key); }
+static inline u64 r_hash32(u32 data, u64 seed, u64 (*key)[4]) { return r_hash(&data, 4, seed, key); }
+static inline u64 r_hash64(u64 data, u64 seed, u64 (*key)[4]) { return r_hash(&data, 8, seed, key); }
+
+static inline u64
+r_rand(u64 *seed) {
+ /* wyrand */
+ *seed += U64_C(0xa0761d6478bd642f);
+ return r_wymix_(*seed, *seed ^ U64_C(0xe7037ed1a0b428db));
+}
+
+void r_make_hash_key(u64 (*key)[4], u64 seed);
diff --git a/src/hash.c b/src/hash.c
@@ -0,0 +1,85 @@
+#include "bits.h"
+#include "hash.h"
+#include "rcx.h"
+
+#define mix r_wymix_
+#define r4(p) ((u64)r_read32h(p))
+#define r8(p) ((u64)r_read64h(p))
+
+/* _wyp */
+static u64 default_key[4] = {
+ U64_C(0xa0761d6478bd642f),
+ U64_C(0xe7037ed1a0b428db),
+ U64_C(0x8ebc6af09c88c6e3),
+ U64_C(0x589965cc75374cc3),
+};
+
+/* wyhash */
+u64
+r_hash(void *data, u64 len, u64 seed, u64 (*key)[4]) {
+ if (!key) key = &default_key;
+ u8 *p = data;
+
+ seed ^= mix(seed ^ (*key)[0], (*key)[1]);
+
+ u64 a, b;
+ if unlikely (len == 0) {
+ a = 0;
+ b = 0;
+ } else if unlikely (len < 4) {
+ a = ((u64)p[0] << 16) | ((u64)p[len>>1] << 8) | (u64)p[len-1];
+ b = 0;
+ } else if likely (len <= 16) {
+ a = (r4(p) << 32) | r4(p+((len>>3)<<2));
+ b = (r4(p+len-4) << 32) | r4(p+len-4-((len>>3)<<2));
+ } else {
+ u64 i = len;
+ if unlikely (i > 48) {
+ u64 seed0 = seed;
+ u64 seed1 = seed;
+ u64 seed2 = seed;
+ for (; i > 48; p += 48, i -= 48) {
+ seed0 = mix(r8(p+ 0) ^ (*key)[1], r8(p+ 8) ^ seed0);
+ seed1 = mix(r8(p+16) ^ (*key)[2], r8(p+24) ^ seed1);
+ seed2 = mix(r8(p+32) ^ (*key)[3], r8(p+40) ^ seed2);
+ }
+ seed = seed0 ^ seed1 ^ seed2;
+ }
+ for (; i > 16; p += 16, i -= 16)
+ seed = mix(r8(p) ^ (*key)[1], r8(p+8) ^ seed);
+ a = r8(p+i-16);
+ b = r8(p+i-8);
+ }
+
+ a ^= (*key)[1];
+ b ^= seed;
+ r_mul64(&b, &a, a, b);
+ return mix(a ^ (*key)[0] ^ len, b ^ (*key)[1]);
+}
+
+/* make_secret */
+void
+r_make_hash_key(u64 (*key)[4], u64 seed) {
+ u8 c[] = {
+ 15, 23, 27, 29, 30, 39, 43, 45, 46, 51,
+ 53, 54, 57, 58, 60, 71, 75, 77, 78, 83,
+ 85, 86, 89, 90, 92, 99, 101, 102, 105, 106,
+ 108, 113, 114, 116, 120, 135, 139, 141, 142, 147,
+ 149, 150, 153, 154, 156, 163, 165, 166, 169, 170,
+ 172, 177, 178, 180, 184, 195, 197, 198, 201, 202,
+ 204, 209, 210, 212, 216, 225, 226, 228, 232, 240,
+ };
+
+ for (usize i = 0; i < 4; i++) {
+ again:
+ (*key)[i] = 0;
+ for (usize j = 0; j < 64; j += 8)
+ (*key)[i] |= (u64)c[r_rand(&seed) % sizeof c] << j;
+ if ((*key)[i] % 2 == 0)
+ goto again;
+ for (usize j = 0; j < i; j++) {
+ if (r_popcnt64((*key)[j] ^ (*key)[i]) != 32)
+ goto again;
+ }
+ }
+}