commit e28da4600a0f6d0a55ec81d0e1eaa60bb4d408eb
parent 335a2967fe5e27d1fe92d75dbcb4828c8467e61d
Author: Robert Russell <robertrussell.72001@gmail.com>
Date: Mon, 22 May 2023 17:07:07 -0700
Reorganize and improve hash/rand module
Diffstat:
| M | Makefile | | | 4 | ++-- |
| D | inc/hash.h | | | 26 | -------------------------- |
| A | inc/rand.h | | | 40 | ++++++++++++++++++++++++++++++++++++++++ |
| D | src/hash.c | | | 85 | ------------------------------------------------------------------------------- |
| A | src/rand.c | | | 102 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
5 files changed, 144 insertions(+), 113 deletions(-)
diff --git a/Makefile b/Makefile
@@ -7,8 +7,8 @@ SRC =\
src/bench.c\
src/buffer.c\
src/debug.c\
- src/hash.c\
src/log.c\
+ src/rand.c\
src/str.c\
src/string.c\
src/unicode.c\
@@ -27,9 +27,9 @@ src/alloc.o: src/alloc.c inc/alloc.h inc/def.h inc/log.h inc/rcx.h inc/internal/
src/bench.o: src/bench.c inc/bench.h inc/def.h inc/log.h inc/rcx.h config.mk
src/buffer.o: src/buffer.c inc/alloc.h inc/buffer.h inc/debug.h inc/def.h inc/log.h inc/rcx.h inc/string.h config.mk
src/debug.o: src/debug.c inc/debug.h inc/def.h inc/rcx.h config.mk
-src/hash.o: src/hash.c inc/bits.h inc/def.h inc/hash.h inc/rcx.h config.mk
src/log.o: src/log.c inc/def.h inc/log.h inc/rcx.h config.mk
src/opt.o: src/opt.c inc/def.h inc/opt.h inc/rcx.h config.mk
+src/rand.o: src/rand.c inc/bits.h inc/def.h inc/rand.h inc/rcx.h inc/unix.h config.mk
src/str.o: src/str.c inc/alloc.h inc/debug.h inc/def.h inc/log.h inc/rcx.h inc/str.h config.mk
src/string.o: src/string.c inc/buffer.h inc/debug.h inc/def.h inc/rcx.h inc/string.h inc/utf8.h config.mk
src/unicode.o: src/unicode.c inc/def.h inc/rcx.h gen/ucattab.inc config.mk
diff --git a/inc/hash.h b/inc/hash.h
@@ -1,26 +0,0 @@
-#include "bits.h"
-#include "def.h"
-
-static inline u64
-r_wymix_(u64 x, u64 y) {
- u64 h, l;
- r_mul64(&h, &l, x, y);
- return h ^ l;
-}
-
-u64 r_hash(void *data, u64 len, u64 seed, u64 (*key)[4]);
-
-/* TODO: optimize these common cases */
-static inline u64 r_hash8 (u8 data, u64 seed, u64 (*key)[4]) { return r_hash(&data, 1, seed, key); }
-static inline u64 r_hash16(u16 data, u64 seed, u64 (*key)[4]) { return r_hash(&data, 2, seed, key); }
-static inline u64 r_hash32(u32 data, u64 seed, u64 (*key)[4]) { return r_hash(&data, 4, seed, key); }
-static inline u64 r_hash64(u64 data, u64 seed, u64 (*key)[4]) { return r_hash(&data, 8, seed, key); }
-
-static inline u64
-r_rand(u64 *seed) {
- /* wyrand */
- *seed += U64_C(0xa0761d6478bd642f);
- return r_wymix_(*seed, *seed ^ U64_C(0xe7037ed1a0b428db));
-}
-
-void r_make_hash_key(u64 (*key)[4], u64 seed);
diff --git a/inc/rand.h b/inc/rand.h
@@ -0,0 +1,40 @@
+#include "bits.h"
+#include "def.h"
+
+/* The default seed used by r_rand when no argument is provided. To change the
+ * seed, assign it directly (e.g., using r_trand). */
+extern u64 r_seed;
+
+/* The default key used by r_hash when no key argument is provided. To change
+ * the key, use r_make_hash_key. */
+extern u64 r_hash_key[4];
+
+static inline u64
+r_wymix_(u64 x, u64 y) {
+ u64 h, l;
+ r_mul64(&h, &l, x, y);
+ return h ^ l;
+}
+
+/* Hash the given data using the wyhash algorithm. */
+#define r_hash(data, len, seed, ...) \
+ r_hash_((data), (len), (seed), VA_DEFAULT(,##__VA_ARGS__, &r_hash_key))
+u64 r_hash_(void *data, u64 len, u64 seed, u64 (*key)[4]);
+
+void r_make_hash_key(u64 (*key)[4], u64 seed);
+
+/* Generate len (truly) random bytes using the /dev/urandom interface and put
+ * the result in buf. Return 0 on success; on error, return -1 and set errno.
+ * r_trand is slow; it's intented use is to seed userspace PRNG's (like
+ * r_prand64) on program initialization. */
+int r_trand(u8 *buf, usize len);
+
+/* Generate a pseudo-random u64 seeded from the given u64*, or from r_seed if
+ * no argument is provided. Without an argument, r_prand is not thread-safe.
+ * The PRNG algorithm (wyrand) is fast but not safe for cryptographic use. */
+#define r_prand64(...) r_prand64_(VA_DEFAULT(,##__VA_ARGS__, &r_seed))
+static inline u64
+r_prand64_(u64 *seed) {
+ *seed += U64_C(0xa0761d6478bd642f);
+ return r_wymix_(*seed, *seed ^ U64_C(0xe7037ed1a0b428db));
+}
diff --git a/src/hash.c b/src/hash.c
@@ -1,85 +0,0 @@
-#include "bits.h"
-#include "hash.h"
-#include "rcx.h"
-
-#define mix r_wymix_
-#define r4(p) ((u64)r_read32h(p))
-#define r8(p) ((u64)r_read64h(p))
-
-/* _wyp */
-static u64 default_key[4] = {
- U64_C(0xa0761d6478bd642f),
- U64_C(0xe7037ed1a0b428db),
- U64_C(0x8ebc6af09c88c6e3),
- U64_C(0x589965cc75374cc3),
-};
-
-/* wyhash */
-u64
-r_hash(void *data, u64 len, u64 seed, u64 (*key)[4]) {
- if (!key) key = &default_key;
- u8 *p = data;
-
- seed ^= mix(seed ^ (*key)[0], (*key)[1]);
-
- u64 a, b;
- if unlikely (len == 0) {
- a = 0;
- b = 0;
- } else if unlikely (len < 4) {
- a = ((u64)p[0] << 16) | ((u64)p[len>>1] << 8) | (u64)p[len-1];
- b = 0;
- } else if likely (len <= 16) {
- a = (r4(p) << 32) | r4(p+((len>>3)<<2));
- b = (r4(p+len-4) << 32) | r4(p+len-4-((len>>3)<<2));
- } else {
- u64 i = len;
- if unlikely (i > 48) {
- u64 seed0 = seed;
- u64 seed1 = seed;
- u64 seed2 = seed;
- for (; i > 48; p += 48, i -= 48) {
- seed0 = mix(r8(p+ 0) ^ (*key)[1], r8(p+ 8) ^ seed0);
- seed1 = mix(r8(p+16) ^ (*key)[2], r8(p+24) ^ seed1);
- seed2 = mix(r8(p+32) ^ (*key)[3], r8(p+40) ^ seed2);
- }
- seed = seed0 ^ seed1 ^ seed2;
- }
- for (; i > 16; p += 16, i -= 16)
- seed = mix(r8(p) ^ (*key)[1], r8(p+8) ^ seed);
- a = r8(p+i-16);
- b = r8(p+i-8);
- }
-
- a ^= (*key)[1];
- b ^= seed;
- r_mul64(&b, &a, a, b);
- return mix(a ^ (*key)[0] ^ len, b ^ (*key)[1]);
-}
-
-/* make_secret */
-void
-r_make_hash_key(u64 (*key)[4], u64 seed) {
- u8 c[] = {
- 15, 23, 27, 29, 30, 39, 43, 45, 46, 51,
- 53, 54, 57, 58, 60, 71, 75, 77, 78, 83,
- 85, 86, 89, 90, 92, 99, 101, 102, 105, 106,
- 108, 113, 114, 116, 120, 135, 139, 141, 142, 147,
- 149, 150, 153, 154, 156, 163, 165, 166, 169, 170,
- 172, 177, 178, 180, 184, 195, 197, 198, 201, 202,
- 204, 209, 210, 212, 216, 225, 226, 228, 232, 240,
- };
-
- for (usize i = 0; i < 4; i++) {
- again:
- (*key)[i] = 0;
- for (usize j = 0; j < 64; j += 8)
- (*key)[i] |= (u64)c[r_rand(&seed) % sizeof c] << j;
- if ((*key)[i] % 2 == 0)
- goto again;
- for (usize j = 0; j < i; j++) {
- if (r_popcnt64((*key)[j] ^ (*key)[i]) != 32)
- goto again;
- }
- }
-}
diff --git a/src/rand.c b/src/rand.c
@@ -0,0 +1,102 @@
+#include <errno.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+#include "bits.h"
+#include "rand.h"
+#include "rcx.h"
+#include "unix.h"
+
+#define mix r_wymix_
+#define r4(p) ((u64)r_read32h(p))
+#define r8(p) ((u64)r_read64h(p))
+
+u64 r_seed = 0;
+
+u64 r_hash_key[4] = {
+ U64_C(0xa0761d6478bd642f),
+ U64_C(0xe7037ed1a0b428db),
+ U64_C(0x8ebc6af09c88c6e3),
+ U64_C(0x589965cc75374cc3),
+};
+
+u64
+r_hash_(void *data, u64 len, u64 seed, u64 (*key)[4]) {
+ u8 *p = data;
+
+ seed ^= mix(seed ^ (*key)[0], (*key)[1]);
+
+ u64 a, b;
+ if unlikely (len == 0) {
+ a = 0;
+ b = 0;
+ } else if unlikely (len < 4) {
+ a = ((u64)p[0] << 16) | ((u64)p[len>>1] << 8) | (u64)p[len-1];
+ b = 0;
+ } else if likely (len <= 16) {
+ a = (r4(p) << 32) | r4(p+((len>>3)<<2));
+ b = (r4(p+len-4) << 32) | r4(p+len-4-((len>>3)<<2));
+ } else {
+ u64 i = len;
+ if unlikely (i > 48) {
+ u64 seed0 = seed;
+ u64 seed1 = seed;
+ u64 seed2 = seed;
+ for (; i > 48; p += 48, i -= 48) {
+ seed0 = mix(r8(p+ 0) ^ (*key)[1], r8(p+ 8) ^ seed0);
+ seed1 = mix(r8(p+16) ^ (*key)[2], r8(p+24) ^ seed1);
+ seed2 = mix(r8(p+32) ^ (*key)[3], r8(p+40) ^ seed2);
+ }
+ seed = seed0 ^ seed1 ^ seed2;
+ }
+ for (; i > 16; p += 16, i -= 16)
+ seed = mix(r8(p) ^ (*key)[1], r8(p+8) ^ seed);
+ a = r8(p+i-16);
+ b = r8(p+i-8);
+ }
+
+ a ^= (*key)[1];
+ b ^= seed;
+ r_mul64(&b, &a, a, b);
+ return mix(a ^ (*key)[0] ^ len, b ^ (*key)[1]);
+}
+
+void
+r_make_hash_key(u64 (*key)[4], u64 seed) {
+ u8 c[] = {
+ 15, 23, 27, 29, 30, 39, 43, 45, 46, 51,
+ 53, 54, 57, 58, 60, 71, 75, 77, 78, 83,
+ 85, 86, 89, 90, 92, 99, 101, 102, 105, 106,
+ 108, 113, 114, 116, 120, 135, 139, 141, 142, 147,
+ 149, 150, 153, 154, 156, 163, 165, 166, 169, 170,
+ 172, 177, 178, 180, 184, 195, 197, 198, 201, 202,
+ 204, 209, 210, 212, 216, 225, 226, 228, 232, 240,
+ };
+
+ for (usize i = 0; i < 4; i++) {
+ again:
+ (*key)[i] = 0;
+ for (usize j = 0; j < 64; j += 8)
+ (*key)[i] |= (u64)c[r_prand64(&seed) % sizeof c] << j;
+ if ((*key)[i] % 2 == 0)
+ goto again;
+ for (usize j = 0; j < i; j++) {
+ if (r_popcnt64((*key)[j] ^ (*key)[i]) != 32)
+ goto again;
+ }
+ }
+}
+
+int
+r_trand(u8 *buf, usize len) {
+ int fd = open("/dev/urandom", O_RDONLY);
+ if (fd < 0) return -1;
+ if (r_read_all(0, fd, buf, len) < 0) {
+ int e = errno;
+ close(fd);
+ errno = e;
+ return -1;
+ }
+ close(fd);
+ return 0;
+}