rcx

library of miscellaneous bits of C code
git clone git://git.rr3.xyz/rcx
Log | Files | Refs | README | LICENSE

commit e28da4600a0f6d0a55ec81d0e1eaa60bb4d408eb
parent 335a2967fe5e27d1fe92d75dbcb4828c8467e61d
Author: Robert Russell <robertrussell.72001@gmail.com>
Date:   Mon, 22 May 2023 17:07:07 -0700

Reorganize and improve hash/rand module

Diffstat:
MMakefile | 4++--
Dinc/hash.h | 26--------------------------
Ainc/rand.h | 40++++++++++++++++++++++++++++++++++++++++
Dsrc/hash.c | 85-------------------------------------------------------------------------------
Asrc/rand.c | 102+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
5 files changed, 144 insertions(+), 113 deletions(-)

diff --git a/Makefile b/Makefile @@ -7,8 +7,8 @@ SRC =\ src/bench.c\ src/buffer.c\ src/debug.c\ - src/hash.c\ src/log.c\ + src/rand.c\ src/str.c\ src/string.c\ src/unicode.c\ @@ -27,9 +27,9 @@ src/alloc.o: src/alloc.c inc/alloc.h inc/def.h inc/log.h inc/rcx.h inc/internal/ src/bench.o: src/bench.c inc/bench.h inc/def.h inc/log.h inc/rcx.h config.mk src/buffer.o: src/buffer.c inc/alloc.h inc/buffer.h inc/debug.h inc/def.h inc/log.h inc/rcx.h inc/string.h config.mk src/debug.o: src/debug.c inc/debug.h inc/def.h inc/rcx.h config.mk -src/hash.o: src/hash.c inc/bits.h inc/def.h inc/hash.h inc/rcx.h config.mk src/log.o: src/log.c inc/def.h inc/log.h inc/rcx.h config.mk src/opt.o: src/opt.c inc/def.h inc/opt.h inc/rcx.h config.mk +src/rand.o: src/rand.c inc/bits.h inc/def.h inc/rand.h inc/rcx.h inc/unix.h config.mk src/str.o: src/str.c inc/alloc.h inc/debug.h inc/def.h inc/log.h inc/rcx.h inc/str.h config.mk src/string.o: src/string.c inc/buffer.h inc/debug.h inc/def.h inc/rcx.h inc/string.h inc/utf8.h config.mk src/unicode.o: src/unicode.c inc/def.h inc/rcx.h gen/ucattab.inc config.mk diff --git a/inc/hash.h b/inc/hash.h @@ -1,26 +0,0 @@ -#include "bits.h" -#include "def.h" - -static inline u64 -r_wymix_(u64 x, u64 y) { - u64 h, l; - r_mul64(&h, &l, x, y); - return h ^ l; -} - -u64 r_hash(void *data, u64 len, u64 seed, u64 (*key)[4]); - -/* TODO: optimize these common cases */ -static inline u64 r_hash8 (u8 data, u64 seed, u64 (*key)[4]) { return r_hash(&data, 1, seed, key); } -static inline u64 r_hash16(u16 data, u64 seed, u64 (*key)[4]) { return r_hash(&data, 2, seed, key); } -static inline u64 r_hash32(u32 data, u64 seed, u64 (*key)[4]) { return r_hash(&data, 4, seed, key); } -static inline u64 r_hash64(u64 data, u64 seed, u64 (*key)[4]) { return r_hash(&data, 8, seed, key); } - -static inline u64 -r_rand(u64 *seed) { - /* wyrand */ - *seed += U64_C(0xa0761d6478bd642f); - return r_wymix_(*seed, *seed ^ U64_C(0xe7037ed1a0b428db)); -} - -void r_make_hash_key(u64 (*key)[4], u64 seed); diff --git a/inc/rand.h b/inc/rand.h @@ -0,0 +1,40 @@ +#include "bits.h" +#include "def.h" + +/* The default seed used by r_rand when no argument is provided. To change the + * seed, assign it directly (e.g., using r_trand). */ +extern u64 r_seed; + +/* The default key used by r_hash when no key argument is provided. To change + * the key, use r_make_hash_key. */ +extern u64 r_hash_key[4]; + +static inline u64 +r_wymix_(u64 x, u64 y) { + u64 h, l; + r_mul64(&h, &l, x, y); + return h ^ l; +} + +/* Hash the given data using the wyhash algorithm. */ +#define r_hash(data, len, seed, ...) \ + r_hash_((data), (len), (seed), VA_DEFAULT(,##__VA_ARGS__, &r_hash_key)) +u64 r_hash_(void *data, u64 len, u64 seed, u64 (*key)[4]); + +void r_make_hash_key(u64 (*key)[4], u64 seed); + +/* Generate len (truly) random bytes using the /dev/urandom interface and put + * the result in buf. Return 0 on success; on error, return -1 and set errno. + * r_trand is slow; it's intented use is to seed userspace PRNG's (like + * r_prand64) on program initialization. */ +int r_trand(u8 *buf, usize len); + +/* Generate a pseudo-random u64 seeded from the given u64*, or from r_seed if + * no argument is provided. Without an argument, r_prand is not thread-safe. + * The PRNG algorithm (wyrand) is fast but not safe for cryptographic use. */ +#define r_prand64(...) r_prand64_(VA_DEFAULT(,##__VA_ARGS__, &r_seed)) +static inline u64 +r_prand64_(u64 *seed) { + *seed += U64_C(0xa0761d6478bd642f); + return r_wymix_(*seed, *seed ^ U64_C(0xe7037ed1a0b428db)); +} diff --git a/src/hash.c b/src/hash.c @@ -1,85 +0,0 @@ -#include "bits.h" -#include "hash.h" -#include "rcx.h" - -#define mix r_wymix_ -#define r4(p) ((u64)r_read32h(p)) -#define r8(p) ((u64)r_read64h(p)) - -/* _wyp */ -static u64 default_key[4] = { - U64_C(0xa0761d6478bd642f), - U64_C(0xe7037ed1a0b428db), - U64_C(0x8ebc6af09c88c6e3), - U64_C(0x589965cc75374cc3), -}; - -/* wyhash */ -u64 -r_hash(void *data, u64 len, u64 seed, u64 (*key)[4]) { - if (!key) key = &default_key; - u8 *p = data; - - seed ^= mix(seed ^ (*key)[0], (*key)[1]); - - u64 a, b; - if unlikely (len == 0) { - a = 0; - b = 0; - } else if unlikely (len < 4) { - a = ((u64)p[0] << 16) | ((u64)p[len>>1] << 8) | (u64)p[len-1]; - b = 0; - } else if likely (len <= 16) { - a = (r4(p) << 32) | r4(p+((len>>3)<<2)); - b = (r4(p+len-4) << 32) | r4(p+len-4-((len>>3)<<2)); - } else { - u64 i = len; - if unlikely (i > 48) { - u64 seed0 = seed; - u64 seed1 = seed; - u64 seed2 = seed; - for (; i > 48; p += 48, i -= 48) { - seed0 = mix(r8(p+ 0) ^ (*key)[1], r8(p+ 8) ^ seed0); - seed1 = mix(r8(p+16) ^ (*key)[2], r8(p+24) ^ seed1); - seed2 = mix(r8(p+32) ^ (*key)[3], r8(p+40) ^ seed2); - } - seed = seed0 ^ seed1 ^ seed2; - } - for (; i > 16; p += 16, i -= 16) - seed = mix(r8(p) ^ (*key)[1], r8(p+8) ^ seed); - a = r8(p+i-16); - b = r8(p+i-8); - } - - a ^= (*key)[1]; - b ^= seed; - r_mul64(&b, &a, a, b); - return mix(a ^ (*key)[0] ^ len, b ^ (*key)[1]); -} - -/* make_secret */ -void -r_make_hash_key(u64 (*key)[4], u64 seed) { - u8 c[] = { - 15, 23, 27, 29, 30, 39, 43, 45, 46, 51, - 53, 54, 57, 58, 60, 71, 75, 77, 78, 83, - 85, 86, 89, 90, 92, 99, 101, 102, 105, 106, - 108, 113, 114, 116, 120, 135, 139, 141, 142, 147, - 149, 150, 153, 154, 156, 163, 165, 166, 169, 170, - 172, 177, 178, 180, 184, 195, 197, 198, 201, 202, - 204, 209, 210, 212, 216, 225, 226, 228, 232, 240, - }; - - for (usize i = 0; i < 4; i++) { - again: - (*key)[i] = 0; - for (usize j = 0; j < 64; j += 8) - (*key)[i] |= (u64)c[r_rand(&seed) % sizeof c] << j; - if ((*key)[i] % 2 == 0) - goto again; - for (usize j = 0; j < i; j++) { - if (r_popcnt64((*key)[j] ^ (*key)[i]) != 32) - goto again; - } - } -} diff --git a/src/rand.c b/src/rand.c @@ -0,0 +1,102 @@ +#include <errno.h> +#include <fcntl.h> +#include <unistd.h> + +#include "bits.h" +#include "rand.h" +#include "rcx.h" +#include "unix.h" + +#define mix r_wymix_ +#define r4(p) ((u64)r_read32h(p)) +#define r8(p) ((u64)r_read64h(p)) + +u64 r_seed = 0; + +u64 r_hash_key[4] = { + U64_C(0xa0761d6478bd642f), + U64_C(0xe7037ed1a0b428db), + U64_C(0x8ebc6af09c88c6e3), + U64_C(0x589965cc75374cc3), +}; + +u64 +r_hash_(void *data, u64 len, u64 seed, u64 (*key)[4]) { + u8 *p = data; + + seed ^= mix(seed ^ (*key)[0], (*key)[1]); + + u64 a, b; + if unlikely (len == 0) { + a = 0; + b = 0; + } else if unlikely (len < 4) { + a = ((u64)p[0] << 16) | ((u64)p[len>>1] << 8) | (u64)p[len-1]; + b = 0; + } else if likely (len <= 16) { + a = (r4(p) << 32) | r4(p+((len>>3)<<2)); + b = (r4(p+len-4) << 32) | r4(p+len-4-((len>>3)<<2)); + } else { + u64 i = len; + if unlikely (i > 48) { + u64 seed0 = seed; + u64 seed1 = seed; + u64 seed2 = seed; + for (; i > 48; p += 48, i -= 48) { + seed0 = mix(r8(p+ 0) ^ (*key)[1], r8(p+ 8) ^ seed0); + seed1 = mix(r8(p+16) ^ (*key)[2], r8(p+24) ^ seed1); + seed2 = mix(r8(p+32) ^ (*key)[3], r8(p+40) ^ seed2); + } + seed = seed0 ^ seed1 ^ seed2; + } + for (; i > 16; p += 16, i -= 16) + seed = mix(r8(p) ^ (*key)[1], r8(p+8) ^ seed); + a = r8(p+i-16); + b = r8(p+i-8); + } + + a ^= (*key)[1]; + b ^= seed; + r_mul64(&b, &a, a, b); + return mix(a ^ (*key)[0] ^ len, b ^ (*key)[1]); +} + +void +r_make_hash_key(u64 (*key)[4], u64 seed) { + u8 c[] = { + 15, 23, 27, 29, 30, 39, 43, 45, 46, 51, + 53, 54, 57, 58, 60, 71, 75, 77, 78, 83, + 85, 86, 89, 90, 92, 99, 101, 102, 105, 106, + 108, 113, 114, 116, 120, 135, 139, 141, 142, 147, + 149, 150, 153, 154, 156, 163, 165, 166, 169, 170, + 172, 177, 178, 180, 184, 195, 197, 198, 201, 202, + 204, 209, 210, 212, 216, 225, 226, 228, 232, 240, + }; + + for (usize i = 0; i < 4; i++) { + again: + (*key)[i] = 0; + for (usize j = 0; j < 64; j += 8) + (*key)[i] |= (u64)c[r_prand64(&seed) % sizeof c] << j; + if ((*key)[i] % 2 == 0) + goto again; + for (usize j = 0; j < i; j++) { + if (r_popcnt64((*key)[j] ^ (*key)[i]) != 32) + goto again; + } + } +} + +int +r_trand(u8 *buf, usize len) { + int fd = open("/dev/urandom", O_RDONLY); + if (fd < 0) return -1; + if (r_read_all(0, fd, buf, len) < 0) { + int e = errno; + close(fd); + errno = e; + return -1; + } + close(fd); + return 0; +}