rcx

miscellaneous C library
git clone git://git.rr3.xyz/rcx
Log | Files | Refs | README | LICENSE

commit 3260a7d245b9f6f9b764c4cc47cb029b93a67f46
parent 2c0567085f8ddb562cb1d26a666e34e1004e96ee
Author: Robert Russell <robert@rr3.xyz>
Date:   Sun, 12 Jan 2025 23:19:20 -0800

Add full width signed multiplies

Diffstat:
M inc/bits.h | 117 +++++++++++++++++++++++++++++++++++++++++++++++++++++++------------------------
M inc/rand.h | 2 +-
M src/rand.c | 2 +-
3 files changed, 84 insertions(+), 37 deletions(-)

diff --git a/inc/bits.h b/inc/bits.h @@ -205,6 +205,33 @@ r_rzb64(u64 n) { } +/* ----- Sign extension ----- */ + +static inline u8 +r_sext8(u8 x, uint b) { + uint c = 8 - b; + return (u8)((i8)(x << c) >> c); +} + +static inline u16 +r_sext16(u16 x, uint b) { + uint c = 16 - b; + return (u16)((i16)(x << c) >> c); +} + +static inline u32 +r_sext32(u32 x, uint b) { + uint c = 32 - b; + return (u32)((i32)(x << c) >> c); +} + +static inline u64 +r_sext64(u64 x, uint b) { + uint c = 64 - b; + return (u64)((i64)(x << c) >> c); +} + + /* ----- Ternary add and subtract ----- */ /* We implement ternary add/sub on arbitrary unsigned integers instead of with @@ -292,34 +319,34 @@ r_sub64(u64 *h, u64 *l, u64 x, u64 y, u64 z) { /* ----- Full width multiply ----- */ static inline void -r_mul8(u8 *h, u8 *l, u8 x, u8 y) { +r_mulu8(u8 *h, u8 *l, u8 x, u8 y) { u16 hl = (u16)x * (u16)y; *h = hl >> 8; *l = hl; } static inline void -r_mul16(u16 *h, u16 *l, u16 x, u16 y) { +r_mulu16(u16 *h, u16 *l, u16 x, u16 y) { u32 hl = (u32)x * (u32)y; *h = hl >> 16; *l = hl; } static inline void -r_mul32(u32 *h, u32 *l, u32 x, u32 y) { +r_mulu32(u32 *h, u32 *l, u32 x, u32 y) { u64 hl = (u64)x * (u64)y; *h = hl >> 32; *l = hl; } static inline void -r_mul64(u64 *h, u64 *l, u64 x, u64 y) { +r_mulu64(u64 *h, u64 *l, u64 x, u64 y) { #ifdef R_HAVE_128 u128 hl = (u128)x * (u128)y; *h = hl >> 64; *l = hl; #else - const u64 m = (U64_C(1)<<32) - 1; + const u64 m = (U64_C(1) << 32) - 1; u64 x0 = x & m; u64 x1 = x >> 32; @@ -331,10 +358,57 @@ r_mul64(u64 *h, u64 *l, u64 x, u64 y) { u64 x1y0 = x1 * y0; u64 x1y1 = x1 * y1; - u64 c = ((x0y1&m) + (x1y0&m) + (x0y0>>32)) >> 32; + u64 c = ((x0y1 & m) + (x1y0 & m) + (x0y0 >> 32)) >> 32; + + *h = x1y1 + (x0y1 >> 32) + (x1y0 >> 32) + c; + *l = x0y0 + (x0y1 << 32) + (x1y0 << 32); +#endif +} + +static inline void +r_muls8(i8 *h, u8 *l, i8 x, i8 y) { + i16 hl = (i16)x * (i16)y; + *h = hl >> 8; + *l = hl; +} + +static inline void +r_muls16(i16 *h, u16 *l, i16 x, i16 y) { + 
i32 hl = (i32)x * (i32)y; + *h = hl >> 16; + *l = hl; +} + +static inline void +r_muls32(i32 *h, u32 *l, i32 x, i32 y) { + i64 hl = (i64)x * (i64)y; + *h = hl >> 32; + *l = hl; +} + +static inline void +r_muls64(i64 *h, u64 *l, i64 x, i64 y) { +#ifdef R_HAVE_128 + i128 hl = (i128)x * (i128)y; + *h = hl >> 64; + *l = hl; +#else + const u64 m = (U64_C(1) << 32) - 1; - *h = x1y1 + (x0y1>>32) + (x1y0>>32) + c; - *l = x0y0 + (x0y1<<32) + (x1y0<<32); + u64 x0 = x & m; + i64 x1 = x >> 32; + u64 y0 = y & m; + i64 y1 = y >> 32; + + u64 x0y0 = x0 * y0; + i64 x0y1 = x0 * y1; + i64 x1y0 = x1 * y0; + i64 x1y1 = x1 * y1; + + u64 c = ((x0y1 & m) + (x1y0 & m) + (x0y0 >> 32)) >> 32; + + *h = x1y1 + (x0y1 >> 32) + (x1y0 >> 32) + c; + *l = x0y0 + (x0y1 << 32) + (x1y0 << 32); #endif } @@ -372,33 +446,6 @@ r_swap64(u64 n) { #endif -/* ----- Sign extension ----- */ - -static inline u8 -r_sext8(u8 x, uint b) { - uint c = 8 - b; - return (u8)((i8)(x << c) >> c); -} - -static inline u16 -r_sext16(u16 x, uint b) { - uint c = 16 - b; - return (u16)((i16)(x << c) >> c); -} - -static inline u32 -r_sext32(u32 x, uint b) { - uint c = 32 - b; - return (u32)((i32)(x << c) >> c); -} - -static inline u64 -r_sext64(u64 x, uint b) { - uint c = 64 - b; - return (u64)((i64)(x << c) >> c); -} - - /* ----- Endian conversions ----- */ /* There is 2x redundancy here (e.g., ltoh = htol), but this allows code using diff --git a/inc/rand.h b/inc/rand.h @@ -14,7 +14,7 @@ extern u64 r_hash_key[4]; static inline u64 r_wymix_(u64 x, u64 y) { u64 h, l; - r_mul64(&h, &l, x, y); + r_mulu64(&h, &l, x, y); return h ^ l; } diff --git a/src/rand.c b/src/rand.c @@ -57,7 +57,7 @@ r_hash_(void *data, u64 len, u64 seed, u64 (*key)[4]) { a ^= (*key)[1]; b ^= seed; - r_mul64(&b, &a, a, b); + r_mulu64(&b, &a, a, b); return mix(a ^ (*key)[0] ^ len, b ^ (*key)[1]); }