Add full width signed multiplies - rcx - miscellaneous C library

commit 3260a7d245b9f6f9b764c4cc47cb029b93a67f46
parent 2c0567085f8ddb562cb1d26a666e34e1004e96ee
Author: Robert Russell <robert@rr3.xyz>
Date:   Sun, 12 Jan 2025 23:19:20 -0800

Add full width signed multiplies

Diffstat:
M inc/bits.h  | 117 +++++++++++++++++++++++++++++++++++++++++++++++++++++++------------------------
M inc/rand.h  | 2 +-
M src/rand.c  | 2 +-

3 files changed, 84 insertions(+), 37 deletions(-)
diff --git a/inc/bits.h b/inc/bits.h
@@ -205,6 +205,33 @@ r_rzb64(u64 n) {
 }
 
 
+/* ----- Sign extension ----- */
+
+static inline u8
+r_sext8(u8 x, uint b) { /* Sign-extend the low b bits of x to 8 bits. */
+	uint c = 8 - b; /* number of bits above the b-bit field */
+	return (u8)((i8)(x << c) >> c); /* move the field's top bit to the sign position, then shift back as signed; assumes >> on a negative value is arithmetic (implementation-defined in C) */
+}
+
+static inline u16
+r_sext16(u16 x, uint b) { /* Sign-extend the low b bits of x to 16 bits. */
+	uint c = 16 - b; /* number of bits above the b-bit field */
+	return (u16)((i16)(x << c) >> c); /* move the field's top bit to the sign position, then shift back as signed; assumes >> on a negative value is arithmetic (implementation-defined in C) */
+}
+
+static inline u32
+r_sext32(u32 x, uint b) { /* Sign-extend the low b bits of x to 32 bits. */
+	uint c = 32 - b; /* number of bits above the b-bit field. NOTE(review): b == 0 gives c == 32, an out-of-range shift count for a 32-bit operand; presumably callers pass 1..32 - confirm */
+	return (u32)((i32)(x << c) >> c); /* move the field's top bit to the sign position, then shift back as signed; assumes >> on a negative value is arithmetic (implementation-defined in C) */
+}
+
+static inline u64
+r_sext64(u64 x, uint b) { /* Sign-extend the low b bits of x to 64 bits. */
+	uint c = 64 - b; /* number of bits above the b-bit field. NOTE(review): b == 0 gives c == 64, an out-of-range shift count for a 64-bit operand; presumably callers pass 1..64 - confirm */
+	return (u64)((i64)(x << c) >> c); /* move the field's top bit to the sign position, then shift back as signed; assumes >> on a negative value is arithmetic (implementation-defined in C) */
+}
+
+
 /* ----- Ternary add and subtract ----- */
 
 /* We implement ternary add/sub on arbitrary unsigned integers instead of with
@@ -292,34 +319,34 @@ r_sub64(u64 *h, u64 *l, u64 x, u64 y, u64 z) {
 /* ----- Full width multiply ----- */
 
 static inline void
-r_mul8(u8 *h, u8 *l, u8 x, u8 y) {
+r_mulu8(u8 *h, u8 *l, u8 x, u8 y) { /* Unsigned full-width multiply: the product of x and y is formed in a u16; its high 8 bits go to *h and its low 8 bits to *l. */
 	u16 hl = (u16)x * (u16)y;
 	*h = hl >> 8;
 	*l = hl;
 }
 
 static inline void
-r_mul16(u16 *h, u16 *l, u16 x, u16 y) {
+r_mulu16(u16 *h, u16 *l, u16 x, u16 y) { /* Unsigned full-width multiply: the product of x and y is formed in a u32; its high 16 bits go to *h and its low 16 bits to *l. */
 	u32 hl = (u32)x * (u32)y;
 	*h = hl >> 16;
 	*l = hl;
 }
 
 static inline void
-r_mul32(u32 *h, u32 *l, u32 x, u32 y) {
+r_mulu32(u32 *h, u32 *l, u32 x, u32 y) { /* Unsigned full-width multiply: the product of x and y is formed in a u64; its high 32 bits go to *h and its low 32 bits to *l. */
 	u64 hl = (u64)x * (u64)y;
 	*h = hl >> 32;
 	*l = hl;
 }
 
 static inline void
-r_mul64(u64 *h, u64 *l, u64 x, u64 y) {
+r_mulu64(u64 *h, u64 *l, u64 x, u64 y) {
 #ifdef R_HAVE_128
 	u128 hl = (u128)x * (u128)y;
 	*h = hl >> 64;
 	*l = hl;
 #else
-	const u64 m = (U64_C(1)<<32) - 1;
+	const u64 m = (U64_C(1) << 32) - 1;
 
 	u64 x0 = x & m;
 	u64 x1 = x >> 32;
@@ -331,10 +358,57 @@ r_mul64(u64 *h, u64 *l, u64 x, u64 y) {
 	u64 x1y0 = x1 * y0;
 	u64 x1y1 = x1 * y1;
 
-	u64 c = ((x0y1&m) + (x1y0&m) + (x0y0>>32)) >> 32;
+	u64 c = ((x0y1 & m) + (x1y0 & m) + (x0y0 >> 32)) >> 32;
+
+	*h = x1y1 + (x0y1 >> 32) + (x1y0 >> 32) + c;
+	*l = x0y0 + (x0y1 << 32) + (x1y0 << 32);
+#endif
+}
+
+static inline void
+r_muls8(i8 *h, u8 *l, i8 x, i8 y) { /* Signed full-width multiply: the product of x and y is formed in an i16; the signed high half goes to *h and the low half, as unsigned, to *l. */
+	i16 hl = (i16)x * (i16)y;
+	*h = hl >> 8; /* >> on a negative value is implementation-defined in C; an arithmetic shift is assumed */
+	*l = hl; /* conversion to the unsigned type keeps the low bits */
+}
+
+static inline void
+r_muls16(i16 *h, u16 *l, i16 x, i16 y) { /* Signed full-width multiply: the product of x and y is formed in an i32; the signed high half goes to *h and the low half, as unsigned, to *l. */
+	i32 hl = (i32)x * (i32)y;
+	*h = hl >> 16; /* >> on a negative value is implementation-defined in C; an arithmetic shift is assumed */
+	*l = hl; /* conversion to the unsigned type keeps the low bits */
+}
+
+static inline void
+r_muls32(i32 *h, u32 *l, i32 x, i32 y) { /* Signed full-width multiply: the product of x and y is formed in an i64; the signed high half goes to *h and the low half, as unsigned, to *l. */
+	i64 hl = (i64)x * (i64)y;
+	*h = hl >> 32; /* >> on a negative value is implementation-defined in C; an arithmetic shift is assumed */
+	*l = hl; /* conversion to the unsigned type keeps the low bits */
+}
+
+static inline void
+r_muls64(i64 *h, u64 *l, i64 x, i64 y) { /* Signed full-width multiply: the signed high 64 bits of x*y go to *h, the low 64 bits go to *l. */
+#ifdef R_HAVE_128
+	i128 hl = (i128)x * (i128)y;
+	*h = hl >> 64;
+	*l = hl;
+#else
+	const u64 m = (U64_C(1) << 32) - 1; /* mask selecting the low 32 bits */
 
-	*h = x1y1 + (x0y1>>32) + (x1y0>>32) + c;
-	*l = x0y0 + (x0y1<<32) + (x1y0<<32);
+	u64 x0 = x & m; /* low 32-bit limb, unsigned */
+	i64 x1 = x >> 32; /* high limb, keeps the sign; assumes >> on a negative value is arithmetic (implementation-defined in C) */
+	u64 y0 = y & m;
+	i64 y1 = y >> 32;
+	/* Partial products: x*y == x1y1*2^64 + (x0y1 + x1y0)*2^32 + x0y0. Products involving a high limb are stored as i64. */
+	u64 x0y0 = x0 * y0;
+	i64 x0y1 = x0 * y1;
+	i64 x1y0 = x1 * y0;
+	i64 x1y1 = x1 * y1;
+	/* Carry from the middle 32-bit column (low halves of the cross terms plus the high half of x0y0) into the high word. */
+	u64 c = ((x0y1 & m) + (x1y0 & m) + (x0y0 >> 32)) >> 32;
+	/* The high word uses signed >> on the cross terms; the low word shifts them as u64, because left-shifting a negative or overflowing i64 is undefined behavior. */
+	*h = x1y1 + (x0y1 >> 32) + (x1y0 >> 32) + c;
+	*l = x0y0 + ((u64)x0y1 << 32) + ((u64)x1y0 << 32);
 #endif
 }
 
@@ -372,33 +446,6 @@ r_swap64(u64 n) {
 #endif
 
 
-/* ----- Sign extension ----- */
-
-static inline u8
-r_sext8(u8 x, uint b) {
-	uint c = 8 - b;
-	return (u8)((i8)(x << c) >> c);
-}
-
-static inline u16
-r_sext16(u16 x, uint b) {
-	uint c = 16 - b;
-	return (u16)((i16)(x << c) >> c);
-}
-
-static inline u32
-r_sext32(u32 x, uint b) {
-	uint c = 32 - b;
-	return (u32)((i32)(x << c) >> c);
-}
-
-static inline u64
-r_sext64(u64 x, uint b) {
-	uint c = 64 - b;
-	return (u64)((i64)(x << c) >> c);
-}
-
-
 /* ----- Endian conversions ----- */
 
 /* There is 2x redundancy here (e.g., ltoh = htol), but this allows code using
diff --git a/inc/rand.h b/inc/rand.h
@@ -14,7 +14,7 @@ extern u64 r_hash_key[4];
 static inline u64
 r_wymix_(u64 x, u64 y) {
 	u64 h, l;
-	r_mul64(&h, &l, x, y);
+	r_mulu64(&h, &l, x, y); /* full-width unsigned product of x and y, split into high word h and low word l */
 	return h ^ l;
 }
 
diff --git a/src/rand.c b/src/rand.c
@@ -57,7 +57,7 @@ r_hash_(void *data, u64 len, u64 seed, u64 (*key)[4]) {
 
 	a ^= (*key)[1];
 	b ^= seed;
-	r_mul64(&b, &a, a, b);
+	r_mulu64(&b, &a, a, b);
 	return mix(a ^ (*key)[0] ^ len, b ^ (*key)[1]);
 }

	rcx miscellaneous C library
	git clone git://git.rr3.xyz/rcx
	Log \| Files \| Refs \| README \| LICENSE

M	inc/bits.h	\|	117	+++++++++++++++++++++++++++++++++++++++++++++++++++++++------------------------
M	inc/rand.h	\|	2	+-
M	src/rand.c	\|	2	+-