commit 19f3dade30260560bd1c9846d52e845e6e70c761
parent c767da3aab656bf85f3ec956c5037a786c8d343e
Author: Robert Russell <robert@rr3.xyz>
Date: Mon, 13 Jan 2025 10:43:36 -0800
Remove likely useless high-arity bit zips, and inline rest
Diffstat:
 M  Makefile   |  2 --
 M  inc/bits.h | 61 ++++++++++++++++++++++++++++++++++++++++++++++++++-----------
 D  src/bits.c | 63 ---------------------------------------------------------------
3 files changed, 50 insertions(+), 76 deletions(-)
diff --git a/Makefile b/Makefile
@@ -5,7 +5,6 @@ include config.mk
SRC =\
src/alloc.c\
src/bench.c\
- src/bits.c\
src/conv.c\
src/debug.c\
src/dict/impl/table.c\
@@ -50,7 +49,6 @@ librcx.a: $(SRC:.c=.o)
src/alloc.o: src/alloc.c inc/alloc.h inc/def.h inc/log.h inc/rcx.h inc/internal/util.h config.mk
src/bench.o: src/bench.c inc/bench.h inc/def.h inc/log.h inc/rcx.h config.mk
-src/bits.o: src/bits.c inc/bits.h inc/def.h inc/rcx.h config.mk
src/conv.o: src/conv.c inc/conv.h inc/debug.h inc/def.h inc/rcx.h config.mk
src/debug.o: src/debug.c inc/debug.h inc/def.h inc/rcx.h config.mk
src/dict/impl/table.o: src/dict/impl/table.c inc/dict/impl/table.h $(TABLE_DEPS) config.mk
diff --git a/inc/bits.h b/inc/bits.h
@@ -21,18 +21,57 @@ static inline u64 r_rotr64(u64 n, uint c) { return (n >> (c&63u)) | (n << (-c&63
/* ----- Bit zips ----- */
+/* To perform a bit zip with n inputs, we evaluate, over the integers mod 2
+ * (where XOR is addition and AND is multiplication), a recursively-structured
+ * polynomial with n variables, 2^n terms, and coefficients f0,f1,...,f(2^n-1),
+ * the bits of the zipping function f. The pattern is illustrated below for the
+ * first few values of n. In principle, we do this independently for each of
+ * the 64 n-tuples of input bits, but C's bit-wise operators let us do all 64
+ * evaluations in parallel. */
+
/* TODO: Provide 8, 16, and 32 bit as well? The naming gets awkward, and then
- * there would be 28 functions! */
-/* TODO: The smaller of these (r_bz{0,1,2}, perhaps) should probably be static
- * inline. */
-
-u64 r_bz0(u64 f);
-u64 r_bz1(u64 f, u64 u);
-u64 r_bz2(u64 f, u64 u, u64 v);
-u64 r_bz3(u64 f, u64 u, u64 v, u64 w);
-u64 r_bz4(u64 f, u64 u, u64 v, u64 w, u64 x);
-u64 r_bz5(u64 f, u64 u, u64 v, u64 w, u64 x, u64 y);
-u64 r_bz6(u64 f, u64 u, u64 v, u64 w, u64 x, u64 y, u64 z);
+ * there would be 16 functions! */
+
+/* This is the constant polynomial f0. We use a two's complement trick to
+ * broadcast f0 to an entire u64. */
+static inline u64
+r_bz0_(u64 f) {
+ return (u64)!(f & 1u) - 1u; /* the cast keeps the subtraction 64 bits wide */
+}
+
+/* f0 ^
+ * f1 & u */
+static inline u64
+r_bz1_(u64 f, u64 u) {
+ return r_bz0_(f) ^ (r_bz0_(f >> 1) & u);
+}
+
+/* f0 ^
+ * f1 & u ^
+ * f2 & v ^
+ * f3 & u & v */
+static inline u64
+r_bz2_(u64 f, u64 u, u64 v) {
+ return r_bz1_(f, u) ^ (r_bz1_(f >> 2, u) & v);
+}
+
+/* f0 ^
+ * f1 & u ^
+ * f2 & v ^
+ * f3 & u & v ^
+ * f4 & w ^
+ * f5 & u & w ^
+ * f6 & v & w ^
+ * f7 & u & v & w */
+static inline u64
+r_bz3_(u64 f, u64 u, u64 v, u64 w) {
+ return r_bz2_(f, u, v) ^ (r_bz2_(f >> 4, u, v) & w);
+}
+
+static inline u64 r_bz0(u8 f) { return r_bz0_(f); }
+static inline u64 r_bz1(u8 f, u64 u) { return r_bz1_(f, u); }
+static inline u64 r_bz2(u8 f, u64 u, u64 v) { return r_bz2_(f, u, v); }
+static inline u64 r_bz3(u8 f, u64 u, u64 v, u64 w) { return r_bz3_(f, u, v, w); }
/* ----- Population count ----- */
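
For illustration, the bits of f are the coefficients of the polynomial above,
i.e. the algebraic normal form of the zipped function: f = 0x6 encodes u ^ v,
f = 0x8 encodes u & v, f = 0xE encodes u | v, f = 0x3 encodes ~u, and f = 0x68
encodes the three-input majority. A hypothetical check along those lines
(assuming inc/ is on the include path and that the u8/u64 typedefs arrive via
rcx.h or bits.h itself) might read:

/* Illustrative check only: exercises the inlined zips on full-width u64
 * inputs. Assumes inc/ is on the include path and that the u8/u64 typedefs
 * come in via rcx.h or bits.h. */
#include <assert.h>
#include "bits.h"
#include "rcx.h"

int main(void) {
	u64 u = 0xF0F0F0F0F0F0F0F0u;
	u64 v = 0xFF00FF00FF00FF00u;
	u64 w = 0xFFFF0000FFFF0000u;
	assert(r_bz2(0x6, u, v) == (u ^ v));   /* f1, f2:     u ^ v    */
	assert(r_bz2(0x8, u, v) == (u & v));   /* f3:         u & v    */
	assert(r_bz2(0xE, u, v) == (u | v));   /* f1, f2, f3: u | v    */
	assert(r_bz1(0x3, u) == ~u);           /* f0, f1:     ~u       */
	assert(r_bz3(0x68, u, v, w) ==
	       ((u & v) | (u & w) | (v & w))); /* f3, f5, f6: majority */
	return 0;
}
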
diff --git a/src/bits.c b/src/bits.c
@@ -1,63 +0,0 @@
-#include "bits.h"
-#include "rcx.h"
-
-/* To perform a bit zip with n inputs, we evaluate, over the integers mod 2
- * (where XOR is addition and AND is multiplication), a recursively-structured
- * polynomial with n variables, 2^n terms, and coefficients f0,f1,...,f(2^n-1),
- * the bits of the zipping function f. The pattern is illustrated below for the
- * first few values of n. In principle, we do this independently for each of
- * the 64 n-tuples of input bits, but C's bit-wise operators let us do all 64
- * evaluations in parallel. */
-
-/* This is the constant polynomial f0. We use a two's complement trick to
- * broadcast f0 to an entire u64. */
-static inline u64 bz0(u64 f) {
- return !(f & 1u) - 1u;
-}
-
-/* f0 ^
- * f1 & u */
-static inline u64 bz1(u64 f, u64 u) {
- return bz0(f) ^ (bz0(f >> 1) & u);
-}
-
-/* f0 ^
- * f1 & u ^
- * f2 & v ^
- * f3 & u & v */
-static inline u64 bz2(u64 f, u64 u, u64 v) {
- return bz1(f, u) ^ (bz1(f >> 2, u) & v);
-}
-
-/* f0 ^
- * f1 & u ^
- * f2 & v ^
- * f3 & u & v ^
- * f4 & w ^
- * f5 & u & w ^
- * f6 & v & w ^
- * f7 & u & v & w */
-static inline u64 bz3(u64 f, u64 u, u64 v, u64 w) {
- return bz2(f, u, v) ^ (bz2(f >> 4, u, v) & w);
-}
-
-/* Etc... */
-static inline u64 bz4(u64 f, u64 u, u64 v, u64 w, u64 x) {
- return bz3(f, u, v, w) ^ (bz3(f >> 8, u, v, w) & x);
-}
-
-static inline u64 bz5(u64 f, u64 u, u64 v, u64 w, u64 x, u64 y) {
- return bz4(f, u, v, w, x) ^ (bz4(f >> 16, u, v, w, x) & y);
-}
-
-static inline u64 bz6(u64 f, u64 u, u64 v, u64 w, u64 x, u64 y, u64 z) {
- return bz5(f, u, v, w, x, y) ^ (bz5(f >> 32, u, v, w, x, y) & z);
-}
-
-u64 r_bz0(u64 f) { return bz0(f); }
-u64 r_bz1(u64 f, u64 u) { return bz1(f, u); }
-u64 r_bz2(u64 f, u64 u, u64 v) { return bz2(f, u, v); }
-u64 r_bz3(u64 f, u64 u, u64 v, u64 w) { return bz3(f, u, v, w); }
-u64 r_bz4(u64 f, u64 u, u64 v, u64 w, u64 x) { return bz4(f, u, v, w, x); }
-u64 r_bz5(u64 f, u64 u, u64 v, u64 w, u64 x, u64 y) { return bz5(f, u, v, w, x, y); }
-u64 r_bz6(u64 f, u64 u, u64 v, u64 w, u64 x, u64 y, u64 z) { return bz6(f, u, v, w, x, y, z); }
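
Should one of the removed higher arities ever be wanted again, the same
recursion the deleted bz4 used can be reproduced at the call site on top of
r_bz3. A sketch (the u16 name for the 16-coefficient word is an assumption
about inc/def.h):

#include "bits.h" /* r_bz3 and, assumed, the u8/u16/u64 typedefs */

/* Sketch only: a caller-side 4-input zip rebuilt from r_bz3, following the
 * recursion of the deleted bz4. f carries the 16 coefficients f0..f15. */
static inline u64
bz4(u16 f, u64 u, u64 v, u64 w, u64 x) {
	return r_bz3((u8)f, u, v, w) ^ (r_bz3((u8)(f >> 8), u, v, w) & x);
}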