rcx

miscellaneous C library
git clone git://git.rr3.xyz/rcx
Log | Files | Refs | README | LICENSE

commit 19f3dade30260560bd1c9846d52e845e6e70c761
parent c767da3aab656bf85f3ec956c5037a786c8d343e
Author: Robert Russell <robert@rr3.xyz>
Date:   Mon, 13 Jan 2025 10:43:36 -0800

Remove likely useless high-arity bit zips, and inline rest

Diffstat:
MMakefile | 2--
Minc/bits.h | 61++++++++++++++++++++++++++++++++++++++++++++++++++-----------
Dsrc/bits.c | 63---------------------------------------------------------------
3 files changed, 50 insertions(+), 76 deletions(-)

diff --git a/Makefile b/Makefile @@ -5,7 +5,6 @@ include config.mk SRC =\ src/alloc.c\ src/bench.c\ - src/bits.c\ src/conv.c\ src/debug.c\ src/dict/impl/table.c\ @@ -50,7 +49,6 @@ librcx.a: $(SRC:.c=.o) src/alloc.o: src/alloc.c inc/alloc.h inc/def.h inc/log.h inc/rcx.h inc/internal/util.h config.mk src/bench.o: src/bench.c inc/bench.h inc/def.h inc/log.h inc/rcx.h config.mk -src/bits.o: src/bits.c inc/bits.h inc/def.h inc/rcx.h config.mk src/conv.o: src/conv.c inc/conv.h inc/debug.h inc/def.h inc/rcx.h config.mk src/debug.o: src/debug.c inc/debug.h inc/def.h inc/rcx.h config.mk src/dict/impl/table.o: src/dict/impl/table.c inc/dict/impl/table.h $(TABLE_DEPS) config.mk diff --git a/inc/bits.h b/inc/bits.h @@ -21,18 +21,57 @@ static inline u64 r_rotr64(u64 n, uint c) { return (n >> (c&63u)) | (n << (-c&63 /* ----- Bit zips ----- */ +/* To perform a bit zip with n inputs, we evaluate, over the integers mod 2 + * (where XOR is addition and AND is multiplication), a recursively-structured + * polynomial with n variables, 2^n terms, and coefficients f0,f1,...,f(2^n-1), + * the bits of the zipping function f. The pattern is illustrated below for the + * first few values of n. In principle, we do this independently for each of + * the 64 n-tuples of input bits, but C's bit-wise operators let us do all 64 + * evaluations in parallel. */ + /* TODO: Provide 8, 16, and 32 bit as well? The naming gets awkward, and then - * there would be 28 functions! */ -/* TODO: The smaller of these (r_bz{0,1,2}, perhaps) should probably be static - * inline. */ - -u64 r_bz0(u64 f); -u64 r_bz1(u64 f, u64 u); -u64 r_bz2(u64 f, u64 u, u64 v); -u64 r_bz3(u64 f, u64 u, u64 v, u64 w); -u64 r_bz4(u64 f, u64 u, u64 v, u64 w, u64 x); -u64 r_bz5(u64 f, u64 u, u64 v, u64 w, u64 x, u64 y); -u64 r_bz6(u64 f, u64 u, u64 v, u64 w, u64 x, u64 y, u64 z); + * there would be 16 functions! */ + +/* This is the constant polynomial f0. We use a two's complement trick to + * broadcast f0 to an entire u64. */ +static inline u64 +r_bz0_(u64 f) { + return !(f & 1u) - 1u; +} + +/* f0 ^ + * f1 & u */ +static inline u64 +r_bz1_(u64 f, u64 u) { + return r_bz0_(f) ^ (r_bz0_(f >> 1) & u); +} + +/* f0 ^ + * f1 & u ^ + * f2 & v ^ + * f3 & u & v */ +static inline u64 +r_bz2_(u64 f, u64 u, u64 v) { + return r_bz1_(f, u) ^ (r_bz1_(f >> 2, u) & v); +} + +/* f0 ^ + * f1 & u ^ + * f2 & v ^ + * f3 & u & v ^ + * f4 & w ^ + * f5 & u & w ^ + * f6 & v & w ^ + * f7 & u & v & w */ +static inline u64 +r_bz3_(u64 f, u64 u, u64 v, u64 w) { + return r_bz2_(f, u, v) ^ (r_bz2_(f >> 4, u, v) & w); +} + +static inline u64 r_bz0(u8 f) { return r_bz0_(f); } +static inline u64 r_bz1(u8 f, u64 u) { return r_bz1_(f, u); } +static inline u64 r_bz2(u8 f, u64 u, u64 v) { return r_bz2_(f, u, v); } +static inline u64 r_bz3(u8 f, u64 u, u64 v, u64 w) { return r_bz3_(f, u, v, w); } /* ----- Population count ----- */ diff --git a/src/bits.c b/src/bits.c @@ -1,63 +0,0 @@ -#include "bits.h" -#include "rcx.h" - -/* To perform a bit zip with n inputs, we evaluate, over the integers mod 2 - * (where XOR is addition and AND is multiplication), a recursively-structured - * polynomial with n variables, 2^n terms, and coefficients f0,f1,...,f(2^n-1), - * the bits of the zipping function f. The pattern is illustrated below for the - * first few values of n. In principle, we do this independently for each of - * the 64 n-tuples of input bits, but C's bit-wise operators let us do all 64 - * evaluations in parallel. */ - -/* This is the constant polynomial f0. We use a two's complement trick to - * broadcast f0 to an entire u64. */ -static inline u64 bz0(u64 f) { - return !(f & 1u) - 1u; -} - -/* f0 ^ - * f1 & u */ -static inline u64 bz1(u64 f, u64 u) { - return bz0(f) ^ (bz0(f >> 1) & u); -} - -/* f0 ^ - * f1 & u ^ - * f2 & v ^ - * f3 & u & v */ -static inline u64 bz2(u64 f, u64 u, u64 v) { - return bz1(f, u) ^ (bz1(f >> 2, u) & v); -} - -/* f0 ^ - * f1 & u ^ - * f2 & v ^ - * f3 & u & v ^ - * f4 & w ^ - * f5 & u & w ^ - * f6 & v & w ^ - * f7 & u & v & w */ -static inline u64 bz3(u64 f, u64 u, u64 v, u64 w) { - return bz2(f, u, v) ^ (bz2(f >> 4, u, v) & w); -} - -/* Etc... */ -static inline u64 bz4(u64 f, u64 u, u64 v, u64 w, u64 x) { - return bz3(f, u, v, w) ^ (bz3(f >> 8, u, v, w) & x); -} - -static inline u64 bz5(u64 f, u64 u, u64 v, u64 w, u64 x, u64 y) { - return bz4(f, u, v, w, x) ^ (bz4(f >> 16, u, v, w, x) & y); -} - -static inline u64 bz6(u64 f, u64 u, u64 v, u64 w, u64 x, u64 y, u64 z) { - return bz5(f, u, v, w, x, y) ^ (bz5(f >> 32, u, v, w, x, y) & z); -} - -u64 r_bz0(u64 f) { return bz0(f); } -u64 r_bz1(u64 f, u64 u) { return bz1(f, u); } -u64 r_bz2(u64 f, u64 u, u64 v) { return bz2(f, u, v); } -u64 r_bz3(u64 f, u64 u, u64 v, u64 w) { return bz3(f, u, v, w); } -u64 r_bz4(u64 f, u64 u, u64 v, u64 w, u64 x) { return bz4(f, u, v, w, x); } -u64 r_bz5(u64 f, u64 u, u64 v, u64 w, u64 x, u64 y) { return bz5(f, u, v, w, x, y); } -u64 r_bz6(u64 f, u64 u, u64 v, u64 w, u64 x, u64 y, u64 z) { return bz6(f, u, v, w, x, y, z); }