rcx

library of miscellaneous bits of C code
git clone git://git.rr3.xyz/rcx
Log | Files | Refs | README | LICENSE

commit 6e3d696e92f9c04a13224de6429e98c5b6d3b05b
parent 0014ba79a8b1c54889853961c5d3ad261cb0140f
Author: Robert Russell <robertrussell.72001@gmail.com>
Date:   Sun, 21 May 2023 18:22:41 -0700

Make all bits functions static inline

Diffstat:
M Makefile   |  2 --
M inc/bits.h | 64 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-----
D src/bits.c | 63 ---------------------------------------------------------------
3 files changed, 59 insertions(+), 70 deletions(-)

diff --git a/Makefile b/Makefile
@@ -5,7 +5,6 @@ include config.mk
 SRC =\
 	src/alloc.c\
 	src/bench.c\
-	src/bits.c\
 	src/buffer.c\
 	src/debug.c\
 	src/log.c\
@@ -25,7 +24,6 @@ librcx.a: $(SRC:.c=.o)
 
 src/alloc.o: src/alloc.c inc/alloc.h inc/def.h inc/log.h inc/rcx.h inc/internal/util.h config.mk
 src/bench.o: src/bench.c inc/bench.h inc/def.h inc/log.h inc/rcx.h config.mk
-src/bits.o: src/bits.c inc/bits.h inc/def.h inc/rcx.h config.mk
 src/buffer.o: src/buffer.c inc/alloc.h inc/buffer.h inc/debug.h inc/def.h inc/log.h inc/rcx.h inc/string.h config.mk
 src/debug.o: src/debug.c inc/debug.h inc/def.h inc/rcx.h config.mk
 src/log.o: src/log.c inc/def.h inc/log.h inc/rcx.h config.mk
diff --git a/inc/bits.h b/inc/bits.h
@@ -4,14 +4,68 @@
 
 #include "def.h"
 
-int r_popcnt8(u8 n);
-int r_popcnt16(u16 n);
-int r_popcnt32(u32 n);
-int r_popcnt64(u64 n);
+/* The popcnt functions are implemented with a divide and conquer strategy.
+ * See Henry Warren, Hacker's Delight, 2 ed., sec. 5.1. */
+
+static inline int
+r_popcnt8(u8 n) {
+	n = (n & U8_C(0x55)) + ((n>>1) & U8_C(0x55));
+	n = (n & U8_C(0x33)) + ((n>>2) & U8_C(0x33));
+	n = (n & U8_C(0x0f)) + ((n>>4) & U8_C(0x0f));
+	return n;
+}
+
+static inline int
+r_popcnt16(u16 n) {
+	n = (n & U16_C(0x5555)) + ((n>>1) & U16_C(0x5555));
+	n = (n & U16_C(0x3333)) + ((n>>2) & U16_C(0x3333));
+	n = (n & U16_C(0x0f0f)) + ((n>>4) & U16_C(0x0f0f));
+	n = (n & U16_C(0x00ff)) + ((n>>8) & U16_C(0x00ff));
+	return n;
+}
+
+static inline int
+r_popcnt32(u32 n) {
+	n = (n & U32_C(0x55555555)) + ((n>>1) & U32_C(0x55555555));
+	n = (n & U32_C(0x33333333)) + ((n>>2) & U32_C(0x33333333));
+	n = (n & U32_C(0x0f0f0f0f)) + ((n>>4) & U32_C(0x0f0f0f0f));
+	n = (n & U32_C(0x00ff00ff)) + ((n>>8) & U32_C(0x00ff00ff));
+	n = (n & U32_C(0x0000ffff)) + ((n>>16) & U32_C(0x0000ffff));
+	return n;
+}
+
+static inline int
+r_popcnt64(u64 n) {
+	n = (n & U64_C(0x5555555555555555)) + ((n>>1) & U64_C(0x5555555555555555));
+	n = (n & U64_C(0x3333333333333333)) + ((n>>2) & U64_C(0x3333333333333333));
+	n = (n & U64_C(0x0f0f0f0f0f0f0f0f)) + ((n>>4) & U64_C(0x0f0f0f0f0f0f0f0f));
+	n = (n & U64_C(0x00ff00ff00ff00ff)) + ((n>>8) & U64_C(0x00ff00ff00ff00ff));
+	n = (n & U64_C(0x0000ffff0000ffff)) + ((n>>16) & U64_C(0x0000ffff0000ffff));
+	n = (n & U64_C(0x00000000ffffffff)) + ((n>>32) & U64_C(0x00000000ffffffff));
+	return n;
+}
 
 /* Perform a full-width multiply of x and y, storing the upper (resp., lower)
  * 64 bits of the product in *h (resp., *l). */
-void r_mul64(u64 *h, u64 *l, u64 x, u64 y);
+static inline void
+r_mul64(u64 *h, u64 *l, u64 x, u64 y) {
+	const u64 m = (U64_C(1)<<32) - 1;
+
+	u64 x0 = x & m;
+	u64 x1 = x >> 32;
+	u64 y0 = y & m;
+	u64 y1 = y >> 32;
+
+	u64 x0y0 = x0 * y0;
+	u64 x0y1 = x0 * y1;
+	u64 x1y0 = x1 * y0;
+	u64 x1y1 = x1 * y1;
+
+	u64 c = ((x0y1&m) + (x1y0&m) + (x0y0>>32)) >> 32;
+
+	*l = x0y0 + (x0y1<<32) + (x1y0<<32);
+	*h = x1y1 + (x0y1>>32) + (x1y0>>32) + c;
+}
 
 static inline u16 r_read16b(u8 *p) { return ((u16)p[0] << 8) | ((u16)p[1] << 0); }
 static inline u16 r_read16l(u8 *p) { return ((u16)p[1] << 8) | ((u16)p[0] << 0); }
diff --git a/src/bits.c b/src/bits.c
@@ -1,63 +0,0 @@
-#include "bits.h"
-#include "rcx.h"
-
-/* popcnt functions are implemented with a divide and conquer strategy.
- * See Henry Warren, Hacker's Delight, 2 ed., sec. 5.1. */
-
-int
-r_popcnt8(u8 n) {
-	n = (n & U8_C(0x55)) + ((n>>1) & U8_C(0x55));
-	n = (n & U8_C(0x33)) + ((n>>2) & U8_C(0x33));
-	n = (n & U8_C(0x0f)) + ((n>>4) & U8_C(0x0f));
-	return n;
-}
-
-int
-r_popcnt16(u16 n) {
-	n = (n & U16_C(0x5555)) + ((n>>1) & U16_C(0x5555));
-	n = (n & U16_C(0x3333)) + ((n>>2) & U16_C(0x3333));
-	n = (n & U16_C(0x0f0f)) + ((n>>4) & U16_C(0x0f0f));
-	n = (n & U16_C(0x00ff)) + ((n>>8) & U16_C(0x00ff));
-	return n;
-}
-
-int
-r_popcnt32(u32 n) {
-	n = (n & U32_C(0x55555555)) + ((n>>1) & U32_C(0x55555555));
-	n = (n & U32_C(0x33333333)) + ((n>>2) & U32_C(0x33333333));
-	n = (n & U32_C(0x0f0f0f0f)) + ((n>>4) & U32_C(0x0f0f0f0f));
-	n = (n & U32_C(0x00ff00ff)) + ((n>>8) & U32_C(0x00ff00ff));
-	n = (n & U32_C(0x0000ffff)) + ((n>>16) & U32_C(0x0000ffff));
-	return n;
-}
-
-int
-r_popcnt64(u64 n) {
-	n = (n & U64_C(0x5555555555555555)) + ((n>>1) & U64_C(0x5555555555555555));
-	n = (n & U64_C(0x3333333333333333)) + ((n>>2) & U64_C(0x3333333333333333));
-	n = (n & U64_C(0x0f0f0f0f0f0f0f0f)) + ((n>>4) & U64_C(0x0f0f0f0f0f0f0f0f));
-	n = (n & U64_C(0x00ff00ff00ff00ff)) + ((n>>8) & U64_C(0x00ff00ff00ff00ff));
-	n = (n & U64_C(0x0000ffff0000ffff)) + ((n>>16) & U64_C(0x0000ffff0000ffff));
-	n = (n & U64_C(0x00000000ffffffff)) + ((n>>32) & U64_C(0x00000000ffffffff));
-	return n;
-}
-
-void
-r_mul64(u64 *h, u64 *l, u64 x, u64 y) {
-	const u64 m = (U64_C(1)<<32) - 1;
-
-	u64 x0 = x & m;
-	u64 x1 = x >> 32;
-	u64 y0 = y & m;
-	u64 y1 = y >> 32;
-
-	u64 x0y0 = x0 * y0;
-	u64 x0y1 = x0 * y1;
-	u64 x1y0 = x1 * y0;
-	u64 x1y1 = x1 * y1;
-
-	u64 c = ((x0y1&m) + (x1y0&m) + (x0y0>>32)) >> 32;
-
-	*l = x0y0 + (x0y1<<32) + (x1y0<<32);
-	*h = x1y1 + (x0y1>>32) + (x1y0>>32) + c;
-}