commit 6e3d696e92f9c04a13224de6429e98c5b6d3b05b
parent 0014ba79a8b1c54889853961c5d3ad261cb0140f
Author: Robert Russell <robertrussell.72001@gmail.com>
Date: Sun, 21 May 2023 18:22:41 -0700
Make all bits functions static inline
Diffstat:
| M | Makefile | | | 2 | -- |
| M | inc/bits.h | | | 64 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----- |
| D | src/bits.c | | | 63 | --------------------------------------------------------------- |
3 files changed, 59 insertions(+), 70 deletions(-)
diff --git a/Makefile b/Makefile
@@ -5,7 +5,6 @@ include config.mk
SRC =\
src/alloc.c\
src/bench.c\
- src/bits.c\
src/buffer.c\
src/debug.c\
src/log.c\
@@ -25,7 +24,6 @@ librcx.a: $(SRC:.c=.o)
src/alloc.o: src/alloc.c inc/alloc.h inc/def.h inc/log.h inc/rcx.h inc/internal/util.h config.mk
src/bench.o: src/bench.c inc/bench.h inc/def.h inc/log.h inc/rcx.h config.mk
-src/bits.o: src/bits.c inc/bits.h inc/def.h inc/rcx.h config.mk
src/buffer.o: src/buffer.c inc/alloc.h inc/buffer.h inc/debug.h inc/def.h inc/log.h inc/rcx.h inc/string.h config.mk
src/debug.o: src/debug.c inc/debug.h inc/def.h inc/rcx.h config.mk
src/log.o: src/log.c inc/def.h inc/log.h inc/rcx.h config.mk
diff --git a/inc/bits.h b/inc/bits.h
@@ -4,14 +4,68 @@
#include "def.h"
-int r_popcnt8(u8 n);
-int r_popcnt16(u16 n);
-int r_popcnt32(u32 n);
-int r_popcnt64(u64 n);
+/* The popcnt functions are implemented with a divide and conquer strategy.
+ * Each step adds adjacent bit fields in parallel and doubles the field width,
+ * so the last step leaves the number of 1 bits of the argument in one field.
+ * See Henry Warren, Hacker's Delight, 2 ed., sec. 5.1. */
+
+static inline int
+r_popcnt8(u8 n) { /* count the 1 bits in bits 0-7 of n */
+ n = (n & U8_C(0x55)) + ((n>>1) & U8_C(0x55)); /* 2-bit fields: ones per bit pair */
+ n = (n & U8_C(0x33)) + ((n>>2) & U8_C(0x33)); /* 4-bit fields: ones per nibble */
+ n = (n & U8_C(0x0f)) + ((n>>4) & U8_C(0x0f)); /* add the two nibble counts */
+ return n; /* at most 8 */
+}
+
+static inline int
+r_popcnt16(u16 n) { /* count the 1 bits in bits 0-15 of n */
+ n = (n & U16_C(0x5555)) + ((n>>1) & U16_C(0x5555)); /* 2-bit fields: ones per bit pair */
+ n = (n & U16_C(0x3333)) + ((n>>2) & U16_C(0x3333)); /* 4-bit fields: ones per nibble */
+ n = (n & U16_C(0x0f0f)) + ((n>>4) & U16_C(0x0f0f)); /* 8-bit fields: ones per byte */
+ n = (n & U16_C(0x00ff)) + ((n>>8) & U16_C(0x00ff)); /* add the two byte counts */
+ return n; /* at most 16 */
+}
+
+static inline int
+r_popcnt32(u32 n) { /* count the 1 bits in bits 0-31 of n */
+ n = (n & U32_C(0x55555555)) + ((n>>1) & U32_C(0x55555555)); /* 2-bit fields: ones per bit pair */
+ n = (n & U32_C(0x33333333)) + ((n>>2) & U32_C(0x33333333)); /* 4-bit fields: ones per nibble */
+ n = (n & U32_C(0x0f0f0f0f)) + ((n>>4) & U32_C(0x0f0f0f0f)); /* 8-bit fields: ones per byte */
+ n = (n & U32_C(0x00ff00ff)) + ((n>>8) & U32_C(0x00ff00ff)); /* 16-bit fields: ones per half */
+ n = (n & U32_C(0x0000ffff)) + ((n>>16) & U32_C(0x0000ffff)); /* add the two half counts */
+ return n; /* at most 32, so the conversion to int loses nothing */
+}
+
+static inline int
+r_popcnt64(u64 n) { /* count the 1 bits in bits 0-63 of n */
+ n = (n & U64_C(0x5555555555555555)) + ((n>>1) & U64_C(0x5555555555555555)); /* 2-bit fields: ones per bit pair */
+ n = (n & U64_C(0x3333333333333333)) + ((n>>2) & U64_C(0x3333333333333333)); /* 4-bit fields: ones per nibble */
+ n = (n & U64_C(0x0f0f0f0f0f0f0f0f)) + ((n>>4) & U64_C(0x0f0f0f0f0f0f0f0f)); /* 8-bit fields: ones per byte */
+ n = (n & U64_C(0x00ff00ff00ff00ff)) + ((n>>8) & U64_C(0x00ff00ff00ff00ff)); /* 16-bit fields */
+ n = (n & U64_C(0x0000ffff0000ffff)) + ((n>>16) & U64_C(0x0000ffff0000ffff)); /* 32-bit fields */
+ n = (n & U64_C(0x00000000ffffffff)) + ((n>>32) & U64_C(0x00000000ffffffff)); /* add the two 32-bit counts */
+ return n; /* at most 64, so the conversion to int loses nothing */
+}
/* Perform a full-width multiply of x and y, storing the upper (resp., lower)
* 64 bits of the product in *h (resp., *l). */
-void r_mul64(u64 *h, u64 *l, u64 x, u64 y);
+static inline void
+r_mul64(u64 *h, u64 *l, u64 x, u64 y) { /* long multiplication on 32-bit halves */
+ const u64 m = (U64_C(1)<<32) - 1; /* mask selecting the low 32 bits */
+
+ u64 x0 = x & m; /* low half of x */
+ u64 x1 = x >> 32; /* high half of x */
+ u64 y0 = y & m; /* low half of y */
+ u64 y1 = y >> 32; /* high half of y */
+
+ u64 x0y0 = x0 * y0; /* partial product with weight 2^0 */
+ u64 x0y1 = x0 * y1; /* partial product with weight 2^32 */
+ u64 x1y0 = x1 * y0; /* partial product with weight 2^32 */
+ u64 x1y1 = x1 * y1; /* partial product with weight 2^64 */
+ /* Carry out of the low 64 bits: sum the three terms that land in bits 32-63
+  * of the product. Each term is below 2^32, so the sum cannot overflow, and
+  * whatever exceeds 32 bits is the carry into the high word. */
+ u64 c = ((x0y1&m) + (x1y0&m) + (x0y0>>32)) >> 32;
+ /* The left shifts and additions wrap modulo 2^64; the bits lost here are
+  * accounted for by the >>32 terms and by c in the high word. */
+ *l = x0y0 + (x0y1<<32) + (x1y0<<32);
+ *h = x1y1 + (x0y1>>32) + (x1y0>>32) + c; /* top product + upper halves of the cross products + carry */
+}
static inline u16 r_read16b(u8 *p) { return ((u16)p[0] << 8) | ((u16)p[1] << 0); }
static inline u16 r_read16l(u8 *p) { return ((u16)p[1] << 8) | ((u16)p[0] << 0); }
diff --git a/src/bits.c b/src/bits.c
@@ -1,63 +0,0 @@
-#include "bits.h"
-#include "rcx.h"
-
-/* popcnt functions are implemented with a divide and conquer strategy.
- * See Henry Warren, Hacker's Delight, 2 ed., sec. 5.1. */
-
-int
-r_popcnt8(u8 n) {
- n = (n & U8_C(0x55)) + ((n>>1) & U8_C(0x55));
- n = (n & U8_C(0x33)) + ((n>>2) & U8_C(0x33));
- n = (n & U8_C(0x0f)) + ((n>>4) & U8_C(0x0f));
- return n;
-}
-
-int
-r_popcnt16(u16 n) {
- n = (n & U16_C(0x5555)) + ((n>>1) & U16_C(0x5555));
- n = (n & U16_C(0x3333)) + ((n>>2) & U16_C(0x3333));
- n = (n & U16_C(0x0f0f)) + ((n>>4) & U16_C(0x0f0f));
- n = (n & U16_C(0x00ff)) + ((n>>8) & U16_C(0x00ff));
- return n;
-}
-
-int
-r_popcnt32(u32 n) {
- n = (n & U32_C(0x55555555)) + ((n>>1) & U32_C(0x55555555));
- n = (n & U32_C(0x33333333)) + ((n>>2) & U32_C(0x33333333));
- n = (n & U32_C(0x0f0f0f0f)) + ((n>>4) & U32_C(0x0f0f0f0f));
- n = (n & U32_C(0x00ff00ff)) + ((n>>8) & U32_C(0x00ff00ff));
- n = (n & U32_C(0x0000ffff)) + ((n>>16) & U32_C(0x0000ffff));
- return n;
-}
-
-int
-r_popcnt64(u64 n) {
- n = (n & U64_C(0x5555555555555555)) + ((n>>1) & U64_C(0x5555555555555555));
- n = (n & U64_C(0x3333333333333333)) + ((n>>2) & U64_C(0x3333333333333333));
- n = (n & U64_C(0x0f0f0f0f0f0f0f0f)) + ((n>>4) & U64_C(0x0f0f0f0f0f0f0f0f));
- n = (n & U64_C(0x00ff00ff00ff00ff)) + ((n>>8) & U64_C(0x00ff00ff00ff00ff));
- n = (n & U64_C(0x0000ffff0000ffff)) + ((n>>16) & U64_C(0x0000ffff0000ffff));
- n = (n & U64_C(0x00000000ffffffff)) + ((n>>32) & U64_C(0x00000000ffffffff));
- return n;
-}
-
-void
-r_mul64(u64 *h, u64 *l, u64 x, u64 y) {
- const u64 m = (U64_C(1)<<32) - 1;
-
- u64 x0 = x & m;
- u64 x1 = x >> 32;
- u64 y0 = y & m;
- u64 y1 = y >> 32;
-
- u64 x0y0 = x0 * y0;
- u64 x0y1 = x0 * y1;
- u64 x1y0 = x1 * y0;
- u64 x1y1 = x1 * y1;
-
- u64 c = ((x0y1&m) + (x1y0&m) + (x0y0>>32)) >> 32;
-
- *l = x0y0 + (x0y1<<32) + (x1y0<<32);
- *h = x1y1 + (x0y1>>32) + (x1y0>>32) + c;
-}