commit d4c32369f4df75861e97d824cf891a942d71e7ed
parent 32b4bbda66aba0bc90e74463a073d6e4e4ef2b5a
Author: Robert Russell <robert@rr3.xyz>
Date: Thu, 2 Jan 2025 12:52:44 -0800
Fix invalid use of __builtin_addcl in fmaa64
Diffstat:
| M | bigmul.c | | | 30 | +++++++++++++++--------------- |
1 file changed, 15 insertions(+), 15 deletions(-)
diff --git a/bigmul.c b/bigmul.c
@@ -28,10 +28,12 @@ mul64(u64 *rh, u64 *rl, u64 x, u64 y) {
inline void
fmaa64(u64 *rh, u64 *rl, u64 w, u64 x, u64 y, u64 z) {
- u64 h0, h1, l;
- mul64(&h0, &l, w, x); // h0:l = w * x
- *rl = __builtin_addcl(l, y, z, &h1); // h1:rl = l + y + z
- *rh = h0 + h1;
+ u64 h0, h1, h2, l; // computes rh:rl = w * x + y + z; h1 and h2 receive the carry-outs of the two additions below
+ mul64(&h0, &l, w, x); // h0:l = w * x (high word in h0, low word in l)
+ l = __builtin_addcl(l, y, 0, &h1); // h1:l = l + y; carry-in is 0, z is no longer passed in the carry-in slot
+ l = __builtin_addcl(l, z, 0, &h2); // h2:l = l + z; separate step so each addend gets its own carry-out
+ *rh = h0 + h1 + h2; // high word plus both carries; assuming u64 is 64 bits this cannot wrap, since w*x + y + z <= (2^64-1)^2 + 2*(2^64-1) = 2^128 - 1
+ *rl = l; // low word of the full sum
}
@@ -272,27 +274,25 @@ void bench_karatsuba4096(u64 n) { bench_karatsuba(4096, n); }
int
main(void) {
-/*
u64 x[] = { 0x1234123412341234, 0x5678567856785678, 0x89ab89ab89ab89ab, 0xcdefcdefcdefcdef };
u64 y[] = { 0x4321432143214321, 0x8765876587658765, 0xba98ba98ba98ba98, 0xfedcfedcfedcfedc };
u64 r0[LEN(x) + LEN(y)]; mul_quadratic(r0, x, LEN(x), y, LEN(y));
u64 r1[LEN(x) + LEN(y)]; mul_karatsuba(r1, x, LEN(x), y, LEN(y));
printf("0x%016lx%016lx%016lx%016lx%016lx%016lx%016lx%016lx\n", r0[7], r0[6], r0[5], r0[4], r0[3], r0[2], r0[1], r0[0]);
printf("0x%016lx%016lx%016lx%016lx%016lx%016lx%016lx%016lx\n", r1[7], r1[6], r1[5], r1[4], r1[3], r1[2], r1[1], r1[0]);
-*/
for (usize i = 0; i < LEN(x); i++) x[i] = r_prand64();
for (usize i = 0; i < LEN(y); i++) y[i] = r_prand64();
- // r_bench(bench_quadratic16, 1000);
- // r_bench(bench_quadratic32, 1000);
- // r_bench(bench_quadratic64, 1000);
- // r_bench(bench_quadratic128, 1000);
- // r_bench(bench_quadratic256, 1000);
- // r_bench(bench_quadratic512, 1000);
- // r_bench(bench_quadratic1024, 1000);
- // r_bench(bench_quadratic2048, 1000);
- // r_bench(bench_quadratic4096, 1000);
+ r_bench(bench_quadratic16, 1000);
+ r_bench(bench_quadratic32, 1000);
+ r_bench(bench_quadratic64, 1000);
+ r_bench(bench_quadratic128, 1000);
+ r_bench(bench_quadratic256, 1000);
+ r_bench(bench_quadratic512, 1000);
+ r_bench(bench_quadratic1024, 1000);
+ r_bench(bench_quadratic2048, 1000);
+ r_bench(bench_quadratic4096, 1000);
r_bench(bench_karatsuba16, 1000);
r_bench(bench_karatsuba32, 1000);