bigmul

big multiplication in C
git clone git://git.rr3.xyz/bigmul
Log | Files | Refs | README | LICENSE

commit d4c32369f4df75861e97d824cf891a942d71e7ed
parent 32b4bbda66aba0bc90e74463a073d6e4e4ef2b5a
Author: Robert Russell <robert@rr3.xyz>
Date:   Thu,  2 Jan 2025 12:52:44 -0800

Fix invalid use of __builtin_addcl in fmaa64

Diffstat:
M bigmul.c | 30 +++++++++++++++---------------
1 file changed, 15 insertions(+), 15 deletions(-)

diff --git a/bigmul.c b/bigmul.c @@ -28,10 +28,12 @@ mul64(u64 *rh, u64 *rl, u64 x, u64 y) { inline void fmaa64(u64 *rh, u64 *rl, u64 w, u64 x, u64 y, u64 z) { - u64 h0, h1, l; - mul64(&h0, &l, w, x); // h0:l = w * x - *rl = __builtin_addcl(l, y, z, &h1); // h1:rl = l + y + z - *rh = h0 + h1; + u64 h0, h1, h2, l; + mul64(&h0, &l, w, x); // h0:l = w * x + l = __builtin_addcl(l, y, 0, &h1); // h1:l = l + y + l = __builtin_addcl(l, z, 0, &h2); // h2:l = l + z + *rh = h0 + h1 + h2; + *rl = l; } @@ -272,27 +274,25 @@ void bench_karatsuba4096(u64 n) { bench_karatsuba(4096, n); } int main(void) { -/* u64 x[] = { 0x1234123412341234, 0x5678567856785678, 0x89ab89ab89ab89ab, 0xcdefcdefcdefcdef }; u64 y[] = { 0x4321432143214321, 0x8765876587658765, 0xba98ba98ba98ba98, 0xfedcfedcfedcfedc }; u64 r0[LEN(x) + LEN(y)]; mul_quadratic(r0, x, LEN(x), y, LEN(y)); u64 r1[LEN(x) + LEN(y)]; mul_karatsuba(r1, x, LEN(x), y, LEN(y)); printf("0x%016lx%016lx%016lx%016lx%016lx%016lx%016lx%016lx\n", r0[7], r0[6], r0[5], r0[4], r0[3], r0[2], r0[1], r0[0]); printf("0x%016lx%016lx%016lx%016lx%016lx%016lx%016lx%016lx\n", r1[7], r1[6], r1[5], r1[4], r1[3], r1[2], r1[1], r1[0]); -*/ for (usize i = 0; i < LEN(x); i++) x[i] = r_prand64(); for (usize i = 0; i < LEN(y); i++) y[i] = r_prand64(); - // r_bench(bench_quadratic16, 1000); - // r_bench(bench_quadratic32, 1000); - // r_bench(bench_quadratic64, 1000); - // r_bench(bench_quadratic128, 1000); - // r_bench(bench_quadratic256, 1000); - // r_bench(bench_quadratic512, 1000); - // r_bench(bench_quadratic1024, 1000); - // r_bench(bench_quadratic2048, 1000); - // r_bench(bench_quadratic4096, 1000); + r_bench(bench_quadratic16, 1000); + r_bench(bench_quadratic32, 1000); + r_bench(bench_quadratic64, 1000); + r_bench(bench_quadratic128, 1000); + r_bench(bench_quadratic256, 1000); + r_bench(bench_quadratic512, 1000); + r_bench(bench_quadratic1024, 1000); + r_bench(bench_quadratic2048, 1000); + r_bench(bench_quadratic4096, 1000); r_bench(bench_karatsuba16, 
1000); r_bench(bench_karatsuba32, 1000);