bigmul

big multiplication in C
git clone git://git.rr3.xyz/bigmul
Log | Files | Refs | README | LICENSE

commit 6929edbb9449c0929418f06fc6137aafc4b342cf
Author: Robert Russell <robert@rr3.xyz>
Date:   Tue, 31 Dec 2024 19:13:37 -0800

Add apparently working naive and Karatsuba multipliers

Diffstat:
A.gitignore | 2++
ALICENSE | 16++++++++++++++++
AMakefile | 11+++++++++++
Abigmul.c | 147+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Aconfig.mk | 3+++
5 files changed, 179 insertions(+), 0 deletions(-)

diff --git a/.gitignore b/.gitignore @@ -0,0 +1 @@ +bigmul +\ No newline at end of file diff --git a/LICENSE b/LICENSE @@ -0,0 +1,15 @@ +ISC License + +Copyright (c) 2024, Robert Russell + +Permission to use, copy, modify, and/or distribute this software for any +purpose with or without fee is hereby granted, provided that the above +copyright notice and this permission notice appear in all copies. + +THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +\ No newline at end of file diff --git a/Makefile b/Makefile @@ -0,0 +1,11 @@ +.POSIX: + +include config.mk + +bigmul: bigmul.c config.mk + $(CC) $(CFLAGS) -o $@ $< -lrcx + +clean: + rm -f bigmul + +.PHONY: clean diff --git a/bigmul.c b/bigmul.c @@ -0,0 +1,147 @@ +#include <rcx/all.h> +#include <stdio.h> +#include <unistd.h> + +struct nat { + usize cap; + usize len; + u64 dat[]; +}; + +typedef struct nat Nat[1]; + +// TODO: Make sure that GCC inlines and compiles {add,sub}64 well. +// Try __builtin_{add,sub}c{,l,ll}. + +inline void +add64(u64 *co, u64 *r, u64 x, u64 y, u64 ci) { + u128 cr = (u128)x + (u128)y + (u128)ci; + *co = cr >> 64; + *r = cr; +} + +inline void +sub64(u64 *bo, u64 *r, u64 x, u64 y, u64 bi) { + u128 br = (u128)x - (u128)y - (u128)bi; + *bo = -(br >> 64); + *r = br; +} + +inline void +mul64(u64 *rh, u64 *rl, u64 x, u64 y) { + u128 r = (u128)x * (u128)y; + *rh = r >> 64; + *rl = r; +} + +/* +inline void +fma64(u64 *rh, u64 *rl, u64 x, u64 y, u64 z) { + u64 h, l, c; + mul64(&h, &l, x, y); + add64(&c, rl, l, z, 0); + *rh = h + c; +} +*/ + +inline void +fmaa64(u64 *rh, u64 *rl, u64 w, u64 x, u64 y, u64 z) { + u64 h, l, c, d; + mul64(&h, &l, w, x); // [h, l] = w * x + add64(&c, &l, l, y, 0); // [c, l] = l + y + add64(&d, rl, l, z, 0); // [d, rl] = l + z + *rh = h + c + d; // rh = h + c + d +} + +// Precondition: m >= n +void +add(u64 *r, u64 *x, usize m, u64 *y, usize n) { + u64 c = 0; + for (usize i = 0; i < n; i++) + add64(&c, &r[i], x[i], y[i], c); + for (usize i = n; i < m; i++) + add64(&c, &r[i], x[i], c, 0); + r[m] = c; +} + +// Precondition: m >= n +// TODO: sub is not commutative like add, so we need a "bus" operation +// ("sub" backwards) for when m < n. +void +sub(u64 *r, u64 *x, usize m, u64 *y, usize n) { + u64 b = 0; + for (usize i = 0; i < n; i++) + sub64(&b, &r[i], x[i], y[i], b); + for (usize i = n; i < m; i++) + sub64(&b, &r[i], x[i], b, 0); + r[m] = -b; +} + +// Precondition: r does not intersect x nor y +void +mul_quadratic(u64 *r, u64 *x, usize m, u64 *y, usize n) { + memset(r, 0, (m + n) * sizeof *r); + for (usize j = 0; j < n; j++) { + u64 c = 0; + for (usize i = 0; i < m; i++) + fmaa64(&c, &r[i + j], x[i], y[j], r[i + j], c); + r[m + j] = c; + } +} + +// Precondition: r does not intersect x nor y +void +mul_karatsuba(u64 *r, u64 *x, usize m, u64 *y, usize n) { + // If we allow m < 4 and n < 4, then the recursion is not well-founded. + // TODO: Determine best threshold for quadratic. + if (m < 4 && n < 4) { + mul_quadratic(r, x, m, y, n); + return; + } + + usize k = (MAX(m, n) + 1) / 2; + + // 1. Split x + usize mh = m > k ? m - k : 0, ml = MIN(k, m); + u64 *xh = x + ml, *xl = x; // x = xh * b^k + xl + + // 2. Split y + usize nh = n > k ? n - k : 0, nl = MIN(k, n); + u64 *yh = y + nl, *yl = y; // y = yh * b^k + yl + + // 3. Assign/allocate memory + // Note that we use the output buffer r as temporary storage for + // intermediate results. + // TODO: Accept capacity of r as an argument, and use excess memory for t + // if it's big enough instead of allocating. Or at least allocate once in + // the first recursive instances of the function, and pass the memory down. + usize rw = m + n; + usize pw = MAX(ml, mh) + 1; u64 *p = r; + usize qw = MAX(nl, nh) + 1; u64 *q = r + pw; + usize tw = pw + qw; u64 *t = r_eallocn(tw, sizeof *t); + usize uw = ml + nl; u64 *u = r; + usize sw = mh + mh; u64 *s = r + 2 * k; + + // 4. Arithmetic + // TODO: Explain algorithm. + add(p, xl, ml, xh, mh); // p = xl + xh + add(q, yl, nl, yh, nh); // q = yl + yh + mul_karatsuba(t, p, pw, q, qw); // t = p * q + mul_karatsuba(u, xl, ml, yl, nl); // u = xl * yl + mul_karatsuba(s, xh, mh, yh, nh); // s = xh * yh + sub(t, t, tw, u, uw); // t -= u + sub(t, t, tw, s, sw); // t -= s + add(r + k, r + k, rw - k, t, tw); // r[k..] += t + + free(t); +} + +int +main(void) { + u64 x[] = { 0x1234123412341234, 0x5678567856785678, 0x89ab89ab89ab89ab, 0xcdefcdefcdefcdef }; + u64 y[] = { 0x4321432143214321, 0x8765876587658765, 0xba98ba98ba98ba98, 0xfedcfedcfedcfedc }; + u64 r0[LEN(x) + LEN(y)]; mul_quadratic(r0, x, LEN(x), y, LEN(y)); + u64 r1[LEN(x) + LEN(y)]; mul_karatsuba(r1, x, LEN(x), y, LEN(y)); + printf("0x%016lx%016lx%016lx%016lx%016lx%016lx%016lx%016lx\n", r0[7], r0[6], r0[5], r0[4], r0[3], r0[2], r0[1], r0[0]); + printf("0x%016lx%016lx%016lx%016lx%016lx%016lx%016lx%016lx\n", r1[7], r1[6], r1[5], r1[4], r1[3], r1[2], r1[1], r1[0]); +} diff --git a/config.mk b/config.mk @@ -0,0 +1,3 @@ +CFLAGS = -Wall -O3 -march=native + +CC = cc