Add apparently working naive and Karatsuba multipliers - bigmul

commit 6929edbb9449c0929418f06fc6137aafc4b342cf
Author: Robert Russell <robert@rr3.xyz>
Date:   Tue, 31 Dec 2024 19:13:37 -0800

Add apparently working naive and Karatsuba multipliers

Diffstat:
A .gitignore  | 2 ++
A LICENSE  | 16 ++++++++++++++++
A Makefile  | 11 +++++++++++
A bigmul.c  | 147 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A config.mk  | 3 +++

5 files changed, 179 insertions(+), 0 deletions(-)
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1 @@
+bigmul
+\ No newline at end of file
diff --git a/LICENSE b/LICENSE
@@ -0,0 +1,15 @@
+ISC License
+
+Copyright (c) 2024, Robert Russell
+
+Permission to use, copy, modify, and/or distribute this software for any
+purpose with or without fee is hereby granted, provided that the above
+copyright notice and this permission notice appear in all copies.
+
+THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+\ No newline at end of file
diff --git a/Makefile b/Makefile
@@ -0,0 +1,11 @@
+.POSIX:
+
+include config.mk
+
+bigmul: bigmul.c config.mk
+	$(CC) $(CFLAGS) -o $@ $< -lrcx
+
+clean:
+	rm -f bigmul
+
+.PHONY: clean
diff --git a/bigmul.c b/bigmul.c
@@ -0,0 +1,147 @@
+#include <rcx/all.h>
+#include <stdio.h>
+#include <unistd.h>
+
+struct nat {
+	usize cap;
+	usize len;
+	u64 dat[];
+};
+
+typedef struct nat Nat[1];
+
+// TODO: Make sure that GCC inlines and compiles {add,sub}64 well.
+// Try __builtin_{add,sub}c{,l,ll}.
+
+inline void
+add64(u64 *co, u64 *r, u64 x, u64 y, u64 ci) {
+	u128 cr = (u128)x + (u128)y + (u128)ci;
+	*co = cr >> 64;
+	*r = cr;
+}
+
+inline void
+sub64(u64 *bo, u64 *r, u64 x, u64 y, u64 bi) {
+	u128 br = (u128)x - (u128)y - (u128)bi;
+	*bo = -(br >> 64);
+	*r = br;
+}
+
+inline void
+mul64(u64 *rh, u64 *rl, u64 x, u64 y) {
+	u128 r = (u128)x * (u128)y;
+	*rh = r >> 64;
+	*rl = r;
+}
+
+/*
+inline void
+fma64(u64 *rh, u64 *rl, u64 x, u64 y, u64 z) {
+	u64 h, l, c;
+	mul64(&h, &l, x, y);
+	add64(&c, rl, l, z, 0);
+	*rh = h + c;
+}
+*/
+
+inline void
+fmaa64(u64 *rh, u64 *rl, u64 w, u64 x, u64 y, u64 z) {
+	u64 h, l, c, d;
+	mul64(&h, &l, w, x);     // [h,  l] = w * x
+	add64(&c, &l, l, y, 0);  // [c,  l] = l + y
+	add64(&d, rl, l, z, 0);  // [d, rl] = l + z
+	*rh = h + c + d;         // rh = h + c + d
+}
+
+// Precondition: m >= n
+void
+add(u64 *r, u64 *x, usize m, u64 *y, usize n) {
+	u64 c = 0;
+	for (usize i = 0; i < n; i++)
+		add64(&c, &r[i], x[i], y[i], c);
+	for (usize i = n; i < m; i++)
+		add64(&c, &r[i], x[i], c, 0);
+	r[m] = c;
+}
+
+// Precondition: m >= n
+// TODO: sub is not commutative like add, so we need a "bus" operation
+// ("sub" backwards) for when m < n.
+void
+sub(u64 *r, u64 *x, usize m, u64 *y, usize n) {
+	u64 b = 0;
+	for (usize i = 0; i < n; i++)
+		sub64(&b, &r[i], x[i], y[i], b);
+	for (usize i = n; i < m; i++)
+		sub64(&b, &r[i], x[i], b, 0);
+	r[m] = -b;
+}
+
+// Precondition: r does not intersect x nor y
+void
+mul_quadratic(u64 *r, u64 *x, usize m, u64 *y, usize n) {
+	memset(r, 0, (m + n) * sizeof *r);
+	for (usize j = 0; j < n; j++) {
+		u64 c = 0;
+		for (usize i = 0; i < m; i++)
+			fmaa64(&c, &r[i + j], x[i], y[j], r[i + j], c);
+		r[m + j] = c;
+	}
+}
+
+// Precondition: r does not intersect x nor y
+void
+mul_karatsuba(u64 *r, u64 *x, usize m, u64 *y, usize n) {
+	// If we allow m < 4 and n < 4, then the recursion is not well-founded.
+	// TODO: Determine best threshold for quadratic.
+	if (m < 4 && n < 4) {
+		mul_quadratic(r, x, m, y, n);
+		return;
+	}
+
+	usize k = (MAX(m, n) + 1) / 2;
+
+	// 1. Split x
+	usize mh = m > k ? m - k : 0, ml = MIN(k, m);
+	u64 *xh = x + ml, *xl = x;  // x = xh * b^k + xl
+
+	// 2. Split y
+	usize nh = n > k ? n - k : 0, nl = MIN(k, n);
+	u64 *yh = y + nl, *yl = y;  // y = yh * b^k + yl
+
+	// 3. Assign/allocate memory
+	// Note that we use the output buffer r as temporary storage for
+	// intermediate results.
+	// TODO: Accept capacity of r as an argument, and use excess memory for t
+	// if it's big enough instead of allocating. Or at least allocate once in
+	// the first recursive instances of the function, and pass the memory down.
+	usize rw = m + n;
+	usize pw = MAX(ml, mh) + 1; u64 *p = r;
+	usize qw = MAX(nl, nh) + 1; u64 *q = r + pw;
+	usize tw = pw + qw;         u64 *t = r_eallocn(tw, sizeof *t);
+	usize uw = ml + nl;         u64 *u = r;
+	usize sw = mh + mh;         u64 *s = r + 2 * k;
+
+	// 4. Arithmetic
+	// TODO: Explain algorithm.
+	add(p, xl, ml, xh, mh);            // p = xl + xh
+	add(q, yl, nl, yh, nh);            // q = yl + yh
+	mul_karatsuba(t, p, pw, q, qw);    // t = p * q
+	mul_karatsuba(u, xl, ml, yl, nl);  // u = xl * yl
+	mul_karatsuba(s, xh, mh, yh, nh);  // s = xh * yh
+	sub(t, t, tw, u, uw);              // t -= u
+	sub(t, t, tw, s, sw);              // t -= s
+	add(r + k, r + k, rw - k, t, tw);  // r[k..] += t
+
+	free(t);
+}
+
+int
+main(void) {
+	u64 x[] = { 0x1234123412341234, 0x5678567856785678, 0x89ab89ab89ab89ab, 0xcdefcdefcdefcdef };
+	u64 y[] = { 0x4321432143214321, 0x8765876587658765, 0xba98ba98ba98ba98, 0xfedcfedcfedcfedc };
+	u64 r0[LEN(x) + LEN(y)]; mul_quadratic(r0, x, LEN(x), y, LEN(y));
+	u64 r1[LEN(x) + LEN(y)]; mul_karatsuba(r1, x, LEN(x), y, LEN(y));
+	printf("0x%016lx%016lx%016lx%016lx%016lx%016lx%016lx%016lx\n", r0[7], r0[6], r0[5], r0[4], r0[3], r0[2], r0[1], r0[0]);
+	printf("0x%016lx%016lx%016lx%016lx%016lx%016lx%016lx%016lx\n", r1[7], r1[6], r1[5], r1[4], r1[3], r1[2], r1[1], r1[0]);
+}
diff --git a/config.mk b/config.mk
@@ -0,0 +1,3 @@
+CFLAGS = -Wall -O3 -march=native
+
+CC = cc

	bigmul big multiplication in C
	git clone git://git.rr3.xyz/bigmul
	Log \| Files \| Refs \| README \| LICENSE

A	.gitignore	\|	2	++
A	LICENSE	\|	16	++++++++++++++++
A	Makefile	\|	11	+++++++++++
A	bigmul.c	\|	147	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A	config.mk	\|	3	+++