Add README and clean up more - bigmul - big multiplication in C

commit 353a8eb15242460ce42434860a678efadf9d4c87
parent 7b0840047035241b7d2596c216111f84c47fac1e
Author: Robert Russell <robert@rr3.xyz>
Date:   Thu,  2 Jan 2025 18:18:54 -0800

Add README and clean up more

Diffstat:
A README  | 8 ++++++++
M bigmul.c  | 13 +++++--------

2 files changed, 13 insertions(+), 8 deletions(-)
diff --git a/README b/README
@@ -0,0 +1,8 @@
+This is an implementation of arbitrary width multiplication in C. The main goal
+here was to learn what CPU hardware features might benefit arbitrary width
+multiplication (similar to how an ADDC (add with carry) instruction makes
+arbitrary width addition efficient). I think the main takeaway is that an
+FMAA (fused multiply-add-add) instruction would be useful, especially
+considering Dadda multipliers (and probably other hardware multiplier designs)
+can be extended with a relatively small amount of circuitry to support FMAA
+without increasing latency.
diff --git a/bigmul.c b/bigmul.c
@@ -11,14 +11,6 @@
 // karatsuba).
 #define KARATSUBA_THRESH 32
 
-struct nat {
-	usize cap;
-	usize len;
-	u64 dat[];
-};
-
-typedef struct nat Nat[1];
-
 
 /* ----- Wide u64 math ----- */
 
@@ -98,6 +90,8 @@ sub(u64 *r, u64 *x, usize m, u64 *y, usize n) {
 	return c;
 }
 
+// Precondition: capacity(r) >= m + n, capacity(x) = m, capactiy(y) = n
+// Precondition: r is disjoint with x and y
 void
 fma_quadratic(u64 *r, u64 *x, usize m, u64 *y, usize n) {
 	for (usize j = 0; j < n; j++) {
@@ -246,6 +240,9 @@ mul_karatsuba(u64 *r, u64 *x, usize m, u64 *y, usize n) {
 	(n < KARATSUBA_THRESH ? fma_quadratic : fma_karatsuba)(r, x, m, y, n);
 }
 
+
+/* ----- Benchmarks ----- */
+
 u64 x[4096];
 u64 y[4096];
 u64 r[8192];

	bigmul big multiplication in C
	git clone git://git.rr3.xyz/bigmul
	Log \| Files \| Refs \| README \| LICENSE