bigmul

big multiplication in C
git clone git://git.rr3.xyz/bigmul
Log | Files | Refs | README | LICENSE

commit 353a8eb15242460ce42434860a678efadf9d4c87
parent 7b0840047035241b7d2596c216111f84c47fac1e
Author: Robert Russell <robert@rr3.xyz>
Date:   Thu,  2 Jan 2025 18:18:54 -0800

Add README and clean up more

Diffstat:
AREADME | 8++++++++
Mbigmul.c | 13+++++--------
2 files changed, 13 insertions(+), 8 deletions(-)

diff --git a/README b/README @@ -0,0 +1,8 @@ +This is an implementation of arbitrary width multiplication in C. The main goal +here was to learn what CPU hardware features might benefit arbitrary width +multiplication (similar to how an ADDC (add with carry) instruction makes +arbitrary width addition efficient). I think the main takeaway is that an +FMAA (fused multiply-add-add) instruction would be useful, especially +considering Dadda multipliers (and probably other hardware multiplier designs) +can be extended with a relatively small amount of circuitry to support FMAA +without increasing latency. diff --git a/bigmul.c b/bigmul.c @@ -11,14 +11,6 @@ // karatsuba). #define KARATSUBA_THRESH 32 -struct nat { - usize cap; - usize len; - u64 dat[]; -}; - -typedef struct nat Nat[1]; - /* ----- Wide u64 math ----- */ @@ -98,6 +90,8 @@ sub(u64 *r, u64 *x, usize m, u64 *y, usize n) { return c; } +// Precondition: capacity(r) >= m + n, capacity(x) = m, capactiy(y) = n +// Precondition: r is disjoint with x and y void fma_quadratic(u64 *r, u64 *x, usize m, u64 *y, usize n) { for (usize j = 0; j < n; j++) { @@ -246,6 +240,9 @@ mul_karatsuba(u64 *r, u64 *x, usize m, u64 *y, usize n) { (n < KARATSUBA_THRESH ? fma_quadratic : fma_karatsuba)(r, x, m, y, n); } + +/* ----- Benchmarks ----- */ + u64 x[4096]; u64 y[4096]; u64 r[8192];