commit a00d44757c3d3d113a44359dc5520132067c5a41
parent d4c32369f4df75861e97d824cf891a942d71e7ed
Author: Robert Russell <robert@rr3.xyz>
Date: Thu, 2 Jan 2025 15:05:25 -0800
Don't zero memory twice in mul_karatsuba
Diffstat:
1 file changed, 10 insertions(+), 5 deletions(-)
diff --git a/bigmul.c b/bigmul.c
@@ -95,10 +95,8 @@ sub(u64 *r, u64 *x, usize m, u64 *y, usize n) {
return c;
}
-// Precondition: r does not intersect x nor y
void
-mul_quadratic(u64 *r, u64 *x, usize m, u64 *y, usize n) {
- memset(r, 0, (m + n) * sizeof *r);
+fma_quadratic(u64 *r, u64 *x, usize m, u64 *y, usize n) {
for (usize j = 0; j < n; j++) {
u64 c = 0;
for (usize i = 0; i < m; i++)
@@ -108,6 +106,13 @@ mul_quadratic(u64 *r, u64 *x, usize m, u64 *y, usize n) {
}
// Precondition: r does not intersect x nor y
+void
+mul_quadratic(u64 *r, u64 *x, usize m, u64 *y, usize n) {
+ memset(r, 0, (m + n) * sizeof *r);
+ fma_quadratic(r, x, m, y, n);
+}
+
+// Precondition: r does not intersect x nor y
// TODO: Document precondition regarding size of scratch memory.
void
karatsuba(u64 *r, u64 *x, u64 *y, usize n, u64 *scratch0) {
@@ -154,7 +159,7 @@ karatsuba(u64 *r, u64 *x, u64 *y, usize n, u64 *scratch0) {
* suffices to separate the case m < 4 || n < 4 for the recursion basis. */
if (n < KARATSUBA_THRESH) { // TODO: Calibrate, and ensure necessary bounds
- mul_quadratic(r, x, n, y, n);
+ fma_quadratic(r, x, n, y, n);
return;
}
@@ -223,7 +228,7 @@ mul_karatsuba(u64 *r, u64 *x, usize m, u64 *y, usize n) {
}
if (m == 0) break;
if (m < KARATSUBA_THRESH) { // TODO: Calibrate.
- mul_quadratic(prod, x, m, y, n);
+ fma_quadratic(prod, x, m, y, n);
add(r, prod, m + n, r, n);
break;
}