commit f95c4539f60540ab87be338da44e369a4c783f87
parent a07e744de0ae57a64d6713dd29a0b8bdc2ad07cd
Author: Robert Russell <robertrussell.72001@gmail.com>
Date: Wed, 10 Apr 2024 12:13:08 -0700
Start reorganizing into composable programs
Diffstat:
| M | .gitignore | | | 4 | ++-- |
| M | Makefile | | | 6 | ++++++ |
| A | common.c | | | 103 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
| A | common.h | | | 15 | +++++++++++++++ |
| A | spstabilize.c | | | 88 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
5 files changed, 214 insertions(+), 2 deletions(-)
diff --git a/.gitignore b/.gitignore
@@ -1 +1,2 @@
-sandpiles
-\ No newline at end of file
+sandpiles
+spstabilize
diff --git a/Makefile b/Makefile
@@ -1,2 +1,8 @@
+CC = gcc
+CFLAGS = -march=native -O3 -Wall
+
sandpiles: sandpiles.c
gcc -march=native -O3 -g -Wall -o $@ sandpiles.c -lrcx
+
+# Build the spstabilize program from spstabilize.c and common.c, linking
+# against librcx. common.h is listed so that editing it triggers a rebuild.
+spstabilize: spstabilize.c common.c common.h
+ $(CC) -o $@ $(CFLAGS) spstabilize.c common.c -lrcx
diff --git a/common.c b/common.c
@@ -0,0 +1,103 @@
+#include <rcx/all.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "common.h"
+
+// Allocate the cell buffer for a w x h sandpile, including a one-cell border
+// on every side, i.e. (w + 2) * (h + 2) cells of type u32. The buffer comes
+// from r_eallocz (presumably zero-filled and fatal on allocation failure --
+// confirm against rcx).
+//
+// w and h may come straight from an untrusted file header, so the size is
+// computed in a wide type and checked: doing the arithmetic in u32 would let
+// e.g. 65536 x 65536 wrap around to a tiny allocation that callers then
+// overrun.
+u32 *
+sp_eallocz(u32 w, u32 h) {
+	unsigned long long cols = (unsigned long long)w + 2;
+	unsigned long long rows = (unsigned long long)h + 2;
+	// Largest cell count whose byte size still fits in usize.
+	unsigned long long max_cells = (usize)-1 / sizeof(u32);
+
+	// rows >= 2, so the division is safe; dividing instead of multiplying
+	// keeps the check itself from overflowing.
+	if (cols > max_cells / rows)
+		r_fatalf("sp_eallocz: sandpile dimensions too large");
+
+	return r_eallocz((usize)(cols * rows) * sizeof(u32)); // TODO: align?
+}
+
+// Serialize sp to f: a 16-byte header (the 8-byte magic "sandpile" followed
+// by the width and height as 32-bit fields written with r_writeb32), then
+// one 32-bit value per interior cell in row-major order. Border cells are
+// not written. Any failed fwrite is fatal (r_fatalf).
+void
+sp_fwrite(FILE *f, Sandpile sp) {
+	usize ncols = sp.w;
+	usize nrows = sp.h;
+	usize stride = ncols + 2; // each stored row carries a border cell at both ends
+
+	char hdr[16];
+	memcpy(hdr, "sandpile", 8);
+	r_writeb32(hdr + 8, ncols);
+	r_writeb32(hdr + 12, nrows);
+	if (fwrite(hdr, sizeof hdr, 1, f) == 0)
+		r_fatalf("sp_fwrite: failed to write header");
+
+	for (usize row = 1; row <= nrows; row++) {
+		usize base = row * stride;
+		for (usize col = 1; col <= ncols; col++) {
+			char word[4];
+			r_writeb32(word, sp.s[base + col]);
+			if (fwrite(word, sizeof word, 1, f) == 0)
+				r_fatalf("sp_fwrite: failed to write sand data");
+		}
+	}
+}
+
+// Read a sandpile from f in the layout sp_fwrite emits: the 8-byte magic
+// "sandpile", the width and height as 32-bit fields (decoded with
+// r_readb32), then one 32-bit value per cell in row-major order. A short
+// read or a wrong magic is fatal (r_fatalf). The cells are stored inside the
+// one-cell border of a buffer obtained from sp_eallocz.
+Sandpile
+sp_fread(FILE *f) {
+	char hdr[16];
+	if (fread(hdr, sizeof hdr, 1, f) == 0)
+		r_fatalf("sp_fread: failed to read header");
+	if (memcmp(hdr, "sandpile", 8) != 0)
+		r_fatalf("sp_fread: invalid magic");
+
+	usize ncols = r_readb32(hdr + 8);
+	usize nrows = r_readb32(hdr + 12);
+	usize stride = ncols + 2; // row length including both border cells
+
+	u32 *cells = sp_eallocz(ncols, nrows);
+	for (usize row = 1; row <= nrows; row++) {
+		usize base = row * stride;
+		for (usize col = 1; col <= ncols; col++) {
+			char word[4];
+			if (fread(word, sizeof word, 1, f) == 0)
+				r_fatalf("sp_fread: failed to read sand data");
+			cells[base + col] = r_readb32(word);
+		}
+	}
+
+	Sandpile sp = {.w = ncols, .h = nrows, .s = cells};
+	return sp;
+}
+
+/*
+Image
+sp_encode(Sandpile sp) {
+ usize w = sp.w;
+ usize h = sp.h;
+
+ usize size = 8 + 4 + 4 + w * h * 4;
+ void *data = r_ealloc(size);
+ u8 *cur = data;
+
+ memcpy(cur, "sandpile", 8); cur += 8;
+ r_writeb32(cur, w); cur += 4;
+ r_writeb32(cur, h); cur += 4;
+
+ for (usize y = 1; y <= h; y++) {
+ for (usize x = 1; x <= w; x++) {
+ r_writeb32(cur, sp.s[y * (w + 2) + x]);
+ cur += 4;
+ }
+ }
+
+ return (Image){.size = size, .data = data}
+}
+Sandpile
+sp_decode(Image img) {
+ u8 *cur = img.data;
+ if (img.size < 16)
+ r_fatalf("sp_decode: invalid header");
+
+ if (memcmp(cur, "sandpile", 8) != 0)
+ r_fatalf("sp_decode: invalid magic");
+ cur += 8;
+
+ usize w = r_readb32(cur); cur += 4;
+ usize h = r_readb32(cur); cur += 4;
+ if (img.size != 16 + w * h * 4)
+ r_fatalf("sp_decode: invalid image size");
+
+ u32 *s = sp_eallocz(w, h);
+ for (usize y = 1; y <= h; y++) {
+ memcpy(&s[y * (w + 2) + 1], cur, w * 4);
+ cur += w * 4;
+ }
+
+ return (Sandpile){.w = w, .h = h, .s = s};
+}
+*/
diff --git a/common.h b/common.h
@@ -0,0 +1,15 @@
+#pragma once
+
+#include <rcx/def.h>
+#include <stdio.h>
+
+typedef struct sandpile Sandpile;
+
+// A rectangular sandpile. The cell buffer is laid out with a one-cell border
+// around the w x h interior, so interior cell (x, y), with 1 <= x <= w and
+// 1 <= y <= h, lives at s[y * (w + 2) + x].
+struct sandpile {
+	u32 w;  // interior width in cells
+	u32 h;  // interior height in cells
+	u32 *s; // (w + 2) * (h + 2) cells, as allocated by sp_eallocz
+};
+
+// Allocate the cell buffer for a w x h sandpile: (w + 2) * (h + 2) u32 cells,
+// i.e. the interior plus a one-cell border on every side.
+u32 *sp_eallocz(u32 w, u32 h);
+// Write sp to f as the magic "sandpile", the width and height, and then every
+// interior cell in row-major order. Write failures are fatal.
+void sp_fwrite(FILE *f, Sandpile sp);
+// Read a sandpile in the sp_fwrite layout from f into a freshly allocated
+// buffer. A short read or a bad magic is fatal.
+Sandpile sp_fread(FILE *f);
diff --git a/spstabilize.c b/spstabilize.c
@@ -0,0 +1,88 @@
+#include <rcx/all.h>
+#include <rcx/simd.h>
+#include <stdio.h>
+
+#include "common.h"
+
+#ifndef R_HAVE_AVX2
+#error "AVX2 support required"
+#endif
+
+// Apply the toppling rule to sp over and over until no interior cell holds
+// more than 3 grains, and return the resulting pile.
+//
+// The buffer sp.s is consumed: the routine alternates between sp.s and a
+// second buffer from sp_eallocz, frees the one holding the older generation
+// when it finishes, and returns the other. The caller must not use sp.s
+// afterwards and owns the returned .s.
+//
+// Only interior cells (1 <= x <= w, 1 <= y <= h) are ever written here; the
+// border row/column on each side is only read.
+Sandpile
+sp_stabilize(Sandpile sp) {
+ usize w = sp.w;
+ usize h = sp.h;
+
+ // sand[i] is the generation being read, sand[!i] the one being written.
+ u32 *sand[2];
+ sand[0] = sp.s;
+ sand[1] = sp_eallocz(w, h);
+
+ isize nxv = (isize)w / 8; // Number of x vectors that fit in w
+ v8u32 v3 = v8u32_fill(3);
+ for (usize i = 0;; i = !i) {
+ // Incremented whenever a value written during this pass is still above 3
+ // (once per vector in the SIMD path, assuming v8u32_testz reports an
+ // all-zero mask -- confirm against rcx/simd.h).
+ usize unstable = 0;
+
+ for (isize y = 1; y <= h; y++) {
+ // j is the index of the first interior cell of row y; rows are w + 2
+ // cells long because of the border.
+ isize j = y * ((isize)w + 2) + 1;
+
+ // Main part of the row, 8 cells at a time.
+ for (isize xv = 0; xv < nxv; xv++, j += 8) {
+ v8u32 a = v8u32_loadu((v8u32a1 *)&sand[i][j]);
+ // Keep only the low two bits of each cell (its value mod 4).
+ a = v8u32_and(a, v3);
+
+ // ADD(dx, dy) adds to a the neighbour at offset (dx, dy) after shifting
+ // it by 2 with v8u32_sri (presumably a logical right shift, matching
+ // the scalar tail's b >> 2).
+ #define ADD(dx, dy) \
+ do { \
+ isize dj = (dy) * ((isize)w + 2) + (dx); \
+ v8u32 b = v8u32_loadu((v8u32a1 *)&sand[i][j + dj]); \
+ b = v8u32_sri(b, 2); \
+ a = v8u32_add(a, b); \
+ } while (0)
+ ADD(+1, +0);
+ ADD(+0, +1);
+ ADD(-1, +0);
+ ADD(+0, -1);
+ #undef ADD
+
+ v8u32 g = v8u32_cmpgt(a, v3);
+ unstable += !v8u32_testz(g, g);
+
+ v8u32_storeu((v8u32a1 *)&sand[!i][j], a);
+ }
+
+ // Scalar tail: the remaining w % 8 cells of the row, using the same rule
+ // (keep a & 3, add each neighbour's value >> 2).
+ // TODO: Try dealing with tail with masked vector instead? Note
+ // that this would require a minimum width/height of 3.
+ for (isize x = 8*nxv; x < (isize)w; x++, j++) {
+ u32 a = sand[i][j];
+ a = a & 3;
+
+ #define ADD(dx, dy) \
+ do { \
+ isize dj = (dy) * ((isize)w + 2) + (dx); \
+ u32 b = sand[i][j + dj]; \
+ b = b >> 2; \
+ a = a + b; \
+ } while (0)
+ ADD(+1, +0);
+ ADD(+0, +1);
+ ADD(-1, +0);
+ ADD(+0, -1);
+ #undef ADD
+
+ unstable += a > 3;
+
+ sand[!i][j] = a;
+ }
+ }
+
+ if (!unstable) {
+ // sand[!i] holds the stable generation just written; release the buffer
+ // that was read from and hand the other one back.
+ free(sand[i]);
+ return (Sandpile){.w = w, .h = h, .s = sand[!i]};
+ }
+ }
+}
+
+// Read a sandpile from stdin, stabilize it, and write the result to stdout.
+//
+// stdout is buffered, so a write error (full disk, closed pipe) may only
+// surface when the buffer is flushed. Flush explicitly and treat failure as
+// fatal, so that truncated output is never reported as success.
+int
+main(void) {
+	Sandpile sp = sp_fread(stdin);
+	sp = sp_stabilize(sp);
+	sp_fwrite(stdout, sp);
+	if (fflush(stdout) != 0)
+		r_fatalf("spstabilize: failed to flush output");
+	return 0;
+}