Start reorganizing into composable programs - sandpiles

commit f95c4539f60540ab87be338da44e369a4c783f87
parent a07e744de0ae57a64d6713dd29a0b8bdc2ad07cd
Author: Robert Russell <robertrussell.72001@gmail.com>
Date:   Wed, 10 Apr 2024 12:13:08 -0700

Start reorganizing into composable programs

Diffstat:
M .gitignore  | 4 ++--
M Makefile  | 6 ++++++
A common.c  | 103 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A common.h  | 15 +++++++++++++++
A spstabilize.c  | 88 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

5 files changed, 214 insertions(+), 2 deletions(-)
diff --git a/.gitignore b/.gitignore
@@ -1 +1,2 @@
-sandpiles
-\ No newline at end of file
+sandpiles
+spstabilize
diff --git a/Makefile b/Makefile
@@ -1,2 +1,8 @@
+CC = gcc
+CFLAGS = -march=native -O3 -Wall
+
 sandpiles: sandpiles.c
 	gcc -march=native -O3 -g -Wall -o $@ sandpiles.c -lrcx
+
+spstabilize: spstabilize.c common.c common.h
+	$(CC) -o $@ $(CFLAGS) spstabilize.c common.c -lrcx
diff --git a/common.c b/common.c
@@ -0,0 +1,103 @@
+#include <rcx/all.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "common.h"
+
+/*
+ * Allocate, via r_eallocz, storage for a w-by-h sandpile: (w + 2) * (h + 2)
+ * cells of type u32, i.e. the grid plus one extra cell on every side. Callers
+ * address cell (x, y) as s[y * (w + 2) + x] with 1 <= x <= w and 1 <= y <= h.
+ *
+ * The size is computed in usize rather than u32. w and h can come straight
+ * from a file header (see sp_fread), and a 32-bit multiplication could wrap
+ * and yield a buffer far smaller than the one callers index into. Dimensions
+ * whose byte size does not fit in usize are a fatal error.
+ */
+u32 *
+sp_eallocz(u32 w, u32 h) {
+	usize cols = (usize)w + 2;
+	usize rows = (usize)h + 2;
+	usize max = (usize)-1;
+
+	// cols/rows < 2 means the "+ 2" itself wrapped (possible when usize is
+	// only 32 bits wide); the division form avoids overflowing the check.
+	if (cols < 2 || rows < 2 || cols > max / sizeof(u32) / rows)
+		r_fatalf("sp_eallocz: sandpile dimensions too large");
+
+	return r_eallocz(cols * rows * sizeof(u32)); // TODO: align?
+}
+
+/*
+ * Serialize sp to f.
+ *
+ * Output is a 16-byte header (the 8 bytes "sandpile", then the width and the
+ * height, each encoded with r_writeb32) followed by the w * h interior cells
+ * in row-major order, each encoded with r_writeb32. The extra cell on each
+ * side of a stored row is not written. Any failed write is fatal.
+ */
+void
+sp_fwrite(FILE *f, Sandpile sp) {
+	usize width = sp.w;
+	usize height = sp.h;
+	usize stride = width + 2; // stored rows carry one extra cell per side
+
+	char hdr[16];
+	memcpy(&hdr[0], "sandpile", 8);
+	r_writeb32(&hdr[8], width);
+	r_writeb32(&hdr[12], height);
+	if (fwrite(hdr, sizeof hdr, 1, f) != 1)
+		r_fatalf("sp_fwrite: failed to write header");
+
+	for (usize row = 1; row <= height; row++) {
+		const u32 *cells = &sp.s[row * stride];
+
+		for (usize col = 1; col <= width; col++) {
+			char word[4];
+			r_writeb32(word, cells[col]);
+			if (fwrite(word, sizeof word, 1, f) != 1)
+				r_fatalf("sp_fwrite: failed to write sand data");
+		}
+	}
+}
+
+/*
+ * Read a sandpile from f in the layout sp_fwrite emits.
+ *
+ * Expects a 16-byte header (the 8 bytes "sandpile", then width and height,
+ * each decoded with r_readb32) followed by w * h cells in row-major order,
+ * each decoded with r_readb32. The cells are stored into a buffer obtained
+ * from sp_eallocz at index y * (w + 2) + x for 1 <= x <= w, 1 <= y <= h.
+ * A short read or a wrong magic is fatal.
+ */
+Sandpile
+sp_fread(FILE *f) {
+	char hdr[16];
+	if (fread(hdr, sizeof hdr, 1, f) != 1)
+		r_fatalf("sp_fread: failed to read header");
+	if (memcmp(hdr, "sandpile", 8))
+		r_fatalf("sp_fread: invalid magic");
+
+	usize width = r_readb32(&hdr[8]);
+	usize height = r_readb32(&hdr[12]);
+	usize stride = width + 2; // stored rows carry one extra cell per side
+
+	u32 *cells = sp_eallocz(width, height);
+	for (usize row = 1; row <= height; row++) {
+		u32 *dst = &cells[row * stride];
+
+		for (usize col = 1; col <= width; col++) {
+			char word[4];
+			if (fread(word, sizeof word, 1, f) != 1)
+				r_fatalf("sp_fread: failed to read sand data");
+			dst[col] = r_readb32(word);
+		}
+	}
+
+	Sandpile sp = {.w = width, .h = height, .s = cells};
+	return sp;
+}
+
+/*
+Image
+sp_encode(Sandpile sp) {
+	usize w = sp.w;
+	usize h = sp.h;
+
+	usize size = 8 + 4 + 4 + w * h * 4;
+	void *data = r_ealloc(size);
+	u8 *cur = data;
+
+	memcpy(cur, "sandpile", 8); cur += 8;
+	r_writeb32(cur, w); cur += 4;
+	r_writeb32(cur, h); cur += 4;
+
+	for (usize y = 1; y <= h; y++) {
+		for (usize x = 1; x <= w; x++) {
+			r_writeb32(cur, sp.s[y * (w + 2) + x]);
+			cur += 4;
+		}
+	}
+
+	return (Image){.size = size, .data = data}
+}
+Sandpile
+sp_decode(Image img) {
+	u8 *cur = img.data;
+	if (img.size < 16)
+		r_fatalf("sp_decode: invalid header");
+
+	if (memcmp(cur, "sandpile", 8) != 0)
+		r_fatalf("sp_decode: invalid magic");
+	cur += 8;
+
+	usize w = r_readb32(cur); cur += 4;
+	usize h = r_readb32(cur); cur += 4;
+	if (img.size != 16 + w * h * 4)
+		r_fatalf("sp_decode: invalid image size");
+
+	u32 *s = sp_eallocz(w, h);
+	for (usize y = 1; y <= h; y++) {
+		memcpy(&s[y * (w + 2) + 1], cur, w * 4);
+		cur += w * 4;
+	}
+
+	return (Sandpile){.w = w, .h = h, .s = s};
+}
+*/
diff --git a/common.h b/common.h
@@ -0,0 +1,15 @@
+#pragma once
+
+#include <rcx/def.h>
+#include <stdio.h>
+
+typedef struct sandpile Sandpile;
+
+// A rectangular grid of sand counts, passed around by value (the struct is
+// small; the cell buffer itself is shared through the pointer).
+struct sandpile {
+	// Width and height of the grid, not counting the extra border cells.
+	u32 w, h;
+	// Cell buffer as sized by sp_eallocz: (w + 2) * (h + 2) entries, so each
+	// stored row has one extra cell on either side and there is one extra row
+	// above and below. Cell (x, y), with 1 <= x <= w and 1 <= y <= h, lives
+	// at s[y * (w + 2) + x].
+	u32 *s;
+};
+
+// Allocate (via r_eallocz) a cell buffer of (w + 2) * (h + 2) u32 entries.
+u32 *sp_eallocz(u32 w, u32 h);
+// Write sp to f: the magic "sandpile", then w and h, then the w * h grid cells
+// row by row. A failed write is fatal.
+void sp_fwrite(FILE *f, Sandpile sp);
+// Read a sandpile from f in the layout sp_fwrite emits, into a buffer newly
+// obtained from sp_eallocz. A failed read or a wrong magic is fatal.
+Sandpile sp_fread(FILE *f);
diff --git a/spstabilize.c b/spstabilize.c
@@ -0,0 +1,88 @@
+#include <rcx/all.h>
+#include <rcx/simd.h>
+#include <stdio.h>
+
+#include "common.h"
+
+#ifndef R_HAVE_AVX2
+#error "AVX2 support required"
+#endif
+
+/*
+ * Topple sp until no cell holds more than 3 grains, and return the result.
+ *
+ * Every pass updates all cells at once: a cell keeps its low two bits
+ * (value & 3) and gains value >> 2 from each of its four orthogonal
+ * neighbours. Passes read one buffer and write the other, swapping roles each
+ * time, and stop after the first pass that produces no value above 3.
+ *
+ * Only cells with 1 <= x <= w and 1 <= y <= h are written; the extra cells
+ * around the grid are only ever read as neighbours (presumably they are zero,
+ * as left by sp_eallocz -- TODO confirm), so sand pushed off the edge is
+ * discarded.
+ *
+ * Ownership: sp.s is consumed. Depending on how many passes ran, it is either
+ * the returned buffer or has been freed, so callers must use only the
+ * returned Sandpile afterwards.
+ */
+Sandpile
+sp_stabilize(Sandpile sp) {
+	usize w = sp.w;
+	usize h = sp.h;
+
+	// Double buffer: sand[i] is the source of the current pass, sand[!i] the
+	// destination.
+	u32 *sand[2];
+	sand[0] = sp.s;
+	sand[1] = sp_eallocz(w, h);
+
+	isize nxv = (isize)w / 8; // Number of x vectors that fit in w
+	v8u32 v3 = v8u32_fill(3);
+	for (usize i = 0;; i = !i) {
+		// Non-zero once any freshly written value exceeds 3; only
+		// zero/non-zero is ever tested.
+		usize unstable = 0;
+
+		for (isize y = 1; y <= h; y++) {
+			// Index of the first grid cell (x == 1) of row y.
+			isize j = y * ((isize)w + 2) + 1;
+
+			// Vector part: 8 consecutive cells per iteration.
+			for (isize xv = 0; xv < nxv; xv++, j += 8) {
+				v8u32 a = v8u32_loadu((v8u32a1 *)&sand[i][j]);
+				a = v8u32_and(a, v3);
+
+				// Add, per lane, a quarter (rounded down) of the neighbour at
+				// offset (dx, dy). NOTE(review): assumes v8u32_sri is a
+				// per-lane right shift by an immediate -- confirm in rcx.
+				#define ADD(dx, dy) \
+					do { \
+						isize dj = (dy) * ((isize)w + 2) + (dx); \
+						v8u32 b = v8u32_loadu((v8u32a1 *)&sand[i][j + dj]); \
+						b = v8u32_sri(b, 2); \
+						a = v8u32_add(a, b); \
+					} while (0)
+				ADD(+1, +0);
+				ADD(+0, +1);
+				ADD(-1, +0);
+				ADD(+0, -1);
+				#undef ADD
+
+				// NOTE(review): presumably g marks lanes with a > 3 and
+				// v8u32_testz(g, g) is non-zero only when g is all zero, so
+				// this counts vectors containing an unstable lane -- confirm.
+				v8u32 g = v8u32_cmpgt(a, v3);
+				unstable += !v8u32_testz(g, g);
+
+				v8u32_storeu((v8u32a1 *)&sand[!i][j], a);
+			}
+
+			// Scalar part: the w % 8 cells left at the end of the row, using
+			// the same update rule one cell at a time.
+			// TODO: Try dealing with tail with masked vector instead? Note
+			// that this would require a minimum width/height of 3.
+			for (isize x = 8*nxv; x < (isize)w; x++, j++) {
+				u32 a = sand[i][j];
+				a = a & 3;
+
+				#define ADD(dx, dy) \
+					do { \
+						isize dj = (dy) * ((isize)w + 2) + (dx); \
+						u32 b = sand[i][j + dj]; \
+						b = b >> 2; \
+						a = a + b; \
+					} while (0)
+				ADD(+1, +0);
+				ADD(+0, +1);
+				ADD(-1, +0);
+				ADD(+0, -1);
+				#undef ADD
+
+				unstable += a > 3;
+
+				sand[!i][j] = a;
+			}
+		}
+
+		// The buffer just written holds no value above 3: it is the result.
+		// Release the other buffer, which is sp.s itself when i == 0.
+		if (!unstable) {
+			free(sand[i]);
+			return (Sandpile){.w = w, .h = h, .s = sand[!i]};
+		}
+	}
+}
+
+/*
+ * Read a sandpile from stdin, stabilize it, and write the result to stdout.
+ */
+int
+main(void) {
+	Sandpile sp = sp_fread(stdin);
+	sp = sp_stabilize(sp);
+	sp_fwrite(stdout, sp);
+
+	// stdout is normally buffered, so a write error (full disk, closed pipe)
+	// may only surface when the buffer is flushed. Flush explicitly so such a
+	// failure is reported instead of the program exiting with success.
+	if (fflush(stdout) != 0)
+		r_fatalf("spstabilize: failed to flush output");
+
+	return 0;
+}

	sandpiles sandpile art
	git clone git://git.rr3.xyz/sandpiles
	Log \| Files \| Refs \| README \| LICENSE

M	.gitignore	\|	4	++--
M	Makefile	\|	6	++++++
A	common.c	\|	103	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A	common.h	\|	15	+++++++++++++++
A	spstabilize.c	\|	88	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++