commit 82e17f8fc43b89bf653d0ef8b986f0609ca4f435
Author: Robert Russell <robertrussell.72001@gmail.com>
Date: Tue, 9 Apr 2024 22:21:13 -0700
First commit
Diffstat:
| A | Makefile | | | 2 | ++ |
| A | queue.c | | | 146 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
| A | sandpiles.c | | | 168 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
3 files changed, 316 insertions(+), 0 deletions(-)
diff --git a/Makefile b/Makefile
@@ -0,0 +1,2 @@
+sandpiles: sandpiles.c
+ gcc -march=native -O3 -g -Wall -o $@ sandpiles.c -lrcx
diff --git a/queue.c b/queue.c
@@ -0,0 +1,146 @@
+#include <errno.h>
+#include <rcx/all.h>
+#include <stdio.h>
+#include <string.h>
+
+typedef struct pos Pos;
+typedef struct rgba Rgba;
+typedef struct palette Palette;
+typedef struct farbfeld Farbfeld;
+typedef struct sandpile Sandpile;
+
+struct pos {
+ isize x, y;
+};
+
+struct rgba {
+ u16 r, g, b, a;
+};
+
+struct palette {
+ Rgba rgba[4];
+};
+
+struct farbfeld {
+ usize size;
+ void *img;
+};
+
+struct sandpile {
+ u32 w, h;
+ u32 s[];
+};
+
+#define RIGHT (Pos){.x = +1, .y = +0}
+#define UP (Pos){.x = +0, .y = +1}
+#define LEFT (Pos){.x = -1, .y = +0}
+#define DOWN (Pos){.x = +0, .y = -1}
+
+#define LIST_METHOD(name) q##name
+#define LIST_T Pos
+#define LIST_REALLOC r_erealloc
+#include <rcx/list/static.h>
+
+Pos
+pos_add(Pos a, Pos b) {
+ return (Pos){.x = a.x + b.x, .y = a.y + b.y};
+}
+
+u32 *
+sand(Sandpile *sp, Pos p) {
+ usize i = (usize)p.y * (usize)sp->w + (usize)p.x;
+ return &sp->s[i];
+}
+
+void
+stabilize(Sandpile *sp) {
+ Pos *q = 0;
+ Pos p;
+
+ for (p.x = 0; p.x < sp->w; p.x++) {
+ for (p.y = 0; p.y < sp->h; p.y++) {
+ if (*sand(sp, p) >= 4) qpush(&q, p);
+ }
+ }
+
+ while (qlen(&q) > 0) {
+ p = qpop(&q);
+
+ u32 *s = sand(sp, p);
+ u32 a = *s / 4;
+ *s %= 4;
+
+ #define TOPPLE(dir) do { \
+ Pos n = pos_add(p, dir); \
+ u32 *s = sand(sp, n); \
+ *s += a; \
+ if (*s >= 4) qpush(&q, n); \
+ } while (0)
+ if (p.x < sp->w - 1) TOPPLE(LEFT);
+ if (p.y < sp->h - 1) TOPPLE(UP);
+ if (p.x > 0) TOPPLE(RIGHT);
+ if (p.y > 0) TOPPLE(DOWN);
+ #undef TOPPLE
+ }
+}
+
+Sandpile *
+new(u32 w, u32 h) {
+ Sandpile *sp = r_ealloc(sizeof *sp + (usize)w * (usize)h * sizeof(u32));
+ sp->w = w;
+ sp->h = h;
+ return sp;
+}
+
+Farbfeld
+farbfeld(Sandpile *sp, Palette *palette) {
+ usize size = 8u + 4u + 4u + (usize)sp->w * (usize)sp->h * 8u;
+ void *img = r_ealloc(size);
+ u8 *cur = img;
+
+ // Write header
+ memcpy(cur, "farbfeld", 8);
+ r_writeb32(cur + 8, sp->w);
+ r_writeb32(cur + 12, sp->h);
+ cur += 16;
+
+ // Encode palette
+ u16 enc_palette[4][4];
+ for (usize i = 0; i < 4; i++) {
+ enc_palette[i][0] = r_htob16(palette->rgba[i].r);
+ enc_palette[i][1] = r_htob16(palette->rgba[i].g);
+ enc_palette[i][2] = r_htob16(palette->rgba[i].b);
+ enc_palette[i][3] = r_htob16(palette->rgba[i].a);
+ }
+
+ // Write pixels
+ Pos p;
+ for (p.x = 0; p.x < sp->w; p.x++) {
+ for (p.y = 0; p.y < sp->h; p.y++) {
+ u32 s = *sand(sp, p);
+ memcpy(cur, enc_palette[s < 4 ? s : 3], 8);
+ cur += 8;
+ }
+ }
+
+ return (Farbfeld){.size = size, .img = img};
+}
+
+int
+main(void) {
+ Sandpile *sp = new(101, 101);
+ *sand(sp, (Pos){.x = 50, .y = 50}) = 10000u;
+ stabilize(sp);
+
+ Palette palette = {
+ .rgba = {
+ { 0x4a4a, 0x4242, 0x3838, 0xffff },
+ { 0x4d4d, 0x5353, 0x5959, 0xffff },
+ { 0x5050, 0x8484, 0x8484, 0xffff },
+ { 0x7979, 0xc9c9, 0x9e9e, 0xffff },
+ }
+ };
+ Farbfeld ff = farbfeld(sp, &palette);
+ if (r_write_all(0, 1, ff.img, ff.size) < 0)
+ throw("write: %s", strerror(errno));
+}
diff --git a/sandpiles.c b/sandpiles.c
@@ -0,0 +1,168 @@
+#include <errno.h>
+#include <rcx/all.h>
+#include <rcx/simd.h>
+#include <stdio.h>
+#include <string.h>
+
+#ifndef R_HAVE_AVX2
+#error "AVX2 support required"
+#endif
+
+#define SP_MAX_DIMEN U32_MAX
+
+typedef struct rgba Rgba;
+typedef struct palette Palette;
+typedef struct farbfeld Farbfeld;
+
+struct rgba {
+ u16 r, g, b, a;
+};
+
+struct palette {
+ Rgba rgba[4];
+};
+
+struct farbfeld {
+ usize size;
+ void *img;
+};
+
+u32 *
+sp_new(usize w, usize h) {
+ REQUIRE(w <= SP_MAX_DIMEN && h <= SP_MAX_DIMEN, "sp_new: too large");
+ return r_eallocz((w + 2) * (h + 2) * sizeof(u32)); // TODO: align?
+}
+
+void
+sp_set(usize w, usize h, u32 *sp, usize x, usize y, u32 s) {
+ ASSERT(x < w && y < h, "sp_set: out of bounds");
+ sp[(y + 1) * (w + 2) + (x + 1)] = s;
+}
+
+u32 *
+sp_stabilize4(usize w, usize h, u32 *sp) {
+ u32 *sand[2];
+ sand[0] = sp;
+ sand[1] = sp_new(w, h);
+
+ isize nxv = (isize)w / 8; // Number of x vectors that fit in w
+ v8u32 v3 = v8u32_fill(3);
+ for (usize i = 0;; i = !i) {
+ usize unstable = 0;
+
+ for (isize y = 1; y <= h; y++) {
+ isize j = y * ((isize)w + 2) + 1;
+
+ for (isize xv = 0; xv < nxv; xv++, j += 8) {
+ v8u32 a = v8u32_loadu((v8u32a1 *)&sand[i][j]);
+ a = v8u32_and(a, v3);
+
+ #define ADD(dx, dy) \
+ do { \
+ isize dj = (dy) * ((isize)w + 2) + (dx); \
+ v8u32 b = v8u32_loadu((v8u32a1 *)&sand[i][j + dj]); \
+ b = v8u32_sri(b, 2); \
+ a = v8u32_add(a, b); \
+ } while (0)
+ ADD(+1, +0);
+ ADD(+0, +1);
+ ADD(-1, +0);
+ ADD(+0, -1);
+ #undef ADD
+
+ v8u32 g = v8u32_cmpgt(a, v3);
+ unstable += !v8u32_testz(g, g);
+
+ v8u32_storeu((v8u32a1 *)&sand[!i][j], a);
+ }
+
+ // TODO: Try dealing with tail with masked vector instead? Note
+ // that this would require a minimum width/height of 3.
+ for (isize x = 8*nxv; x < (isize)w; x++, j++) {
+ u32 a = sand[i][j];
+ a = a & 3;
+
+ #define ADD(dx, dy) \
+ do { \
+ isize dj = (dy) * ((isize)w + 2) + (dx); \
+ u32 b = sand[i][j + dj]; \
+ b = b >> 2; \
+ a = a + b; \
+ } while (0)
+ ADD(+1, +0);
+ ADD(+0, +1);
+ ADD(-1, +0);
+ ADD(+0, -1);
+ #undef ADD
+
+ unstable += a > 3;
+
+ sand[!i][j] = a;
+ }
+ }
+
+ if (!unstable) {
+ free(sand[i]);
+ return sand[!i];
+ }
+ }
+}
+
+// TODO: void stabilize8(...)
+
+Farbfeld
+farbfeld(usize w, usize h, u32 *sp, Palette *palette) {
+ usize size = 8u + 4u + 4u + w * h * 8u;
+ void *img = r_ealloc(size);
+ u8 *cur = img;
+
+ // Write header
+ memcpy(cur, "farbfeld", 8);
+ r_writeb32(cur + 8, w);
+ r_writeb32(cur + 12, h);
+ cur += 16;
+
+ // Encode palette
+ u16 enc_palette[4][4];
+ for (usize i = 0; i < 4; i++) {
+ enc_palette[i][0] = r_htob16(palette->rgba[i].r);
+ enc_palette[i][1] = r_htob16(palette->rgba[i].g);
+ enc_palette[i][2] = r_htob16(palette->rgba[i].b);
+ enc_palette[i][3] = r_htob16(palette->rgba[i].a);
+ }
+
+ // Write pixels
+ for (usize y = 1; y <= h; y++) {
+ for (usize x = 1; x <= w; x++) {
+ u32 s = sp[y * (w + 2) + x];
+ memcpy(cur, enc_palette[s < 4 ? s : 3], 8);
+ cur += 8;
+ }
+ }
+
+ return (Farbfeld){.size = size, .img = img};
+}
+
+int
+main(void) {
+ usize w = 500;
+ usize h = 500;
+ u32 *sp = sp_new(w, h);
+ for (usize y = 150; y < h-150; y++) {
+ for (usize x = 150; x < w-150; x++)
+ sp_set(w, h, sp, x, y, 20);
+ }
+ sp = sp_stabilize4(w, h, sp);
+
+ Palette palette = {
+ .rgba = {
+ { 0x4a4a, 0x4242, 0x3838, 0xffff },
+ { 0x4d4d, 0x5353, 0x5959, 0xffff },
+ { 0x5050, 0x8484, 0x8484, 0xffff },
+ { 0x7979, 0xc9c9, 0x9e9e, 0xffff },
+ }
+ };
+ Farbfeld ff = farbfeld(w, h, sp, &palette);
+ if (r_write_all(0, 1, ff.img, ff.size) < 0)
+ throw("write: %s", strerror(errno));
+}