commit 4d33684aaff41f7519b8d750941e7b085695b389
parent 21de801e8d5476c8559240437fec11b3f27ca120
Author: Robert Russell <robertrussell.72001@gmail.com>
Date: Wed, 15 Mar 2023 22:59:56 -0700
Add buffer and string module
Work in progress. I will add various string functions like
prefix/postfix checking, whitespace trimming, searching, etc.
as I need them.
str.h is probably deprecated.
Diffstat:
| M | Makefile | | | 6 | ++++-- |
| M | config.mk | | | 2 | +- |
| M | inc/all.h | | | 2 | ++ |
| A | inc/buffer.h | | | 96 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
| A | inc/string.h | | | 82 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
| A | src/buffer.c | | | 96 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
| A | src/string.c | | | 18 | ++++++++++++++++++ |
7 files changed, 299 insertions(+), 3 deletions(-)
diff --git a/Makefile b/Makefile
@@ -6,10 +6,11 @@ SRC =\
src/alloc.c\
src/bench.c\
src/bits.c\
+ src/buffer.c\
src/debug.c\
- src/error.c\
src/log.c\
src/str.c\
+ src/string.c\
src/unicode.c\
src/unix.c\
src/utf8.c\
@@ -25,11 +26,12 @@ librcx.a: $(SRC:.c=.o)
src/alloc.o: src/alloc.c inc/alloc.h inc/def.h inc/log.h inc/rcx.h inc/internal/util.h config.mk
src/bench.o: src/bench.c inc/bench.h inc/def.h inc/log.h inc/rcx.h config.mk
src/bits.o: src/bits.c inc/bits.h inc/def.h inc/rcx.h config.mk
+src/buffer.o: src/buffer.c inc/alloc.h inc/buffer.h inc/debug.h inc/def.h inc/log.h inc/rcx.h inc/string.h config.mk
src/debug.o: src/debug.c inc/debug.h inc/def.h inc/rcx.h config.mk
-src/error.o: src/error.c inc/def.h inc/error.h inc/log.h inc/rcx.h config.mk
src/log.o: src/log.c inc/def.h inc/log.h inc/rcx.h config.mk
src/opt.o: src/opt.c inc/def.h inc/opt.h inc/rcx.h config.mk
src/str.o: src/str.c inc/alloc.h inc/debug.h inc/def.h inc/log.h inc/rcx.h inc/str.h config.mk
+src/string.o: src/string.c inc/buffer.h inc/debug.h inc/def.h inc/rcx.h inc/string.h inc/utf8.h config.mk
src/unicode.o: src/unicode.c inc/def.h inc/rcx.h gen/ucattab.inc config.mk
src/unix.o: src/unix.c inc/debug.h inc/def.h inc/rcx.h inc/unix.h config.mk
src/utf8.o: src/utf8.c inc/def.h inc/rcx.h inc/utf8.h config.mk
diff --git a/config.mk b/config.mk
@@ -2,7 +2,7 @@ PREFIX = /usr/local
INCPREFIX = $(PREFIX)/include
LIBPREFIX = $(PREFIX)/lib
-CFLAGS = -O2 -Wall -Iinc
+CFLAGS = -O2 -Wall -iquote inc
CC = cc
AR = ar
diff --git a/inc/all.h b/inc/all.h
@@ -1,6 +1,7 @@
/* Everything except bench.h */
#include "alloc.h"
#include "bits.h"
+#include "buffer.h"
#include "debug.h"
#include "deque.h"
#include "error.h"
@@ -8,6 +9,7 @@
#include "opt.h"
#include "rcx.h"
#include "str.h"
+#include "string.h"
#include "unicode.h"
#include "unix.h"
#include "utf8.h"
diff --git a/inc/buffer.h b/inc/buffer.h
@@ -0,0 +1,96 @@
+#pragma once
+
+#include "alloc.h"
+#include "debug.h"
+#include "def.h"
+#include "log.h"
+
+/* TODO: Optimization opportunity: Use headers with different cap field sizes
+ * to reduce waste in the common case of small strings. The flags field can be
+ * used to encode which header is used. It's probably only worth having
+ * variants for u8 and usize cap; further distinguishing u16 and u32 doesn't
+ * really make sense, since eventually the header size becomes negligible.
+ * We could also use a flag bit to disable the refcnt field. */
+
+/* Bookkeeping header of a buffer; the accessors below find it by stepping
+ * back one BufHdr_ from a Buf pointer. */
+typedef struct buf_hdr_ BufHdr_;
+/* Handle to a buffer: a pointer to its data bytes. */
+typedef u8 *Buf;
+
+/* Bits stored in the header's flags field. */
+enum buf_flags_ {
+ /* Set by the buf_*alloc* functions; buf_drop() calls free() on the header
+ * when the reference count reaches 0 and this bit is set. */
+ BUF_FLAG_FREE_ = 1<<0,
+};
+
+/* Bookkeeping header that precedes every buffer's bytes.
+ *
+ * The accessors recover the header from a Buf with `(BufHdr_ *)b - 1`, and
+ * buf_stack/buf_static produce the Buf with `header + 1`, so the data must
+ * begin exactly sizeof(BufHdr_) bytes after the start of the header. Without
+ * an explicit alignment the data array would start directly after flags,
+ * inside what sizeof counts as trailing padding, and those two computations
+ * would not agree with h->data. Aligning data to usize makes
+ * offsetof(data) == sizeof(BufHdr_). */
+struct buf_hdr_ {
+	usize cap;   /* number of data bytes available */
+	u32 refcnt;  /* number of references held; see buf_ref/buf_drop */
+	u8 flags;    /* bitwise OR of enum buf_flags_ values */
+	_Alignas(usize) u8 data[];
+};
+
+/* Anonymous struct with the same member layout as struct buf_hdr_, but with
+ * a fixed-size data array of n bytes so it can serve as the type of a
+ * compound literal or static object. Keep in sync with struct buf_hdr_. */
+#define buf_hdr_const_(n) struct { \
+		usize cap; \
+		u32 refcnt; \
+		u8 flags; \
+		_Alignas(usize) u8 data[n]; \
+	}
+
+/* Expands to a Buf whose header and bytes live in a compound literal
+ * (automatic storage when used inside a function), with the bytes
+ * initialized from the string literal s. refcnt starts at 1 and
+ * BUF_FLAG_FREE_ is not set, so buf_drop() never passes it to free().
+ * The optional extra argument presumably overrides the capacity, which is
+ * otherwise sizeof(s) - 1 (VA_DEFAULT is defined elsewhere -- confirm). */
+#define buf_stack(s, ...) ((Buf)((BufHdr_ *)&(buf_hdr_const_( \
+ VA_DEFAULT(,##__VA_ARGS__, sizeof(s) - 1))){ \
+ .cap = VA_DEFAULT(,##__VA_ARGS__, sizeof(s) - 1), \
+ .refcnt = 1, \
+ .flags = 0, \
+ .data = s /* This is an error if s is not a string literal */ \
+ } + 1))
+
+/* Like buf_stack, but the header and bytes are created through the STATIC
+ * macro (defined elsewhere; presumably yields a pointer to an object with
+ * static storage duration -- confirm). refcnt starts at 1 and
+ * BUF_FLAG_FREE_ is not set, so buf_drop() never passes it to free(). */
+#define buf_static(s, ...) ((Buf)((BufHdr_ *)STATIC(buf_hdr_const_( \
+ VA_DEFAULT(,##__VA_ARGS__, sizeof(s) - 1)), { \
+ .cap = VA_DEFAULT(,##__VA_ARGS__, sizeof(s) - 1), \
+ .refcnt = 1, \
+ .flags = 0, \
+ .data = s /* This is an error if s is not a string literal */ \
+ }) + 1))
+
+/* Turn caller-provided memory into a buffer: writes a header at mem (which
+ * must be aligned for BufHdr_) with the given cap and no BUF_FLAG_FREE_, and
+ * returns the header's data member. */
+Buf buf_memory(void *mem, usize cap);
+
+/* Allocate a buffer with room for cap bytes, refcnt 1, BUF_FLAG_FREE_ set.
+ * The plain variants return 0 on allocation failure; the "e" variants do not
+ * check the result themselves (presumably the underlying r_e* allocators do
+ * not return on failure -- confirm). The "z" variants use the r_*allocz
+ * allocators. */
+Buf buf_alloc(usize cap);
+Buf buf_allocz(usize cap);
+Buf buf_ealloc(usize cap);
+Buf buf_eallocz(usize cap);
+/* Resize the buffer *b to cap bytes and store the (possibly moved) buffer
+ * back into *b; a null *b is intended to allocate a fresh buffer. The plain
+ * variants return -1 on failure and 0 on success; the "e" variants always
+ * return 0. */
+int buf_realloc(Buf *b, usize cap);
+int buf_reallocz(Buf *b, usize cap);
+int buf_erealloc(Buf *b, usize cap);
+int buf_ereallocz(Buf *b, usize cap);
+
+/* Return the capacity recorded in the header that precedes b. */
+static inline usize
+buf_cap(Buf b) {
+	BufHdr_ *past_hdr = (BufHdr_ *)b;
+	return past_hdr[-1].cap;
+}
+
+/* Take one more reference to b and return b.
+ *
+ * The count saturates at U32_MAX: incrementing past it would wrap the count
+ * to 0, after which the very next buf_drop() would trip its "no references
+ * held" assertion even though references are outstanding. */
+static inline Buf
+buf_ref(Buf b) {
+	BufHdr_ *h = (BufHdr_ *)b - 1;
+	if unlikely(h->refcnt == U32_MAX) {
+		/* XXX: This is a tough situation to be in. For now, we mark the buffer
+		 * as nonfreeable, because surely a memory leak is better than
+		 * spontaneously freeing the buffer. Another option would be to just
+		 * abort the program. */
+		r_errorf("buf_ref: reference count overflow");
+		h->flags &= ~BUF_FLAG_FREE_;
+		/* Leave the count pinned at U32_MAX instead of wrapping to 0. */
+		return b;
+	}
+	h->refcnt += 1;
+	return b;
+}
+
+/* Release one reference to b. When the count reaches 0 and the header
+ * carries BUF_FLAG_FREE_, the header is passed to free(). */
+static inline void
+buf_drop(Buf b) {
+	BufHdr_ *h = (BufHdr_ *)b;
+	h -= 1;
+	assert(h->refcnt > 0, "buf_drop: no references held");
+	if (--h->refcnt != 0)
+		return;
+	if (h->flags & BUF_FLAG_FREE_)
+		free(h);
+}
+
+/* XXX: circular include */
+#include "string.h"
+
+/* buf_slice(b, l[, u]): Str covering bytes [l, u) of b (empty if u <= l).
+ * The upper bound presumably defaults to buf_cap(b) when omitted (VA_DEFAULT
+ * is defined elsewhere -- confirm). Note that b appears twice in the
+ * expansion. */
+#define buf_slice(b, l, ...) buf_slice_(b, l, VA_DEFAULT(,##__VA_ARGS__, buf_cap(b)))
+Str buf_slice_(Buf b, usize l, usize u);
diff --git a/inc/string.h b/inc/string.h
@@ -0,0 +1,82 @@
+#pragma once
+
+#include <string.h>
+
+#include "debug.h"
+#include "def.h"
+#include "utf8.h"
+
+/* A view of len bytes inside a buffer, starting off bytes from the buffer's
+ * first data byte. Passed around by value. */
+typedef struct str Str;
+
+struct str {
+ usize off; /* distance of data from the start of the buffer's bytes */
+ usize len; /* number of bytes in the view */
+ u8 *data; /* &buf->data[off] */
+};
+
+/* XXX: circular include */
+#include "buffer.h"
+
+/* Str over the whole of string literal s, backed by a buf_stack() buffer.
+ * len is always sizeof(s) - 1; any extra argument is forwarded to
+ * buf_stack(). */
+#define str_stack(s, ...) (Str){ \
+ .off = 0, \
+ .len = sizeof(s) - 1, \
+ .data = buf_stack(s,##__VA_ARGS__) \
+ }
+
+/* Str over the whole of string literal s, backed by a buf_static() buffer.
+ * len is always sizeof(s) - 1; any extra argument is forwarded to
+ * buf_static(). */
+#define str_static(s, ...) (Str){ \
+ .off = 0, \
+ .len = sizeof(s) - 1, \
+ .data = buf_static(s,##__VA_ARGS__) \
+ }
+
+/* Offset of s from the start of its buffer's bytes. */
+static inline usize
+str_off(Str s) {
+	usize offset = s.off;
+	return offset;
+}
+
+/* Number of bytes in s. */
+static inline usize
+str_len(Str s) {
+	usize length = s.len;
+	return length;
+}
+
+/* Recover the buffer that s views by stepping back s.off bytes from s.data. */
+static inline Buf
+str_buf(Str s) {
+	u8 *base = s.data - s.off;
+	return (Buf)base;
+}
+
+/* Pointer to the first byte of s. */
+static inline u8 *
+str_bytes(Str s) {
+	u8 *first = s.data;
+	return first;
+}
+
+/* Take a reference to the buffer behind s and return s unchanged. */
+static inline Str
+str_ref(Str s) {
+	Buf owner = str_buf(s);
+	buf_ref(owner);
+	return s;
+}
+
+/* Release one reference to the buffer behind s. */
+static inline void
+str_drop(Str s) {
+	Buf owner = str_buf(s);
+	buf_drop(owner);
+}
+
+/* Three-way bytewise comparison of s and t. The common prefix is compared
+ * with memcmp and its result returned if nonzero; otherwise the shorter
+ * string orders first (-1 / 1), and equal lengths give 0. */
+static inline int
+str_cmp(Str s, Str t) {
+	int order = memcmp(s.data, t.data, MIN(s.len, t.len));
+	if (order != 0)
+		return order;
+	if (s.len == t.len)
+		return 0;
+	return s.len < t.len ? -1 : 1;
+}
+
+/* True iff s and t have the same length and the same bytes. */
+static inline bool
+str_eq(Str s, Str t) {
+	if (s.len != t.len)
+		return false;
+	return memcmp(s.data, t.data, s.len) == 0;
+}
+
+/* str_slice(s, l[, u]): sub-view [l, u) of s over the same bytes; negative l
+ * or u count back from the end of s (see str_slice_ in src/string.c). The
+ * upper bound presumably defaults to str_len(s) when omitted (VA_DEFAULT is
+ * defined elsewhere -- confirm). Note that s appears twice in the
+ * expansion. */
+#define str_slice(s, l, ...) str_slice_(s, l, VA_DEFAULT(,##__VA_ARGS__, str_len(s)))
+Str str_slice_(Str s, isize l, isize u);
+
+/* Forward the bytes of s to r_utf8_decode(), storing the result through c
+ * and returning whatever r_utf8_decode() returns. */
+static inline usize
+str_utf8_decode(rune *c, Str s) {
+	char *bytes = (char *)s.data;
+	usize nbytes = s.len;
+	return r_utf8_decode(c, bytes, nbytes);
+}
diff --git a/src/buffer.c b/src/buffer.c
@@ -0,0 +1,96 @@
+#include "alloc.h"
+#include "buffer.h"
+#include "debug.h"
+#include "rcx.h"
+
+/* Build a buffer inside caller-provided memory. A header with the given cap,
+ * one reference and no flags is written at mem, and the header's data member
+ * is returned. mem must be aligned for BufHdr_. */
+Buf
+buf_memory(void *mem, usize cap) {
+	require(((uptr)mem & (alignof(BufHdr_) - 1)) == 0, "buf_memory: bad alignment");
+	BufHdr_ *hdr = mem;
+	*hdr = (BufHdr_){ .cap = cap, .refcnt = 1, .flags = 0 };
+	return hdr->data;
+}
+
+/* Allocate a buffer with room for cap bytes via r_alloc(). Returns 0 if the
+ * allocation fails; otherwise the buffer starts with one reference and
+ * BUF_FLAG_FREE_ set. */
+Buf
+buf_alloc(usize cap) {
+	BufHdr_ *hdr = r_alloc(sizeof(BufHdr_) + cap);
+	if (hdr == 0)
+		return 0;
+	*hdr = (BufHdr_){ .cap = cap, .refcnt = 1, .flags = BUF_FLAG_FREE_ };
+	return hdr->data;
+}
+
+/* Same as buf_alloc(), but the memory comes from r_allocz(). Returns 0 if
+ * the allocation fails. */
+Buf
+buf_allocz(usize cap) {
+	BufHdr_ *hdr = r_allocz(sizeof(BufHdr_) + cap);
+	if (hdr == 0)
+		return 0;
+	*hdr = (BufHdr_){ .cap = cap, .refcnt = 1, .flags = BUF_FLAG_FREE_ };
+	return hdr->data;
+}
+
+/* Allocate a buffer with room for cap bytes via r_ealloc(). The result is
+ * used without a null check (presumably r_ealloc() does not return on
+ * failure -- confirm). */
+Buf
+buf_ealloc(usize cap) {
+	BufHdr_ *hdr = r_ealloc(sizeof(BufHdr_) + cap);
+	*hdr = (BufHdr_){ .cap = cap, .refcnt = 1, .flags = BUF_FLAG_FREE_ };
+	return hdr->data;
+}
+
+/* Same as buf_ealloc(), but the memory comes from r_eallocz(). The result is
+ * used without a null check (presumably r_eallocz() does not return on
+ * failure -- confirm). */
+Buf
+buf_eallocz(usize cap) {
+	BufHdr_ *hdr = r_eallocz(sizeof(BufHdr_) + cap);
+	*hdr = (BufHdr_){ .cap = cap, .refcnt = 1, .flags = BUF_FLAG_FREE_ };
+	return hdr->data;
+}
+
+/* Resize *b so it has room for cap bytes, or allocate a fresh buffer when *b
+ * is null. Returns 0 on success; returns -1 without modifying *b if
+ * r_realloc() reports failure.
+ *
+ * The header's cap field is refreshed on every successful call so that
+ * buf_cap() reports the new size; a fresh buffer additionally gets one
+ * reference and BUF_FLAG_FREE_.
+ *
+ * NOTE(review): nothing here checks that an existing buffer was heap
+ * allocated (BUF_FLAG_FREE_) or that it is unshared -- confirm callers
+ * guarantee both. */
+int
+buf_realloc(Buf *b, usize cap) {
+	BufHdr_ *h = *b ? (BufHdr_ *)*b - 1 : 0;
+	if (r_realloc(&h, sizeof *h + cap) < 0) return -1;
+	if (*b) h->cap = cap;
+	else *h = (BufHdr_){cap, 1, BUF_FLAG_FREE_};
+	*b = h->data;
+	return 0;
+}
+
+/* Resize *b so it has room for cap bytes using r_reallocz(), or allocate a
+ * fresh buffer when *b is null. Returns 0 on success; returns -1 without
+ * modifying *b if r_reallocz() reports failure.
+ *
+ * r_reallocz() is given the old and new total sizes (presumably so it can
+ * zero the growth -- confirm). The old size is only read from the header
+ * when there is one; a null *b passes 0 instead of dereferencing null.
+ * The header's cap field is refreshed on every successful call so that
+ * buf_cap() reports the new size.
+ *
+ * NOTE(review): nothing here checks that an existing buffer was heap
+ * allocated (BUF_FLAG_FREE_) or that it is unshared -- confirm callers
+ * guarantee both. */
+int
+buf_reallocz(Buf *b, usize cap) {
+	BufHdr_ *h = *b ? (BufHdr_ *)*b - 1 : 0;
+	usize old_size = h ? sizeof *h + h->cap : 0;
+	if (r_reallocz(&h, old_size, sizeof *h + cap) < 0) return -1;
+	if (*b) h->cap = cap;
+	else *h = (BufHdr_){cap, 1, BUF_FLAG_FREE_};
+	*b = h->data;
+	return 0;
+}
+
+/* Resize *b so it has room for cap bytes using r_erealloc(), or allocate a
+ * fresh buffer when *b is null. Always returns 0; the result of
+ * r_erealloc() is not checked (presumably it does not return on failure --
+ * confirm).
+ *
+ * The header's cap field is refreshed on every call so that buf_cap()
+ * reports the new size; a fresh buffer additionally gets one reference and
+ * BUF_FLAG_FREE_.
+ *
+ * NOTE(review): nothing here checks that an existing buffer was heap
+ * allocated (BUF_FLAG_FREE_) or that it is unshared -- confirm callers
+ * guarantee both. */
+int
+buf_erealloc(Buf *b, usize cap) {
+	BufHdr_ *h = *b ? (BufHdr_ *)*b - 1 : 0;
+	r_erealloc(&h, sizeof *h + cap);
+	if (*b) h->cap = cap;
+	else *h = (BufHdr_){cap, 1, BUF_FLAG_FREE_};
+	*b = h->data;
+	return 0;
+}
+
+/* Resize *b so it has room for cap bytes using r_ereallocz(), or allocate a
+ * fresh buffer when *b is null. Always returns 0; the result of
+ * r_ereallocz() is not checked (presumably it does not return on failure --
+ * confirm).
+ *
+ * r_ereallocz() is given the old and new total sizes (presumably so it can
+ * zero the growth -- confirm). The old size is only read from the header
+ * when there is one; a null *b passes 0 instead of dereferencing null.
+ * The header's cap field is refreshed on every call so that buf_cap()
+ * reports the new size.
+ *
+ * NOTE(review): nothing here checks that an existing buffer was heap
+ * allocated (BUF_FLAG_FREE_) or that it is unshared -- confirm callers
+ * guarantee both. */
+int
+buf_ereallocz(Buf *b, usize cap) {
+	BufHdr_ *h = *b ? (BufHdr_ *)*b - 1 : 0;
+	usize old_size = h ? sizeof *h + h->cap : 0;
+	r_ereallocz(&h, old_size, sizeof *h + cap);
+	if (*b) h->cap = cap;
+	else *h = (BufHdr_){cap, 1, BUF_FLAG_FREE_};
+	*b = h->data;
+	return 0;
+}
+
+/* Return a Str viewing bytes [l, u) of b; the view is empty when u <= l.
+ * Both bounds must be at most buf_cap(b). No reference to b is taken.
+ *
+ * l and u are unsigned, so unlike str_slice_ there are no negative
+ * (from-the-end) indices here: comparisons against 0 would be always-false
+ * or always-true, and only the upper bounds are checked. */
+Str
+buf_slice_(Buf b, usize l, usize u) {
+	usize cap = buf_cap(b);
+
+	assert(l <= cap, "buf_slice: l out of bounds");
+	assert(u <= cap, "buf_slice: u out of bounds");
+
+	return (Str){
+		.off = l,
+		.len = l < u ? u - l : 0,
+		.data = b + l,
+	};
+}
diff --git a/src/string.c b/src/string.c
@@ -0,0 +1,18 @@
+#include "debug.h"
+#include "rcx.h"
+#include "string.h"
+
+/* Return the sub-view [l, u) of s; the view is empty when u <= l. A negative
+ * l or u has s.len added to it first, i.e. it counts back from the end of s.
+ * After that adjustment both bounds must lie in [0, s.len]. The result
+ * points into the same bytes as s; no reference is taken. */
+Str
+str_slice_(Str s, isize l, isize u) {
+	if (l < 0)
+		l += s.len;
+	if (u < 0)
+		u += s.len;
+
+	assert(0 <= l && l <= s.len, "str_slice: l out of bounds");
+	assert(0 <= u && u <= s.len, "str_slice: u out of bounds");
+
+	Str sub = {
+		.off = s.off + l,
+		.len = 0,
+		.data = s.data + l,
+	};
+	if (l < u)
+		sub.len = u - l;
+	return sub;
+}