commit 5bea8433a892d3f156d37e2b82658173814ce54b
parent d1efe7d91defa30bfc1db3814c6e034277e2d1bf
Author: Robert Russell <robertrussell.72001@gmail.com>
Date: Tue, 13 Jun 2023 16:03:01 -0700
Rework buffers and strings
The previous API where Str's could be reference counted or not was
mad. Buf's pretty much stayed the same in that they have a flag
indicating whether they are RCed or not, but we broke Str into
Str (non-RC and not necessarily backed by a Buf) and RcStr (a view
into a Buf). You can only slice an RC Buf into an RcStr, but non-RC
Buf's can be sliced to Str or RcStr (and, like before, the reference
count is effectively ignored in the latter case). Also, to make it
harder to create memory leaks, names in the buf_... and str_...
family now have useful information. E.g.,
buf_slice -> buf_slice_rc_ref
str_printf -> str_alloc_printf
I think I will continue the convention of ending buf_... and
rcstr_... functions with ..._ref or ..._noref as appropriate. Also,
it would be nice to make a rule of putting "alloc" in the name of
functions that allocate as a key step of their operation, like in
the second example above.
Crucially, Str's are now only 16 bytes (down from 24), which should
reduce memory usage a lot in programs that deal with many strings
(e.g., the Suq compiler).
Diffstat:
4 files changed, 97 insertions(+), 54 deletions(-)
diff --git a/inc/buffer.h b/inc/buffer.h
@@ -69,10 +69,15 @@ buf_cap(Buf b) {
return ((BufHdr_ *)b - 1)->cap;
}
+static inline u64
+buf_refcnt(Buf b) {
+ return ((BufHdr_ *)b - 1)->refcnt;
+}
+
static inline Buf
buf_ref(Buf b) {
BufHdr_ *h = (BufHdr_ *)b - 1;
- if unlikely(h->refcnt == U32_MAX) {
+ if unlikely (h->refcnt == U32_MAX) {
/* XXX: This is a tough situation to be in. For now, we mark the buffer
* as nonfreeable, because surely a memory leak is better than
* spontaneously freeing the buffer. Another option would be to just
@@ -96,5 +101,5 @@ buf_drop(Buf b) {
/* XXX: circular include */
#include "string.h"
-#define buf_slice(b, l, ...) buf_slice_(b, l, VA_DEFAULT(,##__VA_ARGS__, buf_cap(b)))
-Str buf_slice_(Buf b, usize l, usize u);
+Str buf_slice(Buf b, usize l, usize u);
+RcStr buf_slice_rc_ref(Buf b, usize l, usize u);
diff --git a/inc/string.h b/inc/string.h
@@ -11,8 +11,14 @@
#define STR_FMT(s) (assert(str_len(s) <= INT_MAX), (int)str_len(s)), str_bytes(s)
typedef struct str Str;
+typedef struct rc_str RcStr;
struct str {
+ usize len;
+ u8 *data;
+};
+
+struct rc_str {
usize off;
usize len;
u8 *data; /* &buf->data[off] */
@@ -22,51 +28,25 @@ struct str {
#include "buffer.h"
#define str_stack(s, ...) (Str){ \
- .off = 0, \
.len = sizeof(s) - 1, \
- .data = buf_stack(s,##__VA_ARGS__) \
+ .data = (u8[]){s} \
}
#define str_static(s, ...) (Str){ \
- .off = 0, \
.len = sizeof(s) - 1, \
- .data = buf_static(s,##__VA_ARGS__) \
+ .data = s \
}
-int str_printf(Str *str, char *fmt, ...);
-int str_vprintf(Str *str, char *fmt, va_list args);
-
-static inline usize
-str_off(Str s) {
- return s.off;
-}
-
static inline usize
str_len(Str s) {
return s.len;
}
-static inline Buf
-str_buf(Str s) {
- return (void *)(s.data - s.off);
-}
-
static inline u8 *
str_bytes(Str s) {
return s.data;
}
-static inline Str
-str_ref(Str s) {
- buf_ref(str_buf(s));
- return s;
-}
-
-static inline void
-str_drop(Str s) {
- buf_drop(str_buf(s));
-}
-
static inline int
str_cmp(Str s, Str t) {
int d = memcmp(s.data, t.data, MIN(s.len, t.len));
@@ -80,12 +60,49 @@ str_eq(Str s, Str t) {
return s.len == t.len && !memcmp(s.data, t.data, s.len);
}
-#define str_slice(s, l, ...) str_slice_(s, l, VA_DEFAULT(,##__VA_ARGS__, str_len(s)))
-Str str_slice_(Str s, isize l, isize u);
+static inline usize
+str_utf8_decode(rune *c, Str s) {
+ return r_utf8_decode(c, (char *)s.data, s.len);
+}
+
+/* Like sprintf and vsprintf, but for Str's. The result must be freed with
+ * free(str_bytes(*str)). */
+int str_alloc_printf(Str *str, char *fmt, ...);
+int str_alloc_vprintf(Str *str, char *fmt, va_list args);
+
+Str str_slice(Str s, isize l, isize u);
char *str_alloc_cstr(Str s);
static inline usize
-str_utf8_decode(rune *c, Str s) {
- return r_utf8_decode(c, (char *)s.data, s.len);
+rcstr_off(RcStr s) {
+ return s.off;
+}
+
+static inline usize
+rcstr_len(RcStr s) {
+ return s.len;
+}
+
+static inline Buf
+rcstr_buf(RcStr s) {
+ return (void *)(s.data - s.off);
}
+
+static inline u8 *
+rcstr_bytes(RcStr s) {
+ return s.data;
+}
+
+static inline RcStr
+rcstr_ref(RcStr s) {
+ buf_ref(rcstr_buf(s));
+ return s;
+}
+
+static inline void
+rcstr_drop(RcStr s) {
+ buf_drop(rcstr_buf(s));
+}
+
+/* TODO: functions like above, but operating on RcStr's instead of Str's */
diff --git a/src/buffer.c b/src/buffer.c
@@ -79,16 +79,37 @@ buf_ereallocz(Buf *b, usize cap) {
}
Str
-buf_slice_(Buf b, usize l, usize u) {
+buf_slice(Buf b, usize l, usize u) {
+ BufHdr_ *h = (BufHdr_ *)b - 1;
+ assert(!(h->flags & BUF_FLAG_FREE_), "buf_slice: can not take "
+ "non-reference-counted slice of reference-counted Buf");
+
usize cap = buf_cap(b);
if (l < 0) l += cap;
- assert(0 <= l && l <= cap, "buf_slice: l out of bounds");
+ assert(0 <= l && l <= cap, "buf_slice: lower index out of bounds");
if (u < 0) u += cap;
- assert(0 <= u && u <= cap, "buf_slice: u out of bounds");
+ assert(0 <= u && u <= cap, "buf_slice: upper index out of bounds");
return (Str){
+ .len = l < u ? u - l : 0,
+ .data = b + l,
+ };
+}
+
+RcStr
+buf_slice_rc_ref(Buf b, usize l, usize u) {
+ usize cap = buf_cap(b);
+
+ if (l < 0) l += cap;
+ assert(0 <= l && l <= cap, "buf_slice_rc_ref: lower index out of bounds");
+
+ if (u < 0) u += cap;
+ assert(0 <= u && u <= cap, "buf_slice_rc_ref: upper index out of bounds");
+
+ buf_ref(b);
+ return (RcStr){
.off = l,
.len = l < u ? u - l : 0,
.data = b + l,
diff --git a/src/string.c b/src/string.c
@@ -7,16 +7,16 @@
#include "string.h"
int
-str_printf(Str *str, char *fmt, ...) {
+str_alloc_printf(Str *str, char *fmt, ...) {
va_list args;
va_start(args, fmt);
- int ret = str_vprintf(str, fmt, args);
+ int ret = str_alloc_vprintf(str, fmt, args);
va_end(args);
return ret;
}
int
-str_vprintf(Str *str, char *fmt, va_list args) {
+str_alloc_vprintf(Str *str, char *fmt, va_list args) {
/* XXX: Using stdlib printf means we waste 1 byte for null term. */
va_list args2;
@@ -24,32 +24,32 @@ str_vprintf(Str *str, char *fmt, va_list args) {
int ret = vsnprintf(0, 0, fmt, args2);
va_end(args2);
if (ret < 0) return -1;
- usize len = (usize)ret + 1u;
+ usize len = ret;
- Buf buf = buf_alloc(len);
+ usize buflen = len + 1u;
+ char *buf = r_alloc(buflen);
if (!buf) return -1;
- ret = vsnprintf((char *)buf, len, fmt, args);
+ ret = vsnprintf(buf, buflen, fmt, args);
if (ret < 0) {
- buf_drop(buf);
+ free(buf);
return -1;
}
- *str = buf_slice(buf, 0);
+ str->len = len;
+ str->data = (u8 *)buf;
return 0;
-
}
Str
-str_slice_(Str s, isize l, isize u) {
+str_slice(Str s, isize l, isize u) {
if (l < 0) l += s.len;
- assert(0 <= l && l <= s.len, "str_slice: l out of bounds");
+ assert(0 <= l && l <= s.len, "str_slice: lower index out of bounds");
if (u < 0) u += s.len;
- assert(0 <= u && u <= s.len, "str_slice: u out of bounds");
+ assert(0 <= u && u <= s.len, "str_slice: upper index out of bounds");
return (Str){
- .off = s.off + l,
.len = l < u ? u - l : 0,
.data = s.data + l,
};
@@ -57,10 +57,10 @@ str_slice_(Str s, isize l, isize u) {
char *
str_alloc_cstr(Str s) {
- char *c = r_alloc(str_len(s) + 1);
+ char *c = r_alloc(s.len + 1);
if (!c) return 0;
- memcpy(c, str_bytes(s), str_len(s));
- c[str_len(s)] = '\0';
+ memcpy(c, s.data, s.len);
+ c[s.len] = '\0';
return c;
}