rcx

library of miscellaneous bits of C code
git clone git://git.rr3.xyz/rcx
Log | Files | Refs | README | LICENSE

commit 4d33684aaff41f7519b8d750941e7b085695b389
parent 21de801e8d5476c8559240437fec11b3f27ca120
Author: Robert Russell <robertrussell.72001@gmail.com>
Date:   Wed, 15 Mar 2023 22:59:56 -0700

Add buffer and string module

Work in progress. I will add various string functions like
prefix/postfix checking, whitespace trimming, searching, etc.
as I need them.

str.h is probably deprecated.

Diffstat:
MMakefile | 6++++--
Mconfig.mk | 2+-
Minc/all.h | 2++
Ainc/buffer.h | 96+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ainc/string.h | 82+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/buffer.c | 96+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/string.c | 18++++++++++++++++++
7 files changed, 299 insertions(+), 3 deletions(-)

diff --git a/Makefile b/Makefile @@ -6,10 +6,11 @@ SRC =\ src/alloc.c\ src/bench.c\ src/bits.c\ + src/buffer.c\ src/debug.c\ - src/error.c\ src/log.c\ src/str.c\ + src/string.c\ src/unicode.c\ src/unix.c\ src/utf8.c\ @@ -25,11 +26,12 @@ librcx.a: $(SRC:.c=.o) src/alloc.o: src/alloc.c inc/alloc.h inc/def.h inc/log.h inc/rcx.h inc/internal/util.h config.mk src/bench.o: src/bench.c inc/bench.h inc/def.h inc/log.h inc/rcx.h config.mk src/bits.o: src/bits.c inc/bits.h inc/def.h inc/rcx.h config.mk +src/buffer.o: src/buffer.c inc/alloc.h inc/buffer.h inc/debug.h inc/def.h inc/log.h inc/rcx.h inc/string.h config.mk src/debug.o: src/debug.c inc/debug.h inc/def.h inc/rcx.h config.mk -src/error.o: src/error.c inc/def.h inc/error.h inc/log.h inc/rcx.h config.mk src/log.o: src/log.c inc/def.h inc/log.h inc/rcx.h config.mk src/opt.o: src/opt.c inc/def.h inc/opt.h inc/rcx.h config.mk src/str.o: src/str.c inc/alloc.h inc/debug.h inc/def.h inc/log.h inc/rcx.h inc/str.h config.mk +src/string.o: src/string.c inc/buffer.h inc/debug.h inc/def.h inc/rcx.h inc/string.h inc/utf8.h config.mk src/unicode.o: src/unicode.c inc/def.h inc/rcx.h gen/ucattab.inc config.mk src/unix.o: src/unix.c inc/debug.h inc/def.h inc/rcx.h inc/unix.h config.mk src/utf8.o: src/utf8.c inc/def.h inc/rcx.h inc/utf8.h config.mk diff --git a/config.mk b/config.mk @@ -2,7 +2,7 @@ PREFIX = /usr/local INCPREFIX = $(PREFIX)/include LIBPREFIX = $(PREFIX)/lib -CFLAGS = -O2 -Wall -Iinc +CFLAGS = -O2 -Wall -iquote inc CC = cc AR = ar diff --git a/inc/all.h b/inc/all.h @@ -1,6 +1,7 @@ /* Everything except bench.h */ #include "alloc.h" #include "bits.h" +#include "buffer.h" #include "debug.h" #include "deque.h" #include "error.h" @@ -8,6 +9,7 @@ #include "opt.h" #include "rcx.h" #include "str.h" +#include "string.h" #include "unicode.h" #include "unix.h" #include "utf8.h" diff --git a/inc/buffer.h b/inc/buffer.h @@ -0,0 +1,96 @@ +#pragma once /* Reference-counted byte buffers: a Buf points at data bytes that are preceded in memory by a BufHdr_ holding cap, refcnt and flags */ + +#include "alloc.h" +#include "debug.h" +#include "def.h" +#include "log.h" + +/* 
TODO: Optimization opportunity: Use headers with different cap field sizes + * to reduce waste in the common case of small strings. The flags field can be + * used to encode which header is used. It's probably only worth having + * variants for u8 and usize cap; further distinguishing u16 and u32 doesn't + * really make sense, since eventually the header size becomes negligible. + * We could also use a flag bit to disable the refcnt field. */ + +typedef struct buf_hdr_ BufHdr_; +typedef u8 *Buf; /* handle handed to callers: the address of the data bytes, not of the header */ + +enum buf_flags_ { + BUF_FLAG_FREE_ = 1<<0, /* set by the buf_alloc and buf_realloc families; buf_drop calls free on the header only when this bit is set */ +}; + +struct buf_hdr_ { + usize cap; /* capacity in bytes, as passed to the constructor */ + u32 refcnt; /* starts at 1 in every constructor; adjusted by buf_ref and buf_drop */ + u8 flags; /* bit set of enum buf_flags_ values */ + u8 data[]; /* payload; NOTE(review): if the struct has trailing padding after flags, offsetof(data) is smaller than sizeof(BufHdr_), which matters for the header recovery below - confirm layout */ +}; + +#define buf_hdr_const_(n) struct { \ + usize cap; \ + u32 refcnt; \ + u8 flags; \ + u8 data[n]; \ + } + +#define buf_stack(s, ...) ((Buf)((BufHdr_ *)&(buf_hdr_const_( \ + VA_DEFAULT(,##__VA_ARGS__, sizeof(s) - 1))){ \ + .cap = VA_DEFAULT(,##__VA_ARGS__, sizeof(s) - 1), \ + .refcnt = 1, \ + .flags = 0, \ + .data = s /* This is an error if s is not a string literal */ \ + } + 1)) /* NOTE(review): the + 1 steps sizeof(BufHdr_) bytes past the start of the compound literal; that equals the address of its data member only if offsetof(data) == sizeof(BufHdr_) - confirm */ + +#define buf_static(s, ...) ((Buf)((BufHdr_ *)STATIC(buf_hdr_const_( \ + VA_DEFAULT(,##__VA_ARGS__, sizeof(s) - 1)), { \ + .cap = VA_DEFAULT(,##__VA_ARGS__, sizeof(s) - 1), \ + .refcnt = 1, \ + .flags = 0, \ + .data = s /* This is an error if s is not a string literal */ \ + }) + 1)) /* NOTE(review): same + 1 header-size assumption as buf_stack - confirm */ + +Buf buf_memory(void *mem, usize cap); /* writes a header at mem with flags 0, so buf_drop never frees it */ + +Buf buf_alloc(usize cap); /* returns 0 when r_alloc returns 0 */ +Buf buf_allocz(usize cap); /* as buf_alloc but built on r_allocz */ +Buf buf_ealloc(usize cap); /* no null check on the r_ealloc result */ +Buf buf_eallocz(usize cap); /* no null check on the r_eallocz result */ +int buf_realloc(Buf *b, usize cap); /* returns -1 when r_realloc reports < 0, else 0; *b may be NULL on entry */ +int buf_reallocz(Buf *b, usize cap); /* returns -1 when r_reallocz reports < 0, else 0 */ +int buf_erealloc(Buf *b, usize cap); /* always returns 0 */ +int buf_ereallocz(Buf *b, usize cap); /* always returns 0 */ + +static inline usize +buf_cap(Buf b) { + return ((BufHdr_ *)b - 1)->cap; /* NOTE(review): recovers the header by stepping back sizeof(BufHdr_) from b, while the constructors in buffer.c return h->data; the two agree only if offsetof(BufHdr_, data) == sizeof(BufHdr_) - confirm */ +} + +static inline Buf +buf_ref(Buf b) { /* takes one more reference on b and returns b */ + BufHdr_ *h = (BufHdr_ *)b - 1; + if unlikely(h->refcnt == U32_MAX) { + /* XXX: This is a tough situation to be in. For now, we mark the buffer + * as nonfreeable, because surely a memory leak is better than + * spontaneously freeing the buffer. 
Another option would be to just + * abort the program. */ + r_errorf("buf_ref: reference count overflow"); + h->flags &= ~BUF_FLAG_FREE_; + } + h->refcnt += 1; /* NOTE(review): this still runs on the overflow path above; if u32 is an unsigned 32-bit type, refcnt wraps from U32_MAX to 0 and a later buf_drop would trip its refcnt > 0 assertion - confirm intended */ + return b; +} + +static inline void +buf_drop(Buf b) { /* releases one reference on b */ + BufHdr_ *h = (BufHdr_ *)b - 1; + assert(h->refcnt > 0, "buf_drop: no references held"); + h->refcnt -= 1; + if (h->refcnt == 0 && (h->flags & BUF_FLAG_FREE_)) /* buffers without the flag (buf_memory, buf_stack, buf_static) are never freed here */ + free(h); +} + +/* XXX: circular include */ +#include "string.h" + +#define buf_slice(b, l, ...) buf_slice_(b, l, VA_DEFAULT(,##__VA_ARGS__, buf_cap(b))) +Str buf_slice_(Buf b, usize l, usize u); /* builds a Str over b from l to u; does not touch refcnt */ diff --git a/inc/string.h b/inc/string.h @@ -0,0 +1,82 @@ +#pragma once /* Str: an offset, length and data pointer into a Buf */ + +#include <string.h> /* NOTE(review): presumably the switch from -Iinc to -iquote inc in config.mk is what keeps this angle-bracket include from resolving to inc/string.h - confirm */ + +#include "debug.h" +#include "def.h" +#include "utf8.h" + +typedef struct str Str; + +struct str { + usize off; /* distance in bytes from the start of the owning Buf to data */ + usize len; /* number of bytes in the string */ + u8 *data; /* &buf->data[off] */ +}; + +/* XXX: circular include */ +#include "buffer.h" + +#define str_stack(s, ...) (Str){ \ + .off = 0, \ + .len = sizeof(s) - 1, \ + .data = buf_stack(s,##__VA_ARGS__) \ + } + +#define str_static(s, ...) (Str){ \ + .off = 0, \ + .len = sizeof(s) - 1, \ + .data = buf_static(s,##__VA_ARGS__) \ + } + +static inline usize +str_off(Str s) { + return s.off; +} + +static inline usize +str_len(Str s) { + return s.len; +} + +static inline Buf +str_buf(Str s) { + return (void *)(s.data - s.off); /* data sits off bytes into the buffer, so stepping back by off yields the Buf */ +} + +static inline u8 * +str_bytes(Str s) { + return s.data; +} + +static inline Str +str_ref(Str s) { /* takes a reference on the underlying Buf and returns s unchanged */ + buf_ref(str_buf(s)); + return s; +} + +static inline void +str_drop(Str s) { /* releases one reference on the underlying Buf */ + buf_drop(str_buf(s)); +} + +static inline int +str_cmp(Str s, Str t) { /* memcmp over the common prefix; on a tie the shorter string orders first */ + int d = memcmp(s.data, t.data, MIN(s.len, t.len)); + if (d != 0 || s.len == t.len) return d; + else if (s.len < t.len) return -1; + else return 1; +} + +static inline bool +str_eq(Str s, Str t) { /* equal lengths are checked first, so memcmp only runs on same-length inputs */ + return s.len == t.len && !memcmp(s.data, t.data, s.len); +} + +#define str_slice(s, l, ...) 
str_slice_(s, l, VA_DEFAULT(,##__VA_ARGS__, str_len(s))) +Str str_slice_(Str s, isize l, isize u); /* sub-slice of s; negative l or u count back from the end of s */ + +static inline usize +str_utf8_decode(rune *c, Str s) { /* forwards the bytes and length of s to r_utf8_decode and returns its result */ + return r_utf8_decode(c, (char *)s.data, s.len); +} diff --git a/src/buffer.c b/src/buffer.c @@ -0,0 +1,96 @@ +#include "alloc.h" +#include "buffer.h" +#include "debug.h" +#include "rcx.h" + +Buf +buf_memory(void *mem, usize cap) { /* NOTE(review): presumably mem must hold at least sizeof(BufHdr_) + cap bytes; only its alignment is checked here - confirm with callers */ + require(((uptr)mem & (alignof(BufHdr_) - 1)) == 0, + "buf_memory: bad alignment"); + BufHdr_ *h = mem; + *h = (BufHdr_){cap, 1, 0}; /* flags 0: buf_drop will not call free on caller-provided memory */ + return h->data; /* NOTE(review): returns the address of the data member, whereas buf_cap, buf_ref and buf_drop step back by sizeof(BufHdr_); see the layout note on struct buf_hdr_ - confirm */ +} + +Buf +buf_alloc(usize cap) { + BufHdr_ *h = r_alloc(sizeof *h + cap); /* one allocation holds header plus cap data bytes */ + if (!h) return 0; + *h = (BufHdr_){cap, 1, BUF_FLAG_FREE_}; + return h->data; +} + +Buf +buf_allocz(usize cap) { + BufHdr_ *h = r_allocz(sizeof *h + cap); + if (!h) return 0; + *h = (BufHdr_){cap, 1, BUF_FLAG_FREE_}; /* header is written after the r_allocz call */ + return h->data; +} + +Buf +buf_ealloc(usize cap) { + BufHdr_ *h = r_ealloc(sizeof *h + cap); /* result is used unchecked; presumably r_ealloc does not return on failure - confirm */ + *h = (BufHdr_){cap, 1, BUF_FLAG_FREE_}; + return h->data; +} + +Buf +buf_eallocz(usize cap) { + BufHdr_ *h = r_eallocz(sizeof *h + cap); /* result is used unchecked; presumably r_eallocz does not return on failure - confirm */ + *h = (BufHdr_){cap, 1, BUF_FLAG_FREE_}; + return h->data; +} + +int +buf_realloc(Buf *b, usize cap) { + BufHdr_ *h = *b ? (BufHdr_ *)*b - 1 : 0; /* 0 when *b is NULL, so the call below acts on a null header pointer */ + if (r_realloc(&h, sizeof *h + cap) < 0) return -1; + if (!*b) *h = (BufHdr_){cap, 1, BUF_FLAG_FREE_}; /* NOTE(review): the header, including cap, is written only when *b was NULL; for an existing buffer the stored cap keeps its old value, and the same holds in the three functions below - confirm */ + *b = h->data; + return 0; +} + +int +buf_reallocz(Buf *b, usize cap) { + BufHdr_ *h = *b ? (BufHdr_ *)*b - 1 : 0; + if (r_reallocz(&h, sizeof *h + h->cap, sizeof *h + cap) < 0) return -1; /* NOTE(review): h is 0 when *b is NULL, yet h->cap is evaluated in the argument list */ + if (!*b) *h = (BufHdr_){cap, 1, BUF_FLAG_FREE_}; + *b = h->data; + return 0; +} + +int +buf_erealloc(Buf *b, usize cap) { + BufHdr_ *h = *b ? (BufHdr_ *)*b - 1 : 0; + r_erealloc(&h, sizeof *h + cap); /* return value not inspected; presumably r_erealloc does not return on failure - confirm */ + if (!*b) *h = (BufHdr_){cap, 1, BUF_FLAG_FREE_}; + *b = h->data; + return 0; +} + +int +buf_ereallocz(Buf *b, usize cap) { + BufHdr_ *h = *b ? 
(BufHdr_ *)*b - 1 : 0; + r_ereallocz(&h, sizeof *h + h->cap, sizeof *h + cap); /* NOTE(review): h is 0 when *b is NULL, yet h->cap is evaluated in the argument list */ + if (!*b) *h = (BufHdr_){cap, 1, BUF_FLAG_FREE_}; + *b = h->data; + return 0; +} + +Str +buf_slice_(Buf b, usize l, usize u) { + usize cap = buf_cap(b); + + if (l < 0) l += cap; /* NOTE(review): l and u are usize here but isize in str_slice_; if usize is unsigned, the l < 0 and u < 0 branches never run and the 0 <= halves of the assertions are always true - confirm whether isize was intended */ + assert(0 <= l && l <= cap, "buf_slice: l out of bounds"); + + if (u < 0) u += cap; + assert(0 <= u && u <= cap, "buf_slice: u out of bounds"); + + return (Str){ + .off = l, + .len = l < u ? u - l : 0, /* empty slice when u is not greater than l */ + .data = b + l, + }; +} diff --git a/src/string.c b/src/string.c @@ -0,0 +1,18 @@ +#include "debug.h" +#include "rcx.h" +#include "string.h" + +Str +str_slice_(Str s, isize l, isize u) { + if (l < 0) l += s.len; /* a negative l counts back from the end of s */ + assert(0 <= l && l <= s.len, "str_slice: l out of bounds"); + + if (u < 0) u += s.len; /* a negative u counts back from the end of s */ + assert(0 <= u && u <= s.len, "str_slice: u out of bounds"); + + return (Str){ + .off = s.off + l, /* offset stays relative to the start of the owning Buf */ + .len = l < u ? u - l : 0, /* empty slice when u is not greater than l */ + .data = s.data + l, + }; +}