commit 88a9d951aaf1e00b601fc287f9cdfd1aa90d0c45
parent ebec62e8b16d178054c7dcc44bb5748033b99f26
Author: robert <robertrussell.72001@gmail.com>
Date: Mon, 11 Jul 2022 20:08:17 -0700
Initial commit
Diffstat:
15 files changed, 746 insertions(+), 0 deletions(-)
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,2 @@
+*.o
+*.a
diff --git a/example/opt.c b/example/opt.c
@@ -0,0 +1,38 @@
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "cext/cext.h"
+#include "cext/opt.h"
+
+/* Demo driver for the option parser. Prints each recognised option (and the
+ * argument it consumed, if any), then the positional arguments that
+ * opt_parse() compacted to the front of argv. Exits with status 1 on an
+ * unknown option, a missing required argument, or an unexpected attached
+ * argument. */
+int
+main(int argc, char **argv) {
+	opt_ctx opt;
+
+	opt_init(&opt, &argc, argv);
+	while (opt_parse(&opt)) {
+		if (opt.s == 'a') {
+			/* User input is validated explicitly: assert() would abort, or
+			 * disappear entirely under NDEBUG. */
+			if (!opt.avail) {
+				fprintf(stderr, "option -a requires an argument\n");
+				exit(1);
+			}
+			printf("short a: %s\n", opt_arg(&opt));
+		} else if (opt.s == 'b') {
+			printf("short b\n");
+		} else if (!strcmp(opt.l, "xxx")) {
+			if (!opt.avail) {
+				fprintf(stderr, "option --xxx requires an argument\n");
+				exit(1);
+			}
+			printf("long xxx: %s\n", opt_arg(&opt));
+		} else if (!strcmp(opt.l, "yyy")) {
+			if (opt.attached) {
+				fprintf(stderr, "option --yyy takes no argument\n");
+				exit(1);
+			}
+			printf("long yyy\n");
+		} else {
+			if (opt.s)
+				printf("unknown short opt: %c\n", opt.s);
+			else
+				printf("unknown long opt: %s\n", opt.l);
+			exit(1);
+		}
+	}
+
+	/* argc was decremented by the parser for every word it consumed. */
+	printf("\n%d arguments:\n", argc-1);
+	for (int i = 1; i < argc; i++) /* int, to match argc's signedness */
+		printf("\t%s\n", argv[i]);
+
+	return 0;
+}
diff --git a/include/cext/all.h b/include/cext/all.h
@@ -0,0 +1,6 @@
+#include "cext/alloc.h"
+#include "cext/cext.h"
+#include "cext/log.h"
+#include "cext/opt.h"
+#include "cext/utf8.h"
+#include "cext/vec.h"
diff --git a/include/cext/alloc.h b/include/cext/alloc.h
@@ -0,0 +1,37 @@
+#pragma once
+
+#include "cext/def.h"
+
+/* A consistently-named set of memory allocators: {,e}{,re}alloc{,n,f}{,z}
+ * e- => allocation failures are fatal
+ * re- => realloc-style allocator
+ * -n => array allocator (with overflow check)
+ * -f => struct with flexible array member allocator (with overflow check)
+ * -z => new memory initialized to 0.
+ * All these allocators are interoperable with the stdlib allocators. */
+/* Plain allocators: return null when the underlying allocation fails.
+ * NOTE: in src/alloc.c the size of the -n/-f forms (except allocnz, which
+ * defers to calloc) is computed by a helper that reports overflow through
+ * fatalf() instead of returning null. */
+void *alloc(usize size); /* aka malloc */
+void *allocz(usize size); /* implemented as calloc(1, size) */
+void *allocn(usize len, usize size); /* len elements of size bytes */
+void *allocnz(usize len, usize size); /* aka calloc */
+void *allocf(usize hsize, usize flen, usize fsize); /* hsize + flen*fsize bytes */
+void *allocfz(usize hsize, usize flen, usize fsize);
+/* Reallocators. The -z forms need the old size/length as well so that only
+ * the newly added tail is zeroed. realloc itself is not defined in
+ * src/alloc.c; this redeclares the stdlib function. */
+void *realloc(void *p, usize size);
+void *reallocz(void *p, usize osize, usize nsize);
+void *reallocn(void *p, usize len, usize size);
+void *reallocnz(void *p, usize olen, usize nlen, usize size);
+void *reallocf(void *p, usize hsize, usize flen, usize fsize);
+void *reallocfz(void *p,usize hsize, usize oflen, usize nflen, usize fsize);
+/* e- variants: same arguments as above, but a null result is reported with
+ * fatalf("allocation failure"). */
+void *ealloc(usize size);
+void *eallocz(usize size);
+void *eallocn(usize len, usize size);
+void *eallocnz(usize len, usize size);
+void *eallocf(usize hsize, usize flen, usize fsize);
+void *eallocfz(usize hsize, usize flen, usize fsize);
+void *erealloc(void *p, usize size);
+void *ereallocz(void *p, usize osize, usize nsize);
+void *ereallocn(void *p, usize len, usize size);
+void *ereallocnz(void *p, usize olen, usize nlen, usize size);
+void *ereallocf(void *p, usize hsize, usize flen, usize fsize);
+void *ereallocfz(void *p, usize hsize, usize oflen, usize nflen, usize fsize);
+
+/* Not defined in src/alloc.c either; this redeclares the stdlib free. */
+void free(void *p);
diff --git a/include/cext/cext.h b/include/cext/cext.h
@@ -0,0 +1,14 @@
+#pragma once
+
+#include <inttypes.h>
+#include <stddef.h>
+
+/* Standard headers that should be part of the language proper */
+#include <stdarg.h>
+#include <stdbool.h>
+#if __STDC_VERSION__ >= 201100L
+#include <stdalign.h>
+#include <stdnoreturn.h>
+#endif
+
+#include "cext/def.h"
diff --git a/include/cext/def.h b/include/cext/def.h
@@ -0,0 +1,104 @@
+#pragma once
+
+#include "stddef.h"
+#include "stdint.h"
+
+/* Token pasting. The JOIN_AUX indirection lets macro arguments be expanded
+ * before they are pasted together. */
+#define JOIN_AUX(a,b) a##b
+#define JOIN(a,b) JOIN_AUX(a,b)
+
+/* Number of elements in array a (a must be an array, not a pointer). */
+#define LEN(a) (sizeof (a) / sizeof (a)[0])
+/* NOTE: MIN and MAX evaluate one of their arguments twice, so arguments
+ * should not have side effects. */
+#define MIN(a,b) ((a) < (b) ? (a) : (b))
+#define MAX(a,b) ((a) > (b) ? (a) : (b))
+
+/* Short names for the built-in unsigned and long long types. */
+typedef unsigned char uchar;
+typedef unsigned short ushort;
+typedef unsigned int uint;
+typedef unsigned long ulong;
+typedef long long llong;
+typedef unsigned long long ullong;
+
+/* Short aliases for the <stdint.h> types together with their limit macros
+ * (*_MIN, *_MAX) and constant-forming macros (*_C). Signed types first. */
+#define I8_MIN INT8_MIN
+#define I8_MAX INT8_MAX
+#define I8_C INT8_C
+typedef int8_t i8;
+
+#define I16_MIN INT16_MIN
+#define I16_MAX INT16_MAX
+#define I16_C INT16_C
+typedef int16_t i16;
+
+#define I32_MIN INT32_MIN
+#define I32_MAX INT32_MAX
+#define I32_C INT32_C
+typedef int32_t i32;
+
+#define I64_MIN INT64_MIN
+#define I64_MAX INT64_MAX
+#define I64_C INT64_C
+typedef int64_t i64;
+
+#define IMAX_MIN INTMAX_MIN
+#define IMAX_MAX INTMAX_MAX
+#define IMAX_C INTMAX_C
+typedef intmax_t imax;
+
+#define IPTR_MIN INTPTR_MIN
+#define IPTR_MAX INTPTR_MAX
+typedef intptr_t iptr;
+
+/* isize is not provided: the ssize_t-based typedef below is disabled. */
+/* typedef ssize_t isize; */
+
+/* Unsigned counterparts. */
+#define U8_MAX UINT8_MAX
+#define U8_C UINT8_C
+typedef uint8_t u8;
+
+#define U16_MAX UINT16_MAX
+#define U16_C UINT16_C
+typedef uint16_t u16;
+
+#define U32_MAX UINT32_MAX
+#define U32_C UINT32_C
+typedef uint32_t u32;
+
+#define U64_MAX UINT64_MAX
+#define U64_C UINT64_C
+typedef uint64_t u64;
+
+#define UMAX_MAX UINTMAX_MAX
+#define UMAX_C UINTMAX_C
+typedef uintmax_t umax;
+
+#define UPTR_MAX UINTPTR_MAX
+typedef uintptr_t uptr;
+
+#define USIZE_MAX SIZE_MAX
+typedef size_t usize;
+
+/* 128-bit integers, only when the compiler defines __SIZEOF_INT128__.
+ * CEXT_HAVE_128 is defined in that case so users can test for them. */
+#ifdef __SIZEOF_INT128__
+#define CEXT_HAVE_128 1
+
+/* The limits are built from each other: I128_MAX is U128_MAX shifted right
+ * by one, and I128_MIN is computed as -1 - I128_MAX. */
+#define I128_MIN ((i128)-1 - I128_MAX)
+#define I128_MAX ((i128)(U128_MAX >> 1))
+typedef __int128 i128;
+
+/* All 128 bits set, assembled from two U64_MAX halves. */
+#define U128_MAX (((u128)U64_MAX << 64) | U64_MAX)
+typedef unsigned __int128 u128;
+
+#endif
+
+/* A rune holds one Unicode code point in a u32.
+ * RUNE_BAD (U+FFFD) is the value utf8decode() stores when decoding fails;
+ * RUNE_MAX (U+10FFFF) is the largest value the UTF-8 routines accept.
+ * RUNE_C forms a rune constant (it is simply U32_C). */
+#define RUNE_BAD RUNE_C(0xFFFD)
+#define RUNE_MAX RUNE_C(0x10FFFF)
+#define RUNE_C U32_C
+typedef u32 rune;
+
+/* maxalign: a type used for its alignment (vec.h uses it to align the data
+ * that follows a vector header). With C11 or later it is max_align_t;
+ * otherwise it is approximated by a struct holding one of each of the
+ * widest basic types. */
+#if __STDC_VERSION__ >= 201100L
+typedef max_align_t maxalign;
+#else
+/* Fallback which is probably correct */
+typedef struct {
+ intmax_t i; /* biggest integer */
+ long double d; /* biggest floating point */
+ void *p; /* data pointer */
+ void (*f)(void); /* function pointer */
+} maxalign;
+#endif
diff --git a/include/cext/log.h b/include/cext/log.h
@@ -0,0 +1,11 @@
+#pragma once
+
+#include <stdbool.h>
+
+/* printf-style logging macros. Each one forwards to cext_log() with the call
+ * site (__FILE__, __LINE__), a numeric level (0..3), the label to print, the
+ * SGR colour escape for that label, and an exit code. Only fatalf passes a
+ * nonzero exit code (1). */
+#define infof(...) cext_log(__FILE__, __LINE__, 0, "INFO", "\x1b[32m", 0, __VA_ARGS__)
+#define warnf(...) cext_log(__FILE__, __LINE__, 1, "WARN", "\x1b[33m", 0, __VA_ARGS__)
+#define errorf(...) cext_log(__FILE__, __LINE__, 2, "ERROR", "\x1b[31m", 0, __VA_ARGS__)
+#define fatalf(...) cext_log(__FILE__, __LINE__, 3, "FATAL", "\x1b[31m", 1, __VA_ARGS__)
+
+/* Configure logging.
+ *   color      > 0 forces colour on, < 0 forces it off, 0 auto-detects
+ *   log_time   prefix each record with a UTC timestamp
+ *   log_loc    include the file:line of the call site
+ *   min_level  records with a lower level are not printed */
+void cext_log_init(int color, bool log_time, bool log_loc, int min_level);
+/* Back end of the macros above; writes one record to stderr and calls
+ * exit(code) afterwards when code is nonzero. */
+void cext_log(char *file, int line, int level, char *name, char *color, int code, char *fmt, ...);
diff --git a/include/cext/opt.h b/include/cext/opt.h
@@ -0,0 +1,21 @@
+#pragma once
+
+#include <stdbool.h>
+
+/* State of one command-line parse. The first four fields describe the option
+ * most recently returned by opt_parse(); the rest is parser bookkeeping. */
+typedef struct opt_ctx {
+ char s, *l; /* Short/long opt; l is "" if s != 0 */
+ bool avail; /* Argument for option is available */
+ bool attached; /* Argument for option of form "-oARG" or "--opt=ARG" */
+
+ /* Internal */
+ char *arg; /* Candidate argument for the current option, or null */
+ bool arg_used; /* Set by opt_arg(): the candidate was consumed */
+ char *cluster; /* Current position inside a "-abc" word, or null */
+ int *argc; /* Caller's argc; decremented for every consumed word */
+ char **o; /* Next argv element to examine */
+ char **a; /* Where the next non-option argument is stored */
+} opt_ctx;
+
+/* Start parsing at argv[1]; argc and argv are modified by opt_parse(). */
+void opt_init(opt_ctx *opt, int *argc, char **argv);
+/* Advance to the next option. Returns true and fills in s/l/avail/attached,
+ * or returns false when no options remain. */
+bool opt_parse(opt_ctx *opt);
+/* Consume and return the current option's argument; requires opt->avail. */
+char *opt_arg(opt_ctx *opt);
diff --git a/include/cext/utf8.h b/include/cext/utf8.h
@@ -0,0 +1,21 @@
+#pragma once
+
+#include "cext/def.h"
+
+/* Maximum number of bytes in the UTF-8 encoding of a single rune. */
+#define UTF8_SIZE 4
+
+/* Return the number of bytes needed to encode c, or 0 if c is an invalid
+ * codepoint. If s is nonnull, then it must have length >= utf8encode(0, c),
+ * which is guaranteed to be at most UTF8_SIZE; in this case, if c is a valid
+ * codepoint, then encode c into s. */
+usize utf8encode(char *s, rune c);
+
+/* Decode the first rune in s and return the number of consumed bytes. If this
+ * succeeds and c is nonnull, then set *c to the decoded rune. Otherwise, no
+ * valid rune is legally encoded as a prefix of s; in this case, set *c to
+ * RUNE_BAD if c is nonnull, and return n such that
+ * - n = 0 iff s is null or an incomplete prefix of a valid rune;
+ * - n > 0 iff the first min(n+1,slen) bytes of s are not a prefix of any
+ * valid rune (but if n < slen, then s[n] might be the first byte of a
+ * valid rune). */
+usize utf8decode(rune *c, char *s, usize slen);
diff --git a/include/cext/vec.h b/include/cext/vec.h
@@ -0,0 +1,102 @@
+#pragma once
+
+#include <string.h>
+
+#include "cext/alloc.h"
+#include "cext/def.h"
+
+/* Bookkeeping stored immediately before a vector's elements. The flexible
+ * member arr is typed maxalign so that the element storage following the
+ * header is aligned for maxalign. */
+typedef struct {
+ usize len; /* number of elements in use */
+ usize cap; /* number of elements allocated */
+ maxalign arr[];
+} vechdr;
+
+/* Header of the non-null vector v (v points just past its header). */
+#define VECHDR(v) ((vechdr *)(v) - 1)
+
+/* Defaults */
+/* Storage-class prefix placed on the prototypes made by VEC_DECLARE. */
+#define VEC_STATIC
+/* Name of a generated function: METHOD(push, ivec) is ivec_push. */
+#define METHOD(name, prefix) JOIN(JOIN(prefix,_),name)
+/* Smallest capacity a non-empty vector is ever given. */
+#define VEC_MIN_CAP 8
+/* Allocator, called as VEC_ALLOC(old_hdr, hdr_size, n_elems, elem_size). */
+#define VEC_ALLOC ereallocf
+#define VEC_FREE free
+
+/* VEC_DECLARE(T, prefix) expands to prototypes for the vector-of-T functions
+ * prefix_free, prefix_len, prefix_cap, prefix_resize, prefix_reserve,
+ * prefix_ins, prefix_push, prefix_del and prefix_pop. Every function takes
+ * the address of the T* that represents the vector. The functions returning
+ * int yield 0 on success and -1 on allocation failure (see VEC_DEFINE).
+ * NOTE: ",##__VA_ARGS__" is a GNU extension. */
+#define VEC_DECLARE(T, ...)\
+VEC_STATIC void METHOD(free,##__VA_ARGS__)(T **v); \
+VEC_STATIC usize METHOD(len,##__VA_ARGS__)(T **v); \
+VEC_STATIC usize METHOD(cap,##__VA_ARGS__)(T **v); \
+VEC_STATIC int METHOD(resize,##__VA_ARGS__)(T **v, usize cap); \
+VEC_STATIC int METHOD(reserve,##__VA_ARGS__)(T **v, usize n); \
+VEC_STATIC int METHOD(ins,##__VA_ARGS__)(T **v, usize i, T e); \
+VEC_STATIC int METHOD(push,##__VA_ARGS__)(T **v, T e); \
+VEC_STATIC T METHOD(del,##__VA_ARGS__)(T **v, usize i); \
+VEC_STATIC T METHOD(pop,##__VA_ARGS__)(T **v);
+
+/* VEC_DEFINE(T, prefix) expands to the definitions of the functions declared
+ * by VEC_DECLARE(T, prefix). A vector is a T* pointing just past a vechdr; a
+ * null pointer is the empty vector. Every function takes the address of that
+ * pointer because (re)allocation may move the storage.
+ *   free     release the storage and reset *v to null
+ *   len/cap  length / capacity in elements (0 for a null vector)
+ *   resize   set the capacity (at least VEC_MIN_CAP; 0 frees), truncating
+ *            the length if needed; -1 if VEC_ALLOC returns null, else 0
+ *   reserve  ensure room for n more elements, at least doubling the capacity
+ *            when it has to grow; returns what resize returns
+ *   ins      insert e before index i (caller guarantees i <= len)
+ *   push     append e
+ *   del      remove and return element i (caller guarantees i < len)
+ *   pop      remove and return the last element (vector must be non-empty)
+ * Fix over the first version: resize no longer reads the length out of a
+ * freshly allocated, uninitialised header when the vector was null. */
+#define VEC_DEFINE(T, ...)\
+void METHOD(free,##__VA_ARGS__)(T **v) { \
+	if (*v) \
+		VEC_FREE(VECHDR(*v)); \
+	*v = 0; \
+} \
+usize METHOD(len,##__VA_ARGS__)(T **v) { \
+	return *v ? VECHDR(*v)->len : 0; \
+} \
+usize METHOD(cap,##__VA_ARGS__)(T **v) { \
+	return *v ? VECHDR(*v)->cap : 0; \
+} \
+int METHOD(resize,##__VA_ARGS__)(T **v, usize cap) { \
+	if (cap == 0) { \
+		METHOD(free,##__VA_ARGS__)(v); \
+	} else { \
+		cap = MAX(cap, VEC_MIN_CAP); \
+		vechdr *h = *v ? VECHDR(*v) : 0; \
+		/* Capture the length first: a new allocation has no valid header. */ \
+		usize len = h ? h->len : 0; \
+		h = VEC_ALLOC(h, sizeof *h, cap, sizeof (*v)[0]); \
+		if (!h) \
+			return -1; /* *v still refers to the old storage */ \
+		h->len = MIN(len, cap); \
+		h->cap = cap; \
+		*v = (void *)(h + 1); \
+	} \
+	return 0; \
+} \
+int METHOD(reserve,##__VA_ARGS__)(T **v, usize n) { \
+	vechdr *h = *v ? VECHDR(*v) : 0; \
+	usize rem = h ? h->cap - h->len : 0; \
+	if (n > rem) { \
+		usize need = n - rem; \
+		usize cap = h ? h->cap + MAX(h->cap, need) : need; \
+		return METHOD(resize,##__VA_ARGS__)(v, cap); \
+	} else { \
+		return 0; \
+	} \
+} \
+int METHOD(ins,##__VA_ARGS__)(T **v, usize i, T e) { \
+	if (METHOD(reserve,##__VA_ARGS__)(v, 1)) \
+		return -1; \
+	/* Shift the tail up by one slot to open a gap at index i. */ \
+	memmove(&(*v)[i+1], &(*v)[i], (VECHDR(*v)->len - i) * sizeof (*v)[0]); \
+	(*v)[i] = e; \
+	VECHDR(*v)->len++; \
+	return 0; \
+} \
+int METHOD(push,##__VA_ARGS__)(T **v, T e) { \
+	return METHOD(ins,##__VA_ARGS__)(v, METHOD(len,##__VA_ARGS__)(v), e); \
+} \
+T METHOD(del,##__VA_ARGS__)(T **v, usize i) { \
+	T e = (*v)[i]; \
+	/* Shift the tail down by one slot over the removed element. */ \
+	memmove(&(*v)[i], &(*v)[i+1], (VECHDR(*v)->len - i - 1) * sizeof (*v)[0]); \
+	VECHDR(*v)->len--; \
+	return e; \
+} \
+T METHOD(pop,##__VA_ARGS__)(T **v) { \
+	return METHOD(del,##__VA_ARGS__)(v, VECHDR(*v)->len - 1); \
+}
+
+/* TODO?
+insn/insnz
+deln
+clr => set length to 0 without resizing
+dup => duplicate/clone vector
+optionally take cmp function and define:
+ sort => qsort wrapper
+ bsearch => bsearch wrapper
+ lsearch => linear search on unsorted array
+*/
diff --git a/src/alloc.c b/src/alloc.c
@@ -0,0 +1,107 @@
+#include <stdlib.h>
+#include <string.h>
+
+#include "cext/cext.h"
+#include "cext/alloc.h"
+#include "cext/log.h"
+
+/* If s1 < MUL_NO_OVERFLOW and s2 < MUL_NO_OVERFLOW, then s1*s2 <= USIZE_MAX
+ * (but not conversely). This lets us avoid division in overflow checks. */
+#define MUL_NO_OVERFLOW ((usize) 1 << (sizeof(usize) * 4))
+
+/* Report whether the product a*b would exceed USIZE_MAX. */
+static bool
+mul_will_overflow(usize a, usize b) {
+	/* Fast path: two operands below MUL_NO_OVERFLOW can never overflow. */
+	if (a < MUL_NO_OVERFLOW && b < MUL_NO_OVERFLOW)
+		return false;
+	/* Zero times anything fits, and must not be used as a divisor. */
+	if (a == 0)
+		return false;
+	return b > USIZE_MAX / a;
+}
+
+#define FAIL_OVERFLOW() fatalf("allocation failure: overflow")
+
+/* Return a + b*c. FAIL_OVERFLOW() is invoked if either the product or the
+ * final sum would exceed USIZE_MAX. */
+static usize
+addmul(usize a, usize b, usize c) {
+	if (mul_will_overflow(b, c))
+		FAIL_OVERFLOW();
+
+	usize product = b * c;
+	if (product > USIZE_MAX - a)
+		FAIL_OVERFLOW();
+
+	return product + a;
+}
+
+/* Allocate size bytes; thin wrapper over malloc. */
+void *
+alloc(usize size) {
+ return malloc(size);
+}
+
+/* Allocate size zero-filled bytes. */
+void *
+allocz(usize size) {
+ return calloc(1, size);
+}
+
+/* Allocate an array of len elements of size bytes each; the byte count is
+ * computed by addmul(), which reports overflow via FAIL_OVERFLOW(). */
+void *
+allocn(usize len, usize size) {
+ return allocf(0, len, size);
+}
+
+/* Zero-filled array allocation; thin wrapper over calloc. */
+void *
+allocnz(usize len, usize size) {
+ return calloc(len, size);
+}
+
+/* Allocate hsize + flen*fsize bytes: a header followed by flen elements of a
+ * flexible array member. The size is overflow-checked by addmul(). */
+void *
+allocf(usize hsize, usize flen, usize fsize) {
+ return alloc(addmul(hsize, flen, fsize));
+}
+
+/* Same as allocf, with the memory zero-filled. */
+void *
+allocfz(usize hsize, usize flen, usize fsize) {
+ return allocz(addmul(hsize, flen, fsize));
+}
+
+/* Resize p from osize to nsize bytes with realloc and zero any bytes gained
+ * beyond osize. Returns the new pointer, or null if realloc returned null. */
+void *
+reallocz(void *p, usize osize, usize nsize) {
+	void *q = realloc(p, nsize);
+	if (!q)
+		return q;
+
+	if (osize < nsize) {
+		char *tail = (char *)q + osize;
+		memset(tail, 0, nsize - osize);
+	}
+	return q;
+}
+
+/* Resize p to an array of len elements of size bytes each. */
+void *
+reallocn(void *p, usize len, usize size) {
+ return reallocf(p, 0, len, size);
+}
+
+/* Resize an array from olen to nlen elements, zeroing the added tail. */
+void *
+reallocnz(void *p, usize olen, usize nlen, usize size) {
+ return reallocfz(p, 0, olen, nlen, size);
+}
+
+/* Resize p to hsize + flen*fsize bytes; the size is overflow-checked by
+ * addmul(). */
+void *
+reallocf(void *p, usize hsize, usize flen, usize fsize) {
+ return realloc(p, addmul(hsize, flen, fsize));
+}
+
+/* Resize a header-plus-array block from oflen to nflen elements, zeroing the
+ * added tail. Only the new size goes through addmul(); the old size is
+ * computed without an overflow check. */
+void *
+reallocfz(void *p, usize hsize, usize oflen, usize nflen, usize fsize) {
+ return reallocz(p, hsize + oflen*fsize, addmul(hsize, nflen, fsize));
+}
+
+/* Generators for the fatal-on-failure e* wrappers. Each use has the form
+ *     EALLOC(name, parameter-list)(argument-list)
+ * EALLOC emits the function header and the start of the call, ending in the
+ * bare token EALLOC_AUX. That token, together with the parenthesised
+ * argument list that follows it in the source, is then expanded as
+ * EALLOC_AUX(argument-list), which supplies the arguments, closes the call
+ * and adds the null check. For example EALLOC(alloc, usize size)(size)
+ * becomes:
+ *     void *ealloc(usize size) {
+ *         void *q = alloc(size);
+ *         if (!q) fatalf("allocation failure");
+ *         return q;
+ *     }
+ */
+#define EALLOC(name, ...)\
+ void *e##name(__VA_ARGS__) {\
+ void *q = name(EALLOC_AUX
+#define EALLOC_AUX(...)\
+ __VA_ARGS__);\
+ if (!q) fatalf("allocation failure");\
+ return q;\
+ }
+
+/* One e* wrapper for each allocator declared in cext/alloc.h. */
+EALLOC(alloc, usize size)(size)
+EALLOC(allocz, usize size)(size)
+EALLOC(allocn, usize len, usize size)(len, size)
+EALLOC(allocnz, usize len, usize size)(len, size)
+EALLOC(allocf, usize hsize, usize flen, usize fsize)(hsize, flen, fsize)
+EALLOC(allocfz, usize hsize, usize flen, usize fsize)(hsize, flen, fsize)
+EALLOC(realloc, void *p, usize size)(p, size)
+EALLOC(reallocz, void *p, usize osize, usize nsize)(p, osize, nsize)
+EALLOC(reallocn, void *p, usize len, usize size)(p, len, size)
+EALLOC(reallocnz, void *p, usize olen, usize nlen, usize size)(p, olen, nlen, size)
+EALLOC(reallocf, void *p, usize hsize, usize flen, usize fsize)(p, hsize, flen, fsize)
+EALLOC(reallocfz, void *p, usize hsize, usize oflen, usize nflen, usize fsize)(p, hsize, oflen, nflen, fsize)
diff --git a/src/log.c b/src/log.c
@@ -0,0 +1,79 @@
+#define _POSIX_C_SOURCE 199506L
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#include <unistd.h>
+
+#include "cext/cext.h"
+#include "cext/log.h"
+
+/* Buffer size for one timestamp, including the terminating NUL. */
+#define ISO8601_SIZE (sizeof "YYYY-MM-DDThh:mm:ssZ")
+/* Yield escape sequence c when colour is enabled, otherwise "". */
+#define SGR(c) (use_color ? (c) : "")
+#define RESET "\x1b[m"
+#define BOLD "\x1b[1m"
+#define FAINT "\x1b[2m"
+
+/* Logger configuration, set by cext_log_init(). */
+static bool use_color = false;
+static bool log_time = false;
+static bool log_loc = false;
+static int min_level = 0;
+
+/* Configure the logger. color > 0 forces colour on, color < 0 forces it off,
+ * and color == 0 enables it only when stderr is a terminal and NO_COLOR is
+ * unset or empty (https://no-color.org). The remaining arguments are stored
+ * as the timestamp, location and minimum-level settings. */
+void
+cext_log_init(int color, bool log_time_, bool log_loc_, int min_level_) {
+	log_time = log_time_;
+	log_loc = log_loc_;
+	min_level = min_level_;
+
+	if (color != 0) { /* forced on or off */
+		use_color = color > 0;
+		return;
+	}
+
+	/* detect */
+	char *no_color = getenv("NO_COLOR");
+	bool suppressed = no_color && *no_color != '\0';
+	use_color = isatty(fileno(stderr)) && !suppressed;
+}
+
+/* Format t as a UTC timestamp of the form "YYYY-MM-DDThh:mm:ssZ" into buf,
+ * which must have room for ISO8601_SIZE bytes. Returns buf. If the time
+ * cannot be converted or formatted, buf is set to the empty string. */
+static char *
+iso8601(char *buf, time_t t) {
+	struct tm tm;
+	/* gmtime_r returns null on failure and tm must not be read then;
+	 * strftime leaves the buffer contents undefined when it returns 0. */
+	if (!gmtime_r(&t, &tm)
+	|| !strftime(buf, ISO8601_SIZE, "%Y-%m-%dT%H:%M:%SZ", &tm))
+		buf[0] = '\0';
+	return buf;
+}
+
+/* Write one log record to stderr and, if code is nonzero, exit with it.
+ *   file, line  call site; printed only if location logging is enabled and
+ *               file is non-null
+ *   level       records below the configured minimum level are not printed
+ *   name        level label, left-justified in 5 columns
+ *   color       SGR escape for the label, used only when colour is enabled
+ *   code        exit status; 0 means return to the caller
+ *   fmt, ...    printf-style message; a newline is appended
+ * A nonzero code terminates the process even when the record is filtered
+ * out, so fatalf() never returns to its caller. */
+void
+cext_log(
+	char *file, int line,
+	int level, char *name, char *color,
+	int code,
+	char *fmt, ...
+) {
+	if (level < min_level) {
+		/* Suppress the output only; fatal records must still terminate. */
+		if (code)
+			exit(code);
+		return;
+	}
+
+	char stamp[ISO8601_SIZE];
+	if (log_time)
+		iso8601(stamp, time(0)); /* Do this before locking stderr */
+
+	/* Lock stderr so other threads' IO does not get interleaved with ours. */
+	flockfile(stderr);
+
+	if (log_time)
+		fprintf(stderr, "%s%s%s ", SGR(FAINT), stamp, SGR(RESET));
+	fprintf(stderr, "%s%s%-5s%s ", SGR(BOLD), SGR(color), name, SGR(RESET));
+	if (log_loc && file)
+		fprintf(stderr, "%s%s:%d%s ", SGR(FAINT), file, line, SGR(RESET));
+
+	va_list args;
+	va_start(args, fmt);
+	vfprintf(stderr, fmt, args);
+	va_end(args);
+	fputc('\n', stderr);
+
+	funlockfile(stderr);
+
+	if (code)
+		exit(code);
+}
diff --git a/src/opt.c b/src/opt.c
@@ -0,0 +1,87 @@
+#include <assert.h>
+#include <string.h>
+
+#include "cext/cext.h"
+#include "cext/opt.h"
+
+/* Prepare opt for parsing the command line described by *argc and argv.
+ * Parsing starts at argv[1]. opt_parse() later rewrites argv in place and
+ * decrements *argc, so both must stay valid while opt is in use.
+ * Every field is initialised, so the public fields hold defined values even
+ * before the first opt_parse() call. */
+void
+opt_init(opt_ctx *opt, int *argc, char **argv) {
+	/* With an empty argv (argc == 0) argv[0] is already the terminating
+	 * null; start there instead of one element past it. */
+	char **first = *argc > 0 ? &argv[1] : &argv[0];
+
+	opt->s = 0;
+	opt->l = "";
+	opt->avail = false;
+	opt->attached = false;
+	opt->arg = 0;
+	opt->arg_used = false;
+	opt->cluster = 0;
+	opt->argc = argc;
+	opt->o = first;
+	opt->a = first;
+}
+
+/* Advance to the next option on the command line.
+ * Returns true after storing the option in opt->s (short) or opt->l (long)
+ * and setting opt->avail / opt->attached to describe its candidate argument.
+ * Returns false when no options remain; by then the non-option arguments
+ * have been moved to the front of the argv range, terminated by a null
+ * pointer, and *opt->argc has been reduced by one for every consumed word. */
+bool
+opt_parse(opt_ctx *opt) {
+ /* Account for an argument that the caller took with opt_arg(). */
+ if (opt->arg_used) {
+ if (opt->attached) {
+ /* The rest of the word was the argument, so stop scanning it. */
+ opt->cluster = 0;
+ } else {
+ /* The argument was the following word: skip and uncount it. */
+ opt->o++;
+ (*opt->argc)--;
+ }
+ opt->arg_used = false;
+ }
+
+ /* Still inside a "-abc" word: the next character is the next option.
+ * opt->l keeps the "" stored when the word was first seen. */
+ if (opt->cluster) {
+ if ((opt->s = *++opt->cluster))
+ goto found_opt;
+ opt->cluster = 0;
+ }
+
+ /* Scan forward for the next option word. */
+ bool skip = false;
+ for (; *opt->o; opt->o++) {
+ /* Non-options: anything after "--", a word not starting with '-',
+ * and the lone "-". */
+ if (skip || (*opt->o)[0] != '-' || (*opt->o)[1] == '\0') {
+ /* Got an argument */
+ *opt->a++ = *opt->o;
+ continue;
+ }
+ /* This word is consumed by the parser, so it no longer counts. */
+ (*opt->argc)--;
+ if ((*opt->o)[1] == '-' && (*opt->o)[2] == '\0') {
+ /* Got "--", so everything else is an argument */
+ skip = true;
+ continue;
+ }
+ /* Got an option */
+ if ((*opt->o)[1] == '-') { /* Long */
+ opt->s = 0;
+ opt->l = &(*opt->o)[2];
+ } else { /* Short */
+ opt->cluster = &(*opt->o)[1];
+ opt->s = *opt->cluster;
+ opt->l = "";
+ }
+ /* Leave opt->o on the word after the option. */
+ opt->o++;
+ goto found_opt;
+ }
+
+ /* End of option parsing */
+ *opt->a = 0;
+ return false;
+
+found_opt:
+ /* Find argument for option */
+ opt->arg = 0;
+ /* For short options opt->l is "", so strchr finds nothing. */
+ if ((opt->arg = strchr(opt->l, '='))) { /* "--opt=ARG" */
+ *opt->arg++ = '\0'; /* Null-terminate opt->l */
+ opt->attached = true;
+ } else if (opt->s && opt->cluster[1] != '\0') { /* "-oARG" */
+ opt->arg = &opt->cluster[1];
+ opt->attached = true;
+ } else { /* "-o ARG" or "--opt ARG" or nothing */
+ /* NOTE: the next word is offered as the argument even if it begins
+ * with '-'; it is null when the option was the last word. */
+ opt->arg = *opt->o;
+ opt->attached = false;
+ }
+ opt->avail = !!opt->arg;
+
+ return true;
+}
+
+/* Return the argument of the option most recently returned by opt_parse()
+ * and mark it as consumed, so the next opt_parse() call steps past it. The
+ * caller must only call this when opt->avail is true. */
+char *
+opt_arg(opt_ctx *opt) {
+	assert(opt->avail);
+
+	char *value = opt->arg;
+	opt->arg_used = true;
+	return value;
+}
diff --git a/src/str.c b/src/str.c
@@ -0,0 +1,39 @@
+/* TODO */
+
+#define _POSIX_C_SOURCE 200809L
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "cext/cext.h"
+#include "cext/alloc.h"
+#include "cext/log.h"
+
+/* Format into a newly allocated string (va_list form).
+ * On success stores the string in *s and returns its length, excluding the
+ * terminating NUL; the caller releases it with free(). On failure returns a
+ * negative value and leaves *s untouched. */
+int
+vaprintf(char **s, const char *fmt, va_list args) {
+	/* First pass, on a copy of args, only measures the output. */
+	va_list probe;
+	va_copy(probe, args);
+	int len = vsnprintf(0, 0, fmt, probe);
+	va_end(probe);
+	if (len < 0)
+		return len;
+
+	/* Add the terminator in size_t: len+1 would overflow int at INT_MAX. */
+	size_t size = (size_t)len + 1;
+	char *buf = alloc(size);
+	if (!buf)
+		return -1;
+
+	int ret = vsnprintf(buf, size, fmt, args);
+	if (ret < 0)
+		free(buf);
+	else
+		*s = buf;
+
+	return ret;
+}
+
+/* Variadic front end of vaprintf(): format into a newly allocated string
+ * stored in *s and return whatever vaprintf() returns. */
+int
+aprintf(char **s, const char *fmt, ...) {
+	va_list ap;
+	int n;
+
+	va_start(ap, fmt);
+	n = vaprintf(s, fmt, ap);
+	va_end(ap);
+
+	return n;
+}
+
+/* Duplicate s with strdup(). As with the e* allocators in alloc.c, failure
+ * is reported with fatalf(), so the result is never null. The caller
+ * releases the copy with free(). */
+char *
+estrdup(char *s) {
+	char *dup = strdup(s);
+	/* CEXT_ALLOC_FAIL is not defined anywhere; use the same report as the
+	 * e* allocators. */
+	if (!dup)
+		fatalf("allocation failure");
+	return dup;
+}
diff --git a/src/utf8.c b/src/utf8.c
@@ -0,0 +1,78 @@
+#include "cext/cext.h"
+#include "cext/utf8.h"
+
+/* Bounds of the range that utf8encodable() rejects. */
+#define SURROGATE_MIN 0xD800
+#define SURROGATE_MAX 0xDFFF
+
+/* Per-length tables, indexed by (sequence length - 1):
+ *   utf8byte  marker bits of the leading byte
+ *   utf8mask  mask selecting those marker bits
+ *   utf8min   smallest rune that needs this length (below it is overlong)
+ *   utf8max   largest rune that fits in this length */
+static const uchar utf8byte[] = { 0x0, 0xC0, 0xE0, 0xF0};
+static const uchar utf8mask[] = {0x80, 0xE0, 0xF0, 0xF8};
+static const rune utf8min[] = { 0x0, 0x80, 0x800, 0x10000};
+static const rune utf8max[] = {0x7F, 0x7FF, 0xFFFF, 0x10FFFF};
+
+/* True if c is too small to need a len-byte encoding. */
+static bool
+utf8overlong(rune c, usize len) {
+	rune smallest = utf8min[len-1];
+	return smallest > c;
+}
+
+/* True if c can be encoded: at most RUNE_MAX and outside the
+ * SURROGATE_MIN..SURROGATE_MAX range. */
+static bool
+utf8encodable(rune c) {
+	if (c > RUNE_MAX)
+		return false;
+	return !(c >= SURROGATE_MIN && c <= SURROGATE_MAX);
+}
+
+/* Number of bytes in the UTF-8 encoding of c, or 0 if c is not encodable. */
+static usize
+utf8len(rune c) {
+	if (!utf8encodable(c))
+		return 0;
+
+	/* Find the first length whose maximum is large enough for c. */
+	usize idx = 0;
+	while (utf8max[idx] < c)
+		idx++;
+	return idx + 1;
+}
+
+/* Encode c as UTF-8. Returns the encoded length, or 0 if c is not
+ * encodable. Bytes are written to s only when s is non-null and c is
+ * encodable; with a null s the call just reports the length. */
+usize
+utf8encode(char *s, rune c) {
+	usize len = utf8len(c);
+	if (len == 0 || !s)
+		return len;
+
+	uchar *out = (uchar *)s;
+
+	/* Continuation bytes, filled from the last one backwards; each takes
+	 * the low six bits that remain. */
+	usize i = len;
+	while (--i > 0) {
+		out[i] = 0x80 | (c & 0x3F);
+		c >>= 6;
+	}
+	/* Leading byte: the length marker plus whatever bits are left. */
+	out[0] = utf8byte[len-1] | (uchar)c;
+
+	return len;
+}
+
+/* Decode the first rune of the slen-byte buffer s.
+ * If c is non-null, *c receives the rune, or RUNE_BAD whenever no valid rune
+ * was decoded. Returns:
+ *   0      s is null, empty, or ends in the middle of a sequence whose
+ *          bytes so far are a leading byte followed by continuation bytes;
+ *   1      the first byte cannot start a sequence;
+ *   i > 0  byte i of the sequence is not a continuation byte;
+ *   len    a complete sequence of len bytes was read (*c stays RUNE_BAD if
+ *          it is overlong or not an encodable rune).
+ * The continuation bytes that are present are checked even when the sequence
+ * is truncated, so a malformed prefix such as "\xE0\x41" is reported as
+ * invalid (returns 1) rather than as incomplete (returns 0). */
+usize
+utf8decode(rune *c, char *s, usize slen) {
+	if (c)
+		*c = RUNE_BAD;
+
+	if (!s || slen == 0) /* No input? */
+		return 0;
+
+	/* Determine encoded sequence length based on first byte */
+	usize len = 1;
+	for (; len <= UTF8_SIZE; len++) {
+		if (((uchar)s[0] & utf8mask[len-1]) == utf8byte[len-1])
+			break;
+	}
+	if (len > UTF8_SIZE) /* Invalid leading byte? */
+		return 1;
+
+	/* Decode codepoint, looking only at the bytes that are present */
+	usize have = len < slen ? len : slen;
+	rune r = (uchar)s[0] & ~utf8mask[len-1];
+	for (usize i = 1; i < have; i++) {
+		if (((uchar)s[i] & 0xC0) != 0x80) /* Invalid continuation byte? */
+			return i;
+		r = (r << 6) | ((uchar)s[i] & 0x3F);
+	}
+
+	if (len > slen) /* Well-formed so far, but not enough input? */
+		return 0;
+
+	if (c && utf8encodable(r) && !utf8overlong(r, len))
+		*c = r;
+	return len;
+}