commit bacaaf039091bd29603f4125cbc69c9e834fc71b
parent e167b6dbf0e90d90a749f18c87a15e4718d52869
Author: robert <robertrussell.72001@gmail.com>
Date: Wed, 10 Aug 2022 14:34:31 -0700
Namespace all utf8 functions
Diffstat:
2 files changed, 16 insertions(+), 16 deletions(-)
diff --git a/inc/cext/utf8.h b/inc/cext/utf8.h
@@ -2,13 +2,13 @@
#include "cext/def.h"
-#define UTF8_SIZE 4
+#define CEXT_UTF8_SIZE 4
/* Return the number of bytes needed to encode c, or 0 if c is an invalid
- * codepoint. If s is nonnull, then it must have length >= utf8encode(0, c),
- * which is guaranteed to be at most UTF8_SIZE; in this case, if c is a valid
- * codepoint, then encode c into s. */
-usize utf8encode(char *s, rune c);
+ * codepoint. If s is nonnull, then it must have length >=
+ * cext_utf8_encode(0, c), which is guaranteed to be at most CEXT_UTF8_SIZE;
+ * in this case, if c is a valid codepoint, then encode c into s. */
+usize cext_utf8_encode(char *s, rune c);
/* Decode the first rune in s and return the number of consumed bytes. If this
* succeeds and c is nonnull, then set *c to the decoded rune. Otherwise, no
@@ -18,4 +18,4 @@ usize utf8encode(char *s, rune c);
* - n > 0 iff the first min(n+1,slen) bytes of s are not a prefix of any
* valid rune (but if n < slen, then s[n] might be the first byte of a
* valid rune). */
-usize utf8decode(rune *c, char *s, usize slen);
+usize cext_utf8_decode(rune *c, char *s, usize slen);
diff --git a/src/utf8.c b/src/utf8.c
@@ -10,18 +10,18 @@ static const rune utf8min[] = { 0x0, 0x80, 0x800, 0x10000};
static const rune utf8max[] = {0x7F, 0x7FF, 0xFFFF, 0x10FFFF};
static bool
-utf8overlong(rune c, usize len) {
+utf8_overlong(rune c, usize len) {
return c < utf8min[len-1];
}
static bool
-utf8encodable(rune c) {
+utf8_encodable(rune c) {
return c <= RUNE_MAX && (c < SURROGATE_MIN || c > SURROGATE_MAX);
}
static usize
-utf8len(rune c) {
- if (!utf8encodable(c))
+utf8_len(rune c) {
+ if (!utf8_encodable(c))
return 0;
usize len = 1;
@@ -31,8 +31,8 @@ utf8len(rune c) {
}
usize
-utf8encode(char *s, rune c) {
- usize len = utf8len(c);
+cext_utf8_encode(char *s, rune c) {
+ usize len = utf8_len(c);
if (!s || len == 0)
return len;
@@ -46,7 +46,7 @@ utf8encode(char *s, rune c) {
}
usize
-utf8decode(rune *c, char *s, usize slen) {
+cext_utf8_decode(rune *c, char *s, usize slen) {
if (c)
*c = RUNE_BAD;
@@ -55,11 +55,11 @@ utf8decode(rune *c, char *s, usize slen) {
/* Determine encoded sequence length based on first byte */
usize len = 1;
- for (; len <= UTF8_SIZE; len++) {
+ for (; len <= CEXT_UTF8_SIZE; len++) {
if (((uchar)s[0] & utf8mask[len-1]) == utf8byte[len-1])
break;
}
- if (len > UTF8_SIZE) /* Invalid leading byte? */
+ if (len > CEXT_UTF8_SIZE) /* Invalid leading byte? */
return 1;
/* Decode codepoint */
@@ -73,7 +73,7 @@ utf8decode(rune *c, char *s, usize slen) {
if (len > slen)
return 0; /* Looks good so far, but not enough input */
- if (c && utf8encodable(r) && !utf8overlong(r, len))
+ if (c && utf8_encodable(r) && !utf8_overlong(r, len))
*c = r;
return len;
}