rcx

library of miscellaneous bits of C code
git clone git://git.rr3.xyz/rcx
Log | Files | Refs | README | LICENSE

commit 6d04c0b296ccd15dc0b14aba620df298e4e4c28a
parent cd697484ae8b6cd0a3fcc3f808543df606d7004c
Author: robert <robertrussell.72001@gmail.com>
Date:   Wed, 13 Jul 2022 16:59:43 -0700

Fix utf8decode

utf8decode should, as documented, return 0 iff all of the input string
is a prefix of a valid rune.

Diffstat:
M src/utf8.c | 7 ++++---
1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/src/utf8.c b/src/utf8.c
@@ -61,16 +61,17 @@ utf8decode(rune *c, char *s, usize slen) {
 	}
 	if (len > UTF8_SIZE) /* Invalid leading byte? */
 		return 1;
-	if (len > slen) /* Not enough input? */
-		return 0;
 	/* Decode codepoint */
 	rune r = (uchar)s[0] & ~utf8mask[len-1];
-	for (usize i = 1; i < len; i++) {
+	usize l = MIN(len, slen);
+	for (usize i = 1; i < l; i++) {
 		if (((uchar)s[i] & 0xC0) != 0x80) /* Invalid continuation byte? */
 			return i;
 		r = (r << 6) | ((uchar)s[i] & 0x3F);
 	}
+	if (len > slen)
+		return 0; /* Looks good so far, but not enough input */
 	if (c && utf8encodable(r) && !utf8overlong(r, len))
 		*c = r;