commit 6d04c0b296ccd15dc0b14aba620df298e4e4c28a
parent cd697484ae8b6cd0a3fcc3f808543df606d7004c
Author: robert <robertrussell.72001@gmail.com>
Date: Wed, 13 Jul 2022 16:59:43 -0700
Fix utf8decode
utf8decode should, as documented, return 0 iff the entire input string
is a proper prefix of a valid rune (i.e. a truncated encoding).
Diffstat:
1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/src/utf8.c b/src/utf8.c
@@ -61,16 +61,17 @@ utf8decode(rune *c, char *s, usize slen) {
}
if (len > UTF8_SIZE) /* Invalid leading byte? */
return 1;
- if (len > slen) /* Not enough input? */
- return 0;
/* Decode codepoint */
rune r = (uchar)s[0] & ~utf8mask[len-1];
- for (usize i = 1; i < len; i++) {
+ usize l = MIN(len, slen);
+ for (usize i = 1; i < l; i++) {
if (((uchar)s[i] & 0xC0) != 0x80) /* Invalid continuation byte? */
return i;
r = (r << 6) | ((uchar)s[i] & 0x3F);
}
+ if (len > slen)
+ return 0; /* Looks good so far, but not enough input */
if (c && utf8encodable(r) && !utf8overlong(r, len))
*c = r;