Use more functions for explicit surrogate handling (#353)
- add `is_surrogate`, `get_hi_surrogate` and `get_lo_surrogate`
- use surrogate functions instead of hard-coded computations
This commit is contained in:
parent
1db884b140
commit
3f06c95558
2 changed files with 30 additions and 19 deletions
19
cutils.h
19
cutils.h
|
@ -384,14 +384,29 @@ static inline void dbuf_set_error(DynBuf *s)
|
|||
int unicode_to_utf8(uint8_t *buf, unsigned int c);
|
||||
int unicode_from_utf8(const uint8_t *p, int max_len, const uint8_t **pp);
|
||||
|
||||
/* Return non-zero if `c` is a UTF-16 surrogate code unit, high or low
   (0xD800-0xDFFF). Every value in that range shares the same bits above
   bit 10, so a single shifted comparison covers the whole range. */
static inline BOOL is_surrogate(uint32_t c)
{
    return (c >> 11) == (0xD800 >> 11); // 0xD800-0xDFFF
}
|
||||
|
||||
/* Return non-zero if `c` is a high (lead) surrogate code unit
   (0xD800-0xDBFF). The comparison is written against the shifted range
   base rather than the bare constant 54 so the intent is visible. */
static inline BOOL is_hi_surrogate(uint32_t c)
{
    return (c >> 10) == (0xD800 >> 10); // 0xD800-0xDBFF
}
|
||||
|
||||
/* Return non-zero if `c` is a low (trail) surrogate code unit
   (0xDC00-0xDFFF). The comparison is written against the shifted range
   base rather than the bare constant 55 so the intent is visible. */
static inline BOOL is_lo_surrogate(uint32_t c)
{
    return (c >> 10) == (0xDC00 >> 10); // 0xDC00-0xDFFF
}
|
||||
|
||||
/* Return the high (lead) surrogate of the UTF-16 pair that encodes the
   code point `c`.
   `c` is the full code point, NOT pre-offset by 0x10000: the subtraction
   of (0x10000 >> 10) below performs that offset. No range check is done;
   the result is only a valid high surrogate for 0x10000 <= c <= 0x10FFFF. */
static inline uint32_t get_hi_surrogate(uint32_t c)
{
    return (c >> 10) - (0x10000 >> 10) + 0xD800;
}
|
||||
|
||||
/* Return the low (trail) surrogate of the UTF-16 pair that encodes the
   code point `c`.
   Only the low 10 bits of `c` are used, so the result is the same whether
   or not 0x10000 has already been subtracted from the code point. */
static inline uint32_t get_lo_surrogate(uint32_t c)
{
    return (c & 0x3FF) | 0xDC00;
}
|
||||
|
||||
static inline uint32_t from_surrogate(uint32_t hi, uint32_t lo)
|
||||
|
|
30
quickjs.c
30
quickjs.c
|
@ -3664,10 +3664,9 @@ static int string_buffer_putc(StringBuffer *s, uint32_t c)
|
|||
{
|
||||
if (unlikely(c >= 0x10000)) {
|
||||
/* surrogate pair */
|
||||
c -= 0x10000;
|
||||
if (string_buffer_putc16(s, (c >> 10) + 0xd800))
|
||||
if (string_buffer_putc16(s, get_hi_surrogate(c)))
|
||||
return -1;
|
||||
c = (c & 0x3ff) + 0xdc00;
|
||||
c = get_lo_surrogate(c);
|
||||
}
|
||||
return string_buffer_putc16(s, c);
|
||||
}
|
||||
|
@ -3883,9 +3882,8 @@ JSValue JS_NewStringLen(JSContext *ctx, const char *buf, size_t buf_len)
|
|||
} else if (c <= 0x10FFFF) {
|
||||
p = p_next;
|
||||
/* surrogate pair */
|
||||
c -= 0x10000;
|
||||
string_buffer_putc16(b, (c >> 10) + 0xd800);
|
||||
c = (c & 0x3ff) + 0xdc00;
|
||||
string_buffer_putc16(b, get_hi_surrogate(c));
|
||||
c = get_lo_surrogate(c);
|
||||
} else {
|
||||
/* invalid char */
|
||||
c = 0xfffd;
|
||||
|
@ -11508,7 +11506,7 @@ static JSValue JS_ToQuotedString(JSContext *ctx, JSValue val1)
|
|||
goto fail;
|
||||
break;
|
||||
default:
|
||||
if (c < 32 || is_hi_surrogate(c) || is_lo_surrogate(c)) {
|
||||
if (c < 32 || is_surrogate(c)) {
|
||||
snprintf(buf, sizeof(buf), "\\u%04x", c);
|
||||
if (string_buffer_puts8(b, buf))
|
||||
goto fail;
|
||||
|
@ -19796,8 +19794,7 @@ static __exception int json_next_token(JSParseState *s)
|
|||
js_parse_error(s, "Unexpected token '\\x%02x' in JSON", *p);
|
||||
} else {
|
||||
if (c > 0xFFFF) {
|
||||
/* get high surrogate */
|
||||
c = (c >> 10) - (0x10000 >> 10) + 0xD800;
|
||||
c = get_hi_surrogate(c);
|
||||
}
|
||||
js_parse_error(s, "Unexpected token '\\u%04x' in JSON", c);
|
||||
}
|
||||
|
@ -39555,12 +39552,12 @@ static JSValue js_string_isWellFormed(JSContext *ctx, JSValue this_val,
|
|||
|
||||
for (i = 0, n = p->len; i < n; i++) {
|
||||
c = p->u.str16[i];
|
||||
if (c < 0xD800 || c > 0xDFFF)
|
||||
if (!is_surrogate(c))
|
||||
continue;
|
||||
if (c > 0xDBFF || i+1 == n)
|
||||
if (is_lo_surrogate(c) || i + 1 == n)
|
||||
break;
|
||||
c = p->u.str16[++i];
|
||||
if (c < 0xDC00 || c > 0xDFFF)
|
||||
if (!is_lo_surrogate(c))
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -39597,14 +39594,14 @@ static JSValue js_string_toWellFormed(JSContext *ctx, JSValue this_val,
|
|||
p = JS_VALUE_GET_STRING(ret);
|
||||
for (i = 0, n = p->len; i < n; i++) {
|
||||
c = p->u.str16[i];
|
||||
if (c < 0xD800 || c > 0xDFFF)
|
||||
if (!is_surrogate(c))
|
||||
continue;
|
||||
if (c > 0xDBFF || i+1 == n) {
|
||||
if (is_lo_surrogate(c) || i + 1 == n) {
|
||||
p->u.str16[i] = 0xFFFD;
|
||||
continue;
|
||||
}
|
||||
c = p->u.str16[++i];
|
||||
if (c < 0xDC00 || c > 0xDFFF)
|
||||
if (!is_lo_surrogate(c))
|
||||
p->u.str16[--i] = 0xFFFD;
|
||||
}
|
||||
|
||||
|
@ -46865,8 +46862,7 @@ static JSValue js_global_decodeURI(JSContext *ctx, JSValue this_val,
|
|||
}
|
||||
c = (c << 6) | (c1 & 0x3f);
|
||||
}
|
||||
if (c < c_min || c > 0x10FFFF ||
|
||||
is_hi_surrogate(c) || is_lo_surrogate(c)) {
|
||||
if (c < c_min || c > 0x10FFFF || is_surrogate(c)) {
|
||||
js_throw_URIError(ctx, "malformed UTF-8");
|
||||
goto fail;
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue