Use more functions for explicit surrogate handling (#353)
- add `is_surrogate`, `get_hi_surrogate` and `get_lo_surrogate`
- use surrogate functions instead of hard-coded computations
This commit is contained in:
parent
1db884b140
commit
3f06c95558
2 changed files with 30 additions and 19 deletions
19
cutils.h
19
cutils.h
|
@ -384,14 +384,29 @@ static inline void dbuf_set_error(DynBuf *s)
|
||||||
int unicode_to_utf8(uint8_t *buf, unsigned int c);
|
int unicode_to_utf8(uint8_t *buf, unsigned int c);
|
||||||
int unicode_from_utf8(const uint8_t *p, int max_len, const uint8_t **pp);
|
int unicode_from_utf8(const uint8_t *p, int max_len, const uint8_t **pp);
|
||||||
|
|
||||||
|
static inline BOOL is_surrogate(uint32_t c)
|
||||||
|
{
|
||||||
|
return (c >> 11) == (0xD800 >> 11); // 0xD800-0xDFFF
|
||||||
|
}
|
||||||
|
|
||||||
static inline BOOL is_hi_surrogate(uint32_t c)
|
static inline BOOL is_hi_surrogate(uint32_t c)
|
||||||
{
|
{
|
||||||
return 54 == (c >> 10); // 0xD800-0xDBFF
|
return (c >> 10) == (0xD800 >> 10); // 0xD800-0xDBFF
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline BOOL is_lo_surrogate(uint32_t c)
|
static inline BOOL is_lo_surrogate(uint32_t c)
|
||||||
{
|
{
|
||||||
return 55 == (c >> 10); // 0xDC00-0xDFFF
|
return (c >> 10) == (0xDC00 >> 10); // 0xDC00-0xDFFF
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline uint32_t get_hi_surrogate(uint32_t c)
|
||||||
|
{
|
||||||
|
return (c >> 10) - (0x10000 >> 10) + 0xD800;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline uint32_t get_lo_surrogate(uint32_t c)
|
||||||
|
{
|
||||||
|
return (c & 0x3FF) | 0xDC00;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline uint32_t from_surrogate(uint32_t hi, uint32_t lo)
|
static inline uint32_t from_surrogate(uint32_t hi, uint32_t lo)
|
||||||
|
|
30
quickjs.c
30
quickjs.c
|
@ -3664,10 +3664,9 @@ static int string_buffer_putc(StringBuffer *s, uint32_t c)
|
||||||
{
|
{
|
||||||
if (unlikely(c >= 0x10000)) {
|
if (unlikely(c >= 0x10000)) {
|
||||||
/* surrogate pair */
|
/* surrogate pair */
|
||||||
c -= 0x10000;
|
if (string_buffer_putc16(s, get_hi_surrogate(c)))
|
||||||
if (string_buffer_putc16(s, (c >> 10) + 0xd800))
|
|
||||||
return -1;
|
return -1;
|
||||||
c = (c & 0x3ff) + 0xdc00;
|
c = get_lo_surrogate(c);
|
||||||
}
|
}
|
||||||
return string_buffer_putc16(s, c);
|
return string_buffer_putc16(s, c);
|
||||||
}
|
}
|
||||||
|
@ -3883,9 +3882,8 @@ JSValue JS_NewStringLen(JSContext *ctx, const char *buf, size_t buf_len)
|
||||||
} else if (c <= 0x10FFFF) {
|
} else if (c <= 0x10FFFF) {
|
||||||
p = p_next;
|
p = p_next;
|
||||||
/* surrogate pair */
|
/* surrogate pair */
|
||||||
c -= 0x10000;
|
string_buffer_putc16(b, get_hi_surrogate(c));
|
||||||
string_buffer_putc16(b, (c >> 10) + 0xd800);
|
c = get_lo_surrogate(c);
|
||||||
c = (c & 0x3ff) + 0xdc00;
|
|
||||||
} else {
|
} else {
|
||||||
/* invalid char */
|
/* invalid char */
|
||||||
c = 0xfffd;
|
c = 0xfffd;
|
||||||
|
@ -11508,7 +11506,7 @@ static JSValue JS_ToQuotedString(JSContext *ctx, JSValue val1)
|
||||||
goto fail;
|
goto fail;
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
if (c < 32 || is_hi_surrogate(c) || is_lo_surrogate(c)) {
|
if (c < 32 || is_surrogate(c)) {
|
||||||
snprintf(buf, sizeof(buf), "\\u%04x", c);
|
snprintf(buf, sizeof(buf), "\\u%04x", c);
|
||||||
if (string_buffer_puts8(b, buf))
|
if (string_buffer_puts8(b, buf))
|
||||||
goto fail;
|
goto fail;
|
||||||
|
@ -19796,8 +19794,7 @@ static __exception int json_next_token(JSParseState *s)
|
||||||
js_parse_error(s, "Unexpected token '\\x%02x' in JSON", *p);
|
js_parse_error(s, "Unexpected token '\\x%02x' in JSON", *p);
|
||||||
} else {
|
} else {
|
||||||
if (c > 0xFFFF) {
|
if (c > 0xFFFF) {
|
||||||
/* get high surrogate */
|
c = get_hi_surrogate(c);
|
||||||
c = (c >> 10) - (0x10000 >> 10) + 0xD800;
|
|
||||||
}
|
}
|
||||||
js_parse_error(s, "Unexpected token '\\u%04x' in JSON", c);
|
js_parse_error(s, "Unexpected token '\\u%04x' in JSON", c);
|
||||||
}
|
}
|
||||||
|
@ -39555,12 +39552,12 @@ static JSValue js_string_isWellFormed(JSContext *ctx, JSValue this_val,
|
||||||
|
|
||||||
for (i = 0, n = p->len; i < n; i++) {
|
for (i = 0, n = p->len; i < n; i++) {
|
||||||
c = p->u.str16[i];
|
c = p->u.str16[i];
|
||||||
if (c < 0xD800 || c > 0xDFFF)
|
if (!is_surrogate(c))
|
||||||
continue;
|
continue;
|
||||||
if (c > 0xDBFF || i+1 == n)
|
if (is_lo_surrogate(c) || i + 1 == n)
|
||||||
break;
|
break;
|
||||||
c = p->u.str16[++i];
|
c = p->u.str16[++i];
|
||||||
if (c < 0xDC00 || c > 0xDFFF)
|
if (!is_lo_surrogate(c))
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -39597,14 +39594,14 @@ static JSValue js_string_toWellFormed(JSContext *ctx, JSValue this_val,
|
||||||
p = JS_VALUE_GET_STRING(ret);
|
p = JS_VALUE_GET_STRING(ret);
|
||||||
for (i = 0, n = p->len; i < n; i++) {
|
for (i = 0, n = p->len; i < n; i++) {
|
||||||
c = p->u.str16[i];
|
c = p->u.str16[i];
|
||||||
if (c < 0xD800 || c > 0xDFFF)
|
if (!is_surrogate(c))
|
||||||
continue;
|
continue;
|
||||||
if (c > 0xDBFF || i+1 == n) {
|
if (is_lo_surrogate(c) || i + 1 == n) {
|
||||||
p->u.str16[i] = 0xFFFD;
|
p->u.str16[i] = 0xFFFD;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
c = p->u.str16[++i];
|
c = p->u.str16[++i];
|
||||||
if (c < 0xDC00 || c > 0xDFFF)
|
if (!is_lo_surrogate(c))
|
||||||
p->u.str16[--i] = 0xFFFD;
|
p->u.str16[--i] = 0xFFFD;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -46865,8 +46862,7 @@ static JSValue js_global_decodeURI(JSContext *ctx, JSValue this_val,
|
||||||
}
|
}
|
||||||
c = (c << 6) | (c1 & 0x3f);
|
c = (c << 6) | (c1 & 0x3f);
|
||||||
}
|
}
|
||||||
if (c < c_min || c > 0x10FFFF ||
|
if (c < c_min || c > 0x10FFFF || is_surrogate(c)) {
|
||||||
is_hi_surrogate(c) || is_lo_surrogate(c)) {
|
|
||||||
js_throw_URIError(ctx, "malformed UTF-8");
|
js_throw_URIError(ctx, "malformed UTF-8");
|
||||||
goto fail;
|
goto fail;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue