Improve JSON parser conformity (#303)

- add JSON specific parsers for strings and numbers
- update JSON parse error messages
- fix `JSON.stringify` handling of boxed objects
- parse Flags in v8 mjsunit test files
- update v8.txt
This commit is contained in:
Charlie Gordon 2024-03-14 08:19:11 +01:00 committed by GitHub
parent 761ad7856f
commit 45f8dc247c
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 150 additions and 66 deletions

155
quickjs.c
View file

@ -18645,11 +18645,6 @@ static __exception int js_parse_string(JSParseState *s, int sep,
goto invalid_char;
c = *p;
if (c < 0x20) {
if (!s->cur_func) {
if (do_throw)
js_parse_error(s, "invalid character in a JSON string");
goto fail;
}
if (sep == '`') {
if (c == '\r') {
if (p[1] == '\n')
@ -18699,9 +18694,7 @@ static __exception int js_parse_string(JSParseState *s, int sep,
continue;
default:
if (c >= '0' && c <= '9') {
if (!s->cur_func)
goto invalid_escape; /* JSON case */
if (!(s->cur_func->js_mode & JS_MODE_STRICT) && sep != '`')
if (s->cur_func && !(s->cur_func->js_mode & JS_MODE_STRICT) && sep != '`')
goto parse_escape;
if (c == '0' && !(p[1] >= '0' && p[1] <= '9')) {
p++;
@ -18711,10 +18704,9 @@ static __exception int js_parse_string(JSParseState *s, int sep,
/* Note: according to ES2021, \8 and \9 are not
accepted in strict mode or in templates. */
goto invalid_escape;
} else {
}
if (do_throw)
js_parse_error(s, "octal escape sequences are not allowed in strict mode");
}
goto fail;
}
} else if (c >= 0x80) {
@ -18731,10 +18723,7 @@ static __exception int js_parse_string(JSParseState *s, int sep,
parse_escape:
ret = lre_parse_escape(&p, TRUE);
if (ret == -1) {
invalid_escape:
if (do_throw)
js_parse_error(s, "malformed escape sequence in string literal");
goto fail;
goto invalid_escape;
} else if (ret < 0) {
/* ignore the '\' (could output a warning) */
p++;
@ -18764,6 +18753,10 @@ static __exception int js_parse_string(JSParseState *s, int sep,
if (do_throw)
js_parse_error(s, "invalid UTF-8 sequence");
goto fail;
invalid_escape:
if (do_throw)
js_parse_error(s, "malformed escape sequence in string literal");
goto fail;
invalid_char:
if (do_throw)
js_parse_error(s, "unexpected end of string");
@ -19439,6 +19432,107 @@ static __exception int next_token(JSParseState *s)
return -1;
}
static int json_parse_string(JSParseState *s, const uint8_t **pp)
{
const uint8_t *p = *pp;
int ret, i;
uint32_t c;
StringBuffer b_s, *b = &b_s;
if (string_buffer_init(s->ctx, b, 32))
goto fail;
for(;;) {
if (p >= s->buf_end) {
js_parse_error(s, "Unexpected end of JSON input");
goto fail;
}
c = *p++;
if (c == '"')
break;
if (c < 0x20) {
js_parse_error(s, "Bad control character in string literal in JSON");
goto fail;
}
if (c == '\\') {
c = *p++;
switch(c) {
case 'b': c = '\b'; break;
case 'f': c = '\f'; break;
case 'n': c = '\n'; break;
case 'r': c = '\r'; break;
case 't': c = '\t'; break;
case '\"': break;
case '\\': break;
case '/': break; /* for v8 compatibility */
case 'u':
c = 0;
for(i = 0; i < 4; i++) {
int h = from_hex(*p++);
if (h < 0)
goto invalid_escape;
c = (c << 4) | h;
}
break;
default:
invalid_escape:
js_parse_error(s, "Bad escaped character in JSON");
goto fail;
}
}
if (string_buffer_putc(b, c))
goto fail;
}
s->token.val = TOK_STRING;
s->token.u.str.sep = '"';
s->token.u.str.str = string_buffer_end(b);
*pp = p;
return 0;
fail:
string_buffer_free(b);
return -1;
}
static int json_parse_number(JSParseState *s, const uint8_t **pp)
{
const uint8_t *p = *pp;
const uint8_t *p_start = p;
if (*p == '+' || *p == '-')
p++;
if (!is_digit(*p))
return js_parse_error(s, "Unexpected token '%c'", *p_start);
if (p[0] == '0' && is_digit(p[1]))
return js_parse_error(s, "Unexpected number in JSON");
while (is_digit(*p))
p++;
if (*p == '.') {
p++;
if (!is_digit(*p))
return js_parse_error(s, "Unterminated fractional number in JSON");
while (is_digit(*p))
p++;
}
if (*p == 'e' || *p == 'E') {
p++;
if (*p == '+' || *p == '-')
p++;
if (!is_digit(*p))
return js_parse_error(s, "Exponent part is missing a number in JSON");
while (is_digit(*p))
p++;
}
s->token.val = TOK_NUMBER;
s->token.u.num.val = js_float64(strtod((const char *)p_start, NULL));
*pp = p;
return 0;
}
/* 'c' is the first character. Return JS_ATOM_NULL in case of error */
static JSAtom json_parse_ident(JSParseState *s, const uint8_t **pp, int c)
{
@ -19506,7 +19600,8 @@ static __exception int json_next_token(JSParseState *s)
/* JSON does not accept single quoted strings */
goto def_token;
case '\"':
if (js_parse_string(s, c, TRUE, p + 1, &s->token, &p))
p++;
if (json_parse_string(s, &p))
goto fail;
break;
case '\r': /* accept DOS and MAC newline sequences */
@ -19574,13 +19669,8 @@ static __exception int json_next_token(JSParseState *s)
case '9':
/* number */
parse_number:
{
JSValue ret = js_atof(s->ctx, (const char *)p, (const char **)&p, 10, 0);
if (JS_IsException(ret))
if (json_parse_number(s, &p))
goto fail;
s->token.val = TOK_NUMBER;
s->token.u.num.val = ret;
}
break;
default:
if (c >= 128) {
@ -42719,30 +42809,22 @@ static int js_json_to_str(JSContext *ctx, JSONStringifyContext *jsc,
tab = JS_UNDEFINED;
prop = JS_UNDEFINED;
switch (JS_VALUE_GET_NORM_TAG(val)) {
case JS_TAG_OBJECT:
if (JS_IsObject(val)) {
p = JS_VALUE_GET_OBJ(val);
cl = p->class_id;
if (cl == JS_CLASS_STRING) {
val = JS_ToStringFree(ctx, val);
if (JS_IsException(val))
goto exception;
val = JS_ToQuotedStringFree(ctx, val);
if (JS_IsException(val))
goto exception;
return string_buffer_concat_value_free(jsc->b, val);
goto concat_primitive;
} else if (cl == JS_CLASS_NUMBER) {
val = JS_ToNumberFree(ctx, val);
if (JS_IsException(val))
goto exception;
return string_buffer_concat_value_free(jsc->b, val);
} else if (cl == JS_CLASS_BOOLEAN) {
ret = string_buffer_concat_value(jsc->b, p->u.object_data);
JS_FreeValue(ctx, val);
return ret;
} else if (cl == JS_CLASS_BIG_INT) {
JS_ThrowTypeError(ctx, "BigInt are forbidden in JSON.stringify");
goto exception;
goto concat_primitive;
} else if (cl == JS_CLASS_BOOLEAN || cl == JS_CLASS_BIG_INT) {
set_value(ctx, &val, js_dup(p->u.object_data));
goto concat_primitive;
}
v = js_array_includes(ctx, jsc->stack, 1, &val);
if (JS_IsException(v))
@ -42855,6 +42937,9 @@ static int js_json_to_str(JSContext *ctx, JSONStringifyContext *jsc,
JS_FreeValue(ctx, indent1);
JS_FreeValue(ctx, prop);
return 0;
}
concat_primitive:
switch (JS_VALUE_GET_NORM_TAG(val)) {
case JS_TAG_STRING:
val = JS_ToQuotedStringFree(ctx, val);
if (JS_IsException(val))

32
v8.js
View file

@ -39,10 +39,28 @@ for (const file of files) {
if (source.includes('Realm.create()')) continue // TODO support Realm object
if (source.includes('// MODULE')) continue // TODO support modules
if (source.includes('// Files:')) continue // TODO support includes
// the default --stack-size is necessary to keep output of stack overflowing
// tests stable; it will be overridden by a Flags comment
let flags = { '--stack-size': 2048 }, flagstr = ""
// parse command line flags
for (let s of source.matchAll(/\/\/ Flags:(.+)/g)) {
for (let m of s[1].matchAll(/\s*([\S]+)/g)) {
const v = m[1].match(/([\S]+)=([\S]+)/)
if (v) {
flags[v[1]] = v[2]
flagstr += ` ${v[1]}=${v[2]}`
} else {
flags[m[1]] = true
flagstr += ` ${m[1]}`
}
}
}
// exclude tests that use V8 intrinsics like %OptimizeFunctionOnNextCall
if (source.includes ("--allow-natives-syntax")) continue
if (flags["--allow-natives-syntax"]) continue
// exclude tests that use V8 extensions
if (source.includes ("--expose-externalize-string")) continue
if (flags["--expose-externalize-string"]) continue
// parse environment variables
let env = {}, envstr = ""
for (let s of source.matchAll(/environment variables:(.+)/ig)) {
for (let m of s[1].matchAll(/\s*([\S]+)=([\S]+)/g)) {
@ -50,11 +68,13 @@ for (const file of files) {
envstr += ` ${m[1]}=${m[2]}`
}
}
//print(`=== ${file}${envstr}${flagstr}`)
print(`=== ${file}${envstr}`)
// the fixed --stack-size is necessary to keep output of stack overflowing
// tests stable; their stack traces are somewhat arbitrary otherwise
const args = [argv0, "--stack-size", `${2048 * 1024}`,
"-I", "mjsunit.js", "-I", tweak, file]
const args = [argv0,
"--stack-size", `${flags["--stack-size"]*1024}`,
"-I", "mjsunit.js",
"-I", tweak,
file]
const opts = {block:true, cwd:dir, env:env, usePath:false}
os.exec(args, opts)
}

25
v8.txt
View file

@ -452,20 +452,16 @@ Object <Error(SyntaxError: invalid assignment left-hand side)> is not an instanc
Object <Error(SyntaxError: invalid increment/decrement operand)> is not an instance of <ReferenceError> but of <SyntaxError>
=== invalid-source-element.js
=== json-errors.js
Failure: expected <"Unexpected end of JSON input"> found <"unexpected end of string">
Failure: expected <"Unexpected end of JSON input"> found <"unexpected end of string">
Failure: expected <"Unexpected end of JSON input"> found <"unexpected end of string">
Failure: expected <"Unexpected end of JSON input"> found <"unexpected end of string">
Failure:
expected:
"Unexpected token \n in JSON at position 3"
found:
"invalid character in a JSON string"
"Bad control character in string literal in JSON"
Failure:
expected:
"Unexpected token \n in JSON at position 3"
found:
"invalid character in a JSON string"
"Bad control character in string literal in JSON"
=== json-parser-recursive.js
=== json-replacer-number-wrapper-tostring.js
=== json-replacer-order.js
@ -475,23 +471,6 @@ Did not throw exception
Did not throw exception
=== json-stringify-stack.js
=== json.js
Did not throw exception
Did not throw exception
Did not throw exception
Did not throw exception
Did not throw exception
Did not throw exception
Did not throw exception
Failure:
expected:
"[37,null,1,\"foo\",\"37\",\"true\",null,\"has toJSON\",{},\"has toJSON\"]"
found:
"[37,NaN,1,\"foo\",\"37\",
Failure:
expected:
"[37,null,1,\"foo\",\"37\",\"true\",null,\"has toJSON\",{},\"has toJSON\"]"
found:
"[37,NaN,1,\"foo\",\"37\",
=== keyed-array-call.js
=== keyed-call-generic.js
=== keyed-call-ic.js