Improve JSON parser conformity (#303)

- add JSON specific parsers for strings and numbers
- update JSON parse error messages
- fix `JSON.stringify` handling of boxed objects
- parse Flags in v8 mjsunit test files
- update v8.txt
This commit is contained in:
Charlie Gordon 2024-03-14 08:19:11 +01:00 committed by GitHub
parent 761ad7856f
commit 45f8dc247c
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 150 additions and 66 deletions

159
quickjs.c
View file

@ -18645,11 +18645,6 @@ static __exception int js_parse_string(JSParseState *s, int sep,
goto invalid_char; goto invalid_char;
c = *p; c = *p;
if (c < 0x20) { if (c < 0x20) {
if (!s->cur_func) {
if (do_throw)
js_parse_error(s, "invalid character in a JSON string");
goto fail;
}
if (sep == '`') { if (sep == '`') {
if (c == '\r') { if (c == '\r') {
if (p[1] == '\n') if (p[1] == '\n')
@ -18699,9 +18694,7 @@ static __exception int js_parse_string(JSParseState *s, int sep,
continue; continue;
default: default:
if (c >= '0' && c <= '9') { if (c >= '0' && c <= '9') {
if (!s->cur_func) if (s->cur_func && !(s->cur_func->js_mode & JS_MODE_STRICT) && sep != '`')
goto invalid_escape; /* JSON case */
if (!(s->cur_func->js_mode & JS_MODE_STRICT) && sep != '`')
goto parse_escape; goto parse_escape;
if (c == '0' && !(p[1] >= '0' && p[1] <= '9')) { if (c == '0' && !(p[1] >= '0' && p[1] <= '9')) {
p++; p++;
@ -18711,10 +18704,9 @@ static __exception int js_parse_string(JSParseState *s, int sep,
/* Note: according to ES2021, \8 and \9 are not /* Note: according to ES2021, \8 and \9 are not
accepted in strict mode or in templates. */ accepted in strict mode or in templates. */
goto invalid_escape; goto invalid_escape;
} else {
if (do_throw)
js_parse_error(s, "octal escape sequences are not allowed in strict mode");
} }
if (do_throw)
js_parse_error(s, "octal escape sequences are not allowed in strict mode");
goto fail; goto fail;
} }
} else if (c >= 0x80) { } else if (c >= 0x80) {
@ -18731,10 +18723,7 @@ static __exception int js_parse_string(JSParseState *s, int sep,
parse_escape: parse_escape:
ret = lre_parse_escape(&p, TRUE); ret = lre_parse_escape(&p, TRUE);
if (ret == -1) { if (ret == -1) {
invalid_escape: goto invalid_escape;
if (do_throw)
js_parse_error(s, "malformed escape sequence in string literal");
goto fail;
} else if (ret < 0) { } else if (ret < 0) {
/* ignore the '\' (could output a warning) */ /* ignore the '\' (could output a warning) */
p++; p++;
@ -18764,6 +18753,10 @@ static __exception int js_parse_string(JSParseState *s, int sep,
if (do_throw) if (do_throw)
js_parse_error(s, "invalid UTF-8 sequence"); js_parse_error(s, "invalid UTF-8 sequence");
goto fail; goto fail;
invalid_escape:
if (do_throw)
js_parse_error(s, "malformed escape sequence in string literal");
goto fail;
invalid_char: invalid_char:
if (do_throw) if (do_throw)
js_parse_error(s, "unexpected end of string"); js_parse_error(s, "unexpected end of string");
@ -19439,6 +19432,107 @@ static __exception int next_token(JSParseState *s)
return -1; return -1;
} }
/* Parse the body of a JSON string literal.

   On entry '*pp' points just after the opening '"'. Characters are
   decoded until the closing '"':
   - bytes below 0x20 are rejected (raw control characters),
   - '\' introduces one of the JSON escapes (b f n r t " \ / uXXXX),
   - bytes >= 0x80 are decoded as UTF-8 sequences.

   On success the resulting string is stored in s->token as a
   TOK_STRING, '*pp' is advanced past the closing '"' and 0 is
   returned. On failure the partially built buffer is released, '*pp'
   is left untouched and -1 is returned; syntax problems are reported
   through js_parse_error(). */
static int json_parse_string(JSParseState *s, const uint8_t **pp)
{
    const uint8_t *p = *pp;
    int i;
    uint32_t c;
    StringBuffer b_s, *b = &b_s;

    if (string_buffer_init(s->ctx, b, 32))
        goto fail;

    for(;;) {
        if (p >= s->buf_end)
            goto end_of_input;
        c = *p++;
        if (c == '"')
            break;
        if (c < 0x20) {
            js_parse_error(s, "Bad control character in string literal in JSON");
            goto fail;
        }
        if (c == '\\') {
            /* do not read the escape character past the input end */
            if (p >= s->buf_end)
                goto end_of_input;
            c = *p++;
            switch(c) {
            case 'b':   c = '\b'; break;
            case 'f':   c = '\f'; break;
            case 'n':   c = '\n'; break;
            case 'r':   c = '\r'; break;
            case 't':   c = '\t'; break;
            case '\"':  break;
            case '\\':  break;
            case '/':   break; /* for v8 compatibility */
            case 'u':
                /* exactly 4 hexadecimal digits; surrogate halves are
                   appended as is, one code unit per escape */
                c = 0;
                for(i = 0; i < 4; i++) {
                    int h;
                    if (p >= s->buf_end)
                        goto end_of_input;
                    h = from_hex(*p++);
                    if (h < 0)
                        goto invalid_escape;
                    c = (c << 4) | h;
                }
                break;
            default:
            invalid_escape:
                js_parse_error(s, "Bad escaped character in JSON");
                goto fail;
            }
        } else if (c >= 0x80) {
            /* multi-byte UTF-8 sequence: decode the whole code point
               instead of appending its bytes one by one.
               NOTE(review): relies on unicode_from_utf8() returning -1
               on malformed input, as in js_parse_string() */
            const uint8_t *p_next;
            c = unicode_from_utf8(p - 1, UTF8_CHAR_LEN_MAX, &p_next);
            if (c > 0x10FFFF) {
                js_parse_error(s, "Bad UTF-8 sequence in JSON");
                goto fail;
            }
            p = p_next;
        }
        if (string_buffer_putc(b, c))
            goto fail;
    }
    s->token.val = TOK_STRING;
    s->token.u.str.sep = '"';
    s->token.u.str.str = string_buffer_end(b);
    *pp = p;
    return 0;

 end_of_input:
    js_parse_error(s, "Unexpected end of JSON input");
 fail:
    string_buffer_free(b);
    return -1;
}
/* Parse a JSON number starting at '*pp'.

   The accepted syntax is the JSON number grammar:
       [ '-' ] int [ '.' digits ] [ ('e' | 'E') [ '+' | '-' ] digits ]
   where 'int' has no superfluous leading zero.

   On success the value is stored in s->token as a TOK_NUMBER, '*pp' is
   advanced past the last character of the number and 0 is returned.
   On a syntax error the value returned by js_parse_error() is
   propagated and '*pp' is left untouched. */
static int json_parse_number(JSParseState *s, const uint8_t **pp)
{
    const uint8_t *p = *pp;
    const uint8_t *p_start = p;

    /* only '-' is a valid sign in JSON: a leading '+' falls through to
       the digit check below and is reported as an unexpected token */
    if (*p == '-')
        p++;

    if (!is_digit(*p))
        return js_parse_error(s, "Unexpected token '%c'", *p_start);

    /* "0" may only be followed by '.', an exponent or the end of the number */
    if (p[0] == '0' && is_digit(p[1]))
        return js_parse_error(s, "Unexpected number in JSON");

    while (is_digit(*p))
        p++;

    if (*p == '.') {
        p++;
        if (!is_digit(*p))
            return js_parse_error(s, "Unterminated fractional number in JSON");
        while (is_digit(*p))
            p++;
    }
    if (*p == 'e' || *p == 'E') {
        p++;
        if (*p == '+' || *p == '-')
            p++;
        if (!is_digit(*p))
            return js_parse_error(s, "Exponent part is missing a number in JSON");
        while (is_digit(*p))
            p++;
    }
    /* the syntax was validated above; strtod() is only used for the
       conversion of the text starting at p_start */
    s->token.val = TOK_NUMBER;
    s->token.u.num.val = js_float64(strtod((const char *)p_start, NULL));
    *pp = p;
    return 0;
}
/* 'c' is the first character. Return JS_ATOM_NULL in case of error */ /* 'c' is the first character. Return JS_ATOM_NULL in case of error */
static JSAtom json_parse_ident(JSParseState *s, const uint8_t **pp, int c) static JSAtom json_parse_ident(JSParseState *s, const uint8_t **pp, int c)
{ {
@ -19506,7 +19600,8 @@ static __exception int json_next_token(JSParseState *s)
/* JSON does not accept single quoted strings */ /* JSON does not accept single quoted strings */
goto def_token; goto def_token;
case '\"': case '\"':
if (js_parse_string(s, c, TRUE, p + 1, &s->token, &p)) p++;
if (json_parse_string(s, &p))
goto fail; goto fail;
break; break;
case '\r': /* accept DOS and MAC newline sequences */ case '\r': /* accept DOS and MAC newline sequences */
@ -19574,13 +19669,8 @@ static __exception int json_next_token(JSParseState *s)
case '9': case '9':
/* number */ /* number */
parse_number: parse_number:
{ if (json_parse_number(s, &p))
JSValue ret = js_atof(s->ctx, (const char *)p, (const char **)&p, 10, 0); goto fail;
if (JS_IsException(ret))
goto fail;
s->token.val = TOK_NUMBER;
s->token.u.num.val = ret;
}
break; break;
default: default:
if (c >= 128) { if (c >= 128) {
@ -42719,30 +42809,22 @@ static int js_json_to_str(JSContext *ctx, JSONStringifyContext *jsc,
tab = JS_UNDEFINED; tab = JS_UNDEFINED;
prop = JS_UNDEFINED; prop = JS_UNDEFINED;
switch (JS_VALUE_GET_NORM_TAG(val)) { if (JS_IsObject(val)) {
case JS_TAG_OBJECT:
p = JS_VALUE_GET_OBJ(val); p = JS_VALUE_GET_OBJ(val);
cl = p->class_id; cl = p->class_id;
if (cl == JS_CLASS_STRING) { if (cl == JS_CLASS_STRING) {
val = JS_ToStringFree(ctx, val); val = JS_ToStringFree(ctx, val);
if (JS_IsException(val)) if (JS_IsException(val))
goto exception; goto exception;
val = JS_ToQuotedStringFree(ctx, val); goto concat_primitive;
if (JS_IsException(val))
goto exception;
return string_buffer_concat_value_free(jsc->b, val);
} else if (cl == JS_CLASS_NUMBER) { } else if (cl == JS_CLASS_NUMBER) {
val = JS_ToNumberFree(ctx, val); val = JS_ToNumberFree(ctx, val);
if (JS_IsException(val)) if (JS_IsException(val))
goto exception; goto exception;
return string_buffer_concat_value_free(jsc->b, val); goto concat_primitive;
} else if (cl == JS_CLASS_BOOLEAN) { } else if (cl == JS_CLASS_BOOLEAN || cl == JS_CLASS_BIG_INT) {
ret = string_buffer_concat_value(jsc->b, p->u.object_data); set_value(ctx, &val, js_dup(p->u.object_data));
JS_FreeValue(ctx, val); goto concat_primitive;
return ret;
} else if (cl == JS_CLASS_BIG_INT) {
JS_ThrowTypeError(ctx, "BigInt are forbidden in JSON.stringify");
goto exception;
} }
v = js_array_includes(ctx, jsc->stack, 1, &val); v = js_array_includes(ctx, jsc->stack, 1, &val);
if (JS_IsException(v)) if (JS_IsException(v))
@ -42855,6 +42937,9 @@ static int js_json_to_str(JSContext *ctx, JSONStringifyContext *jsc,
JS_FreeValue(ctx, indent1); JS_FreeValue(ctx, indent1);
JS_FreeValue(ctx, prop); JS_FreeValue(ctx, prop);
return 0; return 0;
}
concat_primitive:
switch (JS_VALUE_GET_NORM_TAG(val)) {
case JS_TAG_STRING: case JS_TAG_STRING:
val = JS_ToQuotedStringFree(ctx, val); val = JS_ToQuotedStringFree(ctx, val);
if (JS_IsException(val)) if (JS_IsException(val))

32
v8.js
View file

@ -39,10 +39,28 @@ for (const file of files) {
if (source.includes('Realm.create()')) continue // TODO support Realm object if (source.includes('Realm.create()')) continue // TODO support Realm object
if (source.includes('// MODULE')) continue // TODO support modules if (source.includes('// MODULE')) continue // TODO support modules
if (source.includes('// Files:')) continue // TODO support includes if (source.includes('// Files:')) continue // TODO support includes
// the default --stack-size is necessary to keep output of stack overflowing
// tests stable; it will be overridden by a Flags comment
let flags = { '--stack-size': 2048 }, flagstr = ""
// parse command line flags
for (let s of source.matchAll(/\/\/ Flags:(.+)/g)) {
for (let m of s[1].matchAll(/\s*([\S]+)/g)) {
const v = m[1].match(/([\S]+)=([\S]+)/)
if (v) {
flags[v[1]] = v[2]
flagstr += ` ${v[1]}=${v[2]}`
} else {
flags[m[1]] = true
flagstr += ` ${m[1]}`
}
}
}
// exclude tests that use V8 intrinsics like %OptimizeFunctionOnNextCall // exclude tests that use V8 intrinsics like %OptimizeFunctionOnNextCall
if (source.includes ("--allow-natives-syntax")) continue if (flags["--allow-natives-syntax"]) continue
// exclude tests that use V8 extensions // exclude tests that use V8 extensions
if (source.includes ("--expose-externalize-string")) continue if (flags["--expose-externalize-string"]) continue
// parse environment variables
let env = {}, envstr = "" let env = {}, envstr = ""
for (let s of source.matchAll(/environment variables:(.+)/ig)) { for (let s of source.matchAll(/environment variables:(.+)/ig)) {
for (let m of s[1].matchAll(/\s*([\S]+)=([\S]+)/g)) { for (let m of s[1].matchAll(/\s*([\S]+)=([\S]+)/g)) {
@ -50,11 +68,13 @@ for (const file of files) {
envstr += ` ${m[1]}=${m[2]}` envstr += ` ${m[1]}=${m[2]}`
} }
} }
//print(`=== ${file}${envstr}${flagstr}`)
print(`=== ${file}${envstr}`) print(`=== ${file}${envstr}`)
// the fixed --stack-size is necessary to keep output of stack overflowing const args = [argv0,
// tests stable; their stack traces are somewhat arbitrary otherwise "--stack-size", `${flags["--stack-size"]*1024}`,
const args = [argv0, "--stack-size", `${2048 * 1024}`, "-I", "mjsunit.js",
"-I", "mjsunit.js", "-I", tweak, file] "-I", tweak,
file]
const opts = {block:true, cwd:dir, env:env, usePath:false} const opts = {block:true, cwd:dir, env:env, usePath:false}
os.exec(args, opts) os.exec(args, opts)
} }

25
v8.txt
View file

@ -452,20 +452,16 @@ Object <Error(SyntaxError: invalid assignment left-hand side)> is not an instanc
Object <Error(SyntaxError: invalid increment/decrement operand)> is not an instance of <ReferenceError> but of <SyntaxError> Object <Error(SyntaxError: invalid increment/decrement operand)> is not an instance of <ReferenceError> but of <SyntaxError>
=== invalid-source-element.js === invalid-source-element.js
=== json-errors.js === json-errors.js
Failure: expected <"Unexpected end of JSON input"> found <"unexpected end of string">
Failure: expected <"Unexpected end of JSON input"> found <"unexpected end of string">
Failure: expected <"Unexpected end of JSON input"> found <"unexpected end of string">
Failure: expected <"Unexpected end of JSON input"> found <"unexpected end of string">
Failure: Failure:
expected: expected:
"Unexpected token \n in JSON at position 3" "Unexpected token \n in JSON at position 3"
found: found:
"invalid character in a JSON string" "Bad control character in string literal in JSON"
Failure: Failure:
expected: expected:
"Unexpected token \n in JSON at position 3" "Unexpected token \n in JSON at position 3"
found: found:
"invalid character in a JSON string" "Bad control character in string literal in JSON"
=== json-parser-recursive.js === json-parser-recursive.js
=== json-replacer-number-wrapper-tostring.js === json-replacer-number-wrapper-tostring.js
=== json-replacer-order.js === json-replacer-order.js
@ -475,23 +471,6 @@ Did not throw exception
Did not throw exception Did not throw exception
=== json-stringify-stack.js === json-stringify-stack.js
=== json.js === json.js
Did not throw exception
Did not throw exception
Did not throw exception
Did not throw exception
Did not throw exception
Did not throw exception
Did not throw exception
Failure:
expected:
"[37,null,1,\"foo\",\"37\",\"true\",null,\"has toJSON\",{},\"has toJSON\"]"
found:
"[37,NaN,1,\"foo\",\"37\",
Failure:
expected:
"[37,null,1,\"foo\",\"37\",\"true\",null,\"has toJSON\",{},\"has toJSON\"]"
found:
"[37,NaN,1,\"foo\",\"37\",
=== keyed-array-call.js === keyed-array-call.js
=== keyed-call-generic.js === keyed-call-generic.js
=== keyed-call-ic.js === keyed-call-ic.js