Improve JSON parser conformity (#303)
- add JSON-specific parsers for strings and numbers
- update JSON parse error messages
- fix `JSON.stringify` handling of boxed objects
- parse Flags in v8 mjsunit test files
- update v8.txt
This commit is contained in:
parent
761ad7856f
commit
45f8dc247c
3 changed files with 150 additions and 66 deletions
155
quickjs.c
155
quickjs.c
|
@ -18645,11 +18645,6 @@ static __exception int js_parse_string(JSParseState *s, int sep,
|
||||||
goto invalid_char;
|
goto invalid_char;
|
||||||
c = *p;
|
c = *p;
|
||||||
if (c < 0x20) {
|
if (c < 0x20) {
|
||||||
if (!s->cur_func) {
|
|
||||||
if (do_throw)
|
|
||||||
js_parse_error(s, "invalid character in a JSON string");
|
|
||||||
goto fail;
|
|
||||||
}
|
|
||||||
if (sep == '`') {
|
if (sep == '`') {
|
||||||
if (c == '\r') {
|
if (c == '\r') {
|
||||||
if (p[1] == '\n')
|
if (p[1] == '\n')
|
||||||
|
@ -18699,9 +18694,7 @@ static __exception int js_parse_string(JSParseState *s, int sep,
|
||||||
continue;
|
continue;
|
||||||
default:
|
default:
|
||||||
if (c >= '0' && c <= '9') {
|
if (c >= '0' && c <= '9') {
|
||||||
if (!s->cur_func)
|
if (s->cur_func && !(s->cur_func->js_mode & JS_MODE_STRICT) && sep != '`')
|
||||||
goto invalid_escape; /* JSON case */
|
|
||||||
if (!(s->cur_func->js_mode & JS_MODE_STRICT) && sep != '`')
|
|
||||||
goto parse_escape;
|
goto parse_escape;
|
||||||
if (c == '0' && !(p[1] >= '0' && p[1] <= '9')) {
|
if (c == '0' && !(p[1] >= '0' && p[1] <= '9')) {
|
||||||
p++;
|
p++;
|
||||||
|
@ -18711,10 +18704,9 @@ static __exception int js_parse_string(JSParseState *s, int sep,
|
||||||
/* Note: according to ES2021, \8 and \9 are not
|
/* Note: according to ES2021, \8 and \9 are not
|
||||||
accepted in strict mode or in templates. */
|
accepted in strict mode or in templates. */
|
||||||
goto invalid_escape;
|
goto invalid_escape;
|
||||||
} else {
|
}
|
||||||
if (do_throw)
|
if (do_throw)
|
||||||
js_parse_error(s, "octal escape sequences are not allowed in strict mode");
|
js_parse_error(s, "octal escape sequences are not allowed in strict mode");
|
||||||
}
|
|
||||||
goto fail;
|
goto fail;
|
||||||
}
|
}
|
||||||
} else if (c >= 0x80) {
|
} else if (c >= 0x80) {
|
||||||
|
@ -18731,10 +18723,7 @@ static __exception int js_parse_string(JSParseState *s, int sep,
|
||||||
parse_escape:
|
parse_escape:
|
||||||
ret = lre_parse_escape(&p, TRUE);
|
ret = lre_parse_escape(&p, TRUE);
|
||||||
if (ret == -1) {
|
if (ret == -1) {
|
||||||
invalid_escape:
|
goto invalid_escape;
|
||||||
if (do_throw)
|
|
||||||
js_parse_error(s, "malformed escape sequence in string literal");
|
|
||||||
goto fail;
|
|
||||||
} else if (ret < 0) {
|
} else if (ret < 0) {
|
||||||
/* ignore the '\' (could output a warning) */
|
/* ignore the '\' (could output a warning) */
|
||||||
p++;
|
p++;
|
||||||
|
@ -18764,6 +18753,10 @@ static __exception int js_parse_string(JSParseState *s, int sep,
|
||||||
if (do_throw)
|
if (do_throw)
|
||||||
js_parse_error(s, "invalid UTF-8 sequence");
|
js_parse_error(s, "invalid UTF-8 sequence");
|
||||||
goto fail;
|
goto fail;
|
||||||
|
invalid_escape:
|
||||||
|
if (do_throw)
|
||||||
|
js_parse_error(s, "malformed escape sequence in string literal");
|
||||||
|
goto fail;
|
||||||
invalid_char:
|
invalid_char:
|
||||||
if (do_throw)
|
if (do_throw)
|
||||||
js_parse_error(s, "unexpected end of string");
|
js_parse_error(s, "unexpected end of string");
|
||||||
|
@ -19439,6 +19432,107 @@ static __exception int next_token(JSParseState *s)
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Parse the body of a JSON string literal.
 *
 * On entry *pp points at the first byte after the opening '"'.
 *
 * On success the decoded characters are stored in s->token (val = TOK_STRING,
 * u.str.sep = '"', u.str.str = the accumulated string), *pp is advanced past
 * the closing '"' and 0 is returned.
 *
 * On failure -1 is returned, *pp is left untouched and the string buffer is
 * released. Syntax errors are reported through js_parse_error():
 *   - input ends before the closing quote, including in the middle of an
 *     escape sequence: "Unexpected end of JSON input"
 *   - a raw byte below 0x20:  "Bad control character in string literal in JSON"
 *   - an unknown escape or a non-hex digit after \u:
 *                             "Bad escaped character in JSON"
 */
static int json_parse_string(JSParseState *s, const uint8_t **pp)
{
    const uint8_t *p = *pp;
    int i;
    uint32_t c;
    StringBuffer b_s, *b = &b_s;

    if (string_buffer_init(s->ctx, b, 32))
        goto fail;

    for(;;) {
        if (p >= s->buf_end)
            goto end_of_input;
        c = *p++;
        if (c == '"')
            break;
        if (c < 0x20) {
            js_parse_error(s, "Bad control character in string literal in JSON");
            goto fail;
        }
        if (c == '\\') {
            /* every byte of the escape is bounds checked like the main loop,
               so a truncated escape is reported as end of input instead of
               relying on whatever byte follows the buffer */
            if (p >= s->buf_end)
                goto end_of_input;
            c = *p++;
            switch(c) {
            case 'b': c = '\b'; break;
            case 'f': c = '\f'; break;
            case 'n': c = '\n'; break;
            case 'r': c = '\r'; break;
            case 't': c = '\t'; break;
            case '\"': break;
            case '\\': break;
            case '/': break; /* for v8 compatibility */
            case 'u':
                /* exactly four hexadecimal digits form one 16 bit code unit */
                c = 0;
                for(i = 0; i < 4; i++) {
                    int h;
                    if (p >= s->buf_end)
                        goto end_of_input;
                    h = from_hex(*p++);
                    if (h < 0)
                        goto invalid_escape;
                    c = (c << 4) | h;
                }
                break;
            default:
            invalid_escape:
                js_parse_error(s, "Bad escaped character in JSON");
                goto fail;
            }
        }
        /* NOTE(review): bytes >= 0x80 are appended one at a time without any
           decoding; if the input buffer is UTF-8 (to be confirmed against the
           caller) multi-byte sequences need to be decoded here first. */
        if (string_buffer_putc(b, c))
            goto fail;
    }
    s->token.val = TOK_STRING;
    s->token.u.str.sep = '"';
    s->token.u.str.str = string_buffer_end(b);
    *pp = p;
    return 0;

 end_of_input:
    js_parse_error(s, "Unexpected end of JSON input");
 fail:
    string_buffer_free(b);
    return -1;
}
|
||||||
|
|
||||||
|
/* Scan one JSON number starting at *pp.
 *
 * Accepted shape: optional sign, integer part without a superfluous leading
 * zero, optional '.' followed by at least one digit, optional 'e'/'E'
 * exponent with optional sign and at least one digit.
 *
 * On success s->token becomes a TOK_NUMBER holding the converted value, *pp
 * is moved to the first byte after the number and 0 is returned. On a syntax
 * error the result of js_parse_error() is returned and *pp is not modified.
 *
 * NOTE(review): a leading '+' is accepted here although the JSON grammar only
 * allows '-'; whether the caller can ever dispatch '+' is not visible from
 * this function.
 */
static int json_parse_number(JSParseState *s, const uint8_t **pp)
{
    const uint8_t *start = *pp;
    const uint8_t *cur = start;

    /* optional sign */
    if (*cur == '-' || *cur == '+')
        cur++;

    /* integer part */
    if (!is_digit(cur[0]))
        return js_parse_error(s, "Unexpected token '%c'", *start);
    if (cur[0] == '0' && is_digit(cur[1]))
        return js_parse_error(s, "Unexpected number in JSON");
    do {
        cur++;
    } while (is_digit(*cur));

    /* optional fractional part */
    if (*cur == '.') {
        cur++;
        if (!is_digit(*cur))
            return js_parse_error(s, "Unterminated fractional number in JSON");
        do {
            cur++;
        } while (is_digit(*cur));
    }

    /* optional exponent */
    if (*cur == 'e' || *cur == 'E') {
        cur++;
        if (*cur == '-' || *cur == '+')
            cur++;
        if (!is_digit(*cur))
            return js_parse_error(s, "Exponent part is missing a number in JSON");
        do {
            cur++;
        } while (is_digit(*cur));
    }

    /* the text is converted again from its first byte; the scan above only
       decides where the token ends.
       NOTE(review): strtod() is not limited to the span validated above and
       presumably follows the current C locale - confirm this is acceptable. */
    s->token.val = TOK_NUMBER;
    s->token.u.num.val = js_float64(strtod((const char *)start, NULL));
    *pp = cur;
    return 0;
}
|
||||||
|
|
||||||
/* 'c' is the first character. Return JS_ATOM_NULL in case of error */
|
/* 'c' is the first character. Return JS_ATOM_NULL in case of error */
|
||||||
static JSAtom json_parse_ident(JSParseState *s, const uint8_t **pp, int c)
|
static JSAtom json_parse_ident(JSParseState *s, const uint8_t **pp, int c)
|
||||||
{
|
{
|
||||||
|
@ -19506,7 +19600,8 @@ static __exception int json_next_token(JSParseState *s)
|
||||||
/* JSON does not accept single quoted strings */
|
/* JSON does not accept single quoted strings */
|
||||||
goto def_token;
|
goto def_token;
|
||||||
case '\"':
|
case '\"':
|
||||||
if (js_parse_string(s, c, TRUE, p + 1, &s->token, &p))
|
p++;
|
||||||
|
if (json_parse_string(s, &p))
|
||||||
goto fail;
|
goto fail;
|
||||||
break;
|
break;
|
||||||
case '\r': /* accept DOS and MAC newline sequences */
|
case '\r': /* accept DOS and MAC newline sequences */
|
||||||
|
@ -19574,13 +19669,8 @@ static __exception int json_next_token(JSParseState *s)
|
||||||
case '9':
|
case '9':
|
||||||
/* number */
|
/* number */
|
||||||
parse_number:
|
parse_number:
|
||||||
{
|
if (json_parse_number(s, &p))
|
||||||
JSValue ret = js_atof(s->ctx, (const char *)p, (const char **)&p, 10, 0);
|
|
||||||
if (JS_IsException(ret))
|
|
||||||
goto fail;
|
goto fail;
|
||||||
s->token.val = TOK_NUMBER;
|
|
||||||
s->token.u.num.val = ret;
|
|
||||||
}
|
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
if (c >= 128) {
|
if (c >= 128) {
|
||||||
|
@ -42719,30 +42809,22 @@ static int js_json_to_str(JSContext *ctx, JSONStringifyContext *jsc,
|
||||||
tab = JS_UNDEFINED;
|
tab = JS_UNDEFINED;
|
||||||
prop = JS_UNDEFINED;
|
prop = JS_UNDEFINED;
|
||||||
|
|
||||||
switch (JS_VALUE_GET_NORM_TAG(val)) {
|
if (JS_IsObject(val)) {
|
||||||
case JS_TAG_OBJECT:
|
|
||||||
p = JS_VALUE_GET_OBJ(val);
|
p = JS_VALUE_GET_OBJ(val);
|
||||||
cl = p->class_id;
|
cl = p->class_id;
|
||||||
if (cl == JS_CLASS_STRING) {
|
if (cl == JS_CLASS_STRING) {
|
||||||
val = JS_ToStringFree(ctx, val);
|
val = JS_ToStringFree(ctx, val);
|
||||||
if (JS_IsException(val))
|
if (JS_IsException(val))
|
||||||
goto exception;
|
goto exception;
|
||||||
val = JS_ToQuotedStringFree(ctx, val);
|
goto concat_primitive;
|
||||||
if (JS_IsException(val))
|
|
||||||
goto exception;
|
|
||||||
return string_buffer_concat_value_free(jsc->b, val);
|
|
||||||
} else if (cl == JS_CLASS_NUMBER) {
|
} else if (cl == JS_CLASS_NUMBER) {
|
||||||
val = JS_ToNumberFree(ctx, val);
|
val = JS_ToNumberFree(ctx, val);
|
||||||
if (JS_IsException(val))
|
if (JS_IsException(val))
|
||||||
goto exception;
|
goto exception;
|
||||||
return string_buffer_concat_value_free(jsc->b, val);
|
goto concat_primitive;
|
||||||
} else if (cl == JS_CLASS_BOOLEAN) {
|
} else if (cl == JS_CLASS_BOOLEAN || cl == JS_CLASS_BIG_INT) {
|
||||||
ret = string_buffer_concat_value(jsc->b, p->u.object_data);
|
set_value(ctx, &val, js_dup(p->u.object_data));
|
||||||
JS_FreeValue(ctx, val);
|
goto concat_primitive;
|
||||||
return ret;
|
|
||||||
} else if (cl == JS_CLASS_BIG_INT) {
|
|
||||||
JS_ThrowTypeError(ctx, "BigInt are forbidden in JSON.stringify");
|
|
||||||
goto exception;
|
|
||||||
}
|
}
|
||||||
v = js_array_includes(ctx, jsc->stack, 1, &val);
|
v = js_array_includes(ctx, jsc->stack, 1, &val);
|
||||||
if (JS_IsException(v))
|
if (JS_IsException(v))
|
||||||
|
@ -42855,6 +42937,9 @@ static int js_json_to_str(JSContext *ctx, JSONStringifyContext *jsc,
|
||||||
JS_FreeValue(ctx, indent1);
|
JS_FreeValue(ctx, indent1);
|
||||||
JS_FreeValue(ctx, prop);
|
JS_FreeValue(ctx, prop);
|
||||||
return 0;
|
return 0;
|
||||||
|
}
|
||||||
|
concat_primitive:
|
||||||
|
switch (JS_VALUE_GET_NORM_TAG(val)) {
|
||||||
case JS_TAG_STRING:
|
case JS_TAG_STRING:
|
||||||
val = JS_ToQuotedStringFree(ctx, val);
|
val = JS_ToQuotedStringFree(ctx, val);
|
||||||
if (JS_IsException(val))
|
if (JS_IsException(val))
|
||||||
|
|
32
v8.js
32
v8.js
|
@ -39,10 +39,28 @@ for (const file of files) {
|
||||||
if (source.includes('Realm.create()')) continue // TODO support Realm object
|
if (source.includes('Realm.create()')) continue // TODO support Realm object
|
||||||
if (source.includes('// MODULE')) continue // TODO support modules
|
if (source.includes('// MODULE')) continue // TODO support modules
|
||||||
if (source.includes('// Files:')) continue // TODO support includes
|
if (source.includes('// Files:')) continue // TODO support includes
|
||||||
|
|
||||||
|
// the default --stack-size is necessary to keep output of stack overflowing
|
||||||
|
// tests stable; it will be overridden by a Flags comment
|
||||||
|
let flags = { '--stack-size': 2048 }, flagstr = ""
|
||||||
|
// parse command line flags
|
||||||
|
for (let s of source.matchAll(/\/\/ Flags:(.+)/g)) {
|
||||||
|
for (let m of s[1].matchAll(/\s*([\S]+)/g)) {
|
||||||
|
const v = m[1].match(/([\S]+)=([\S]+)/)
|
||||||
|
if (v) {
|
||||||
|
flags[v[1]] = v[2]
|
||||||
|
flagstr += ` ${v[1]}=${v[2]}`
|
||||||
|
} else {
|
||||||
|
flags[m[1]] = true
|
||||||
|
flagstr += ` ${m[1]}`
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
// exclude tests that use V8 intrinsics like %OptimizeFunctionOnNextCall
|
// exclude tests that use V8 intrinsics like %OptimizeFunctionOnNextCall
|
||||||
if (source.includes ("--allow-natives-syntax")) continue
|
if (flags["--allow-natives-syntax"]) continue
|
||||||
// exclude tests that use V8 extensions
|
// exclude tests that use V8 extensions
|
||||||
if (source.includes ("--expose-externalize-string")) continue
|
if (flags["--expose-externalize-string"]) continue
|
||||||
|
// parse environment variables
|
||||||
let env = {}, envstr = ""
|
let env = {}, envstr = ""
|
||||||
for (let s of source.matchAll(/environment variables:(.+)/ig)) {
|
for (let s of source.matchAll(/environment variables:(.+)/ig)) {
|
||||||
for (let m of s[1].matchAll(/\s*([\S]+)=([\S]+)/g)) {
|
for (let m of s[1].matchAll(/\s*([\S]+)=([\S]+)/g)) {
|
||||||
|
@ -50,11 +68,13 @@ for (const file of files) {
|
||||||
envstr += ` ${m[1]}=${m[2]}`
|
envstr += ` ${m[1]}=${m[2]}`
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
//print(`=== ${file}${envstr}${flagstr}`)
|
||||||
print(`=== ${file}${envstr}`)
|
print(`=== ${file}${envstr}`)
|
||||||
// the fixed --stack-size is necessary to keep output of stack overflowing
|
const args = [argv0,
|
||||||
// tests stable; their stack traces are somewhat arbitrary otherwise
|
"--stack-size", `${flags["--stack-size"]*1024}`,
|
||||||
const args = [argv0, "--stack-size", `${2048 * 1024}`,
|
"-I", "mjsunit.js",
|
||||||
"-I", "mjsunit.js", "-I", tweak, file]
|
"-I", tweak,
|
||||||
|
file]
|
||||||
const opts = {block:true, cwd:dir, env:env, usePath:false}
|
const opts = {block:true, cwd:dir, env:env, usePath:false}
|
||||||
os.exec(args, opts)
|
os.exec(args, opts)
|
||||||
}
|
}
|
||||||
|
|
25
v8.txt
25
v8.txt
|
@ -452,20 +452,16 @@ Object <Error(SyntaxError: invalid assignment left-hand side)> is not an instanc
|
||||||
Object <Error(SyntaxError: invalid increment/decrement operand)> is not an instance of <ReferenceError> but of <SyntaxError>
|
Object <Error(SyntaxError: invalid increment/decrement operand)> is not an instance of <ReferenceError> but of <SyntaxError>
|
||||||
=== invalid-source-element.js
|
=== invalid-source-element.js
|
||||||
=== json-errors.js
|
=== json-errors.js
|
||||||
Failure: expected <"Unexpected end of JSON input"> found <"unexpected end of string">
|
|
||||||
Failure: expected <"Unexpected end of JSON input"> found <"unexpected end of string">
|
|
||||||
Failure: expected <"Unexpected end of JSON input"> found <"unexpected end of string">
|
|
||||||
Failure: expected <"Unexpected end of JSON input"> found <"unexpected end of string">
|
|
||||||
Failure:
|
Failure:
|
||||||
expected:
|
expected:
|
||||||
"Unexpected token \n in JSON at position 3"
|
"Unexpected token \n in JSON at position 3"
|
||||||
found:
|
found:
|
||||||
"invalid character in a JSON string"
|
"Bad control character in string literal in JSON"
|
||||||
Failure:
|
Failure:
|
||||||
expected:
|
expected:
|
||||||
"Unexpected token \n in JSON at position 3"
|
"Unexpected token \n in JSON at position 3"
|
||||||
found:
|
found:
|
||||||
"invalid character in a JSON string"
|
"Bad control character in string literal in JSON"
|
||||||
=== json-parser-recursive.js
|
=== json-parser-recursive.js
|
||||||
=== json-replacer-number-wrapper-tostring.js
|
=== json-replacer-number-wrapper-tostring.js
|
||||||
=== json-replacer-order.js
|
=== json-replacer-order.js
|
||||||
|
@ -475,23 +471,6 @@ Did not throw exception
|
||||||
Did not throw exception
|
Did not throw exception
|
||||||
=== json-stringify-stack.js
|
=== json-stringify-stack.js
|
||||||
=== json.js
|
=== json.js
|
||||||
Did not throw exception
|
|
||||||
Did not throw exception
|
|
||||||
Did not throw exception
|
|
||||||
Did not throw exception
|
|
||||||
Did not throw exception
|
|
||||||
Did not throw exception
|
|
||||||
Did not throw exception
|
|
||||||
Failure:
|
|
||||||
expected:
|
|
||||||
"[37,null,1,\"foo\",\"37\",\"true\",null,\"has toJSON\",{},\"has toJSON\"]"
|
|
||||||
found:
|
|
||||||
"[37,NaN,1,\"foo\",\"37\",
|
|
||||||
Failure:
|
|
||||||
expected:
|
|
||||||
"[37,null,1,\"foo\",\"37\",\"true\",null,\"has toJSON\",{},\"has toJSON\"]"
|
|
||||||
found:
|
|
||||||
"[37,NaN,1,\"foo\",\"37\",
|
|
||||||
=== keyed-array-call.js
|
=== keyed-array-call.js
|
||||||
=== keyed-call-generic.js
|
=== keyed-call-generic.js
|
||||||
=== keyed-call-ic.js
|
=== keyed-call-ic.js
|
||||||
|
|
Loading…
Reference in a new issue