Improve JSON parser conformity (#303)
- add JSON specific parsers for strings and numbers - update JSON parse error messages - fix `JSON.stringify` handling of boxed objects - parse Flags in v8 mjsunit test files - update v8.txt
This commit is contained in:
parent
761ad7856f
commit
45f8dc247c
3 changed files with 150 additions and 66 deletions
155
quickjs.c
155
quickjs.c
|
@ -18645,11 +18645,6 @@ static __exception int js_parse_string(JSParseState *s, int sep,
|
|||
goto invalid_char;
|
||||
c = *p;
|
||||
if (c < 0x20) {
|
||||
if (!s->cur_func) {
|
||||
if (do_throw)
|
||||
js_parse_error(s, "invalid character in a JSON string");
|
||||
goto fail;
|
||||
}
|
||||
if (sep == '`') {
|
||||
if (c == '\r') {
|
||||
if (p[1] == '\n')
|
||||
|
@ -18699,9 +18694,7 @@ static __exception int js_parse_string(JSParseState *s, int sep,
|
|||
continue;
|
||||
default:
|
||||
if (c >= '0' && c <= '9') {
|
||||
if (!s->cur_func)
|
||||
goto invalid_escape; /* JSON case */
|
||||
if (!(s->cur_func->js_mode & JS_MODE_STRICT) && sep != '`')
|
||||
if (s->cur_func && !(s->cur_func->js_mode & JS_MODE_STRICT) && sep != '`')
|
||||
goto parse_escape;
|
||||
if (c == '0' && !(p[1] >= '0' && p[1] <= '9')) {
|
||||
p++;
|
||||
|
@ -18711,10 +18704,9 @@ static __exception int js_parse_string(JSParseState *s, int sep,
|
|||
/* Note: according to ES2021, \8 and \9 are not
|
||||
accepted in strict mode or in templates. */
|
||||
goto invalid_escape;
|
||||
} else {
|
||||
}
|
||||
if (do_throw)
|
||||
js_parse_error(s, "octal escape sequences are not allowed in strict mode");
|
||||
}
|
||||
goto fail;
|
||||
}
|
||||
} else if (c >= 0x80) {
|
||||
|
@ -18731,10 +18723,7 @@ static __exception int js_parse_string(JSParseState *s, int sep,
|
|||
parse_escape:
|
||||
ret = lre_parse_escape(&p, TRUE);
|
||||
if (ret == -1) {
|
||||
invalid_escape:
|
||||
if (do_throw)
|
||||
js_parse_error(s, "malformed escape sequence in string literal");
|
||||
goto fail;
|
||||
goto invalid_escape;
|
||||
} else if (ret < 0) {
|
||||
/* ignore the '\' (could output a warning) */
|
||||
p++;
|
||||
|
@ -18764,6 +18753,10 @@ static __exception int js_parse_string(JSParseState *s, int sep,
|
|||
if (do_throw)
|
||||
js_parse_error(s, "invalid UTF-8 sequence");
|
||||
goto fail;
|
||||
invalid_escape:
|
||||
if (do_throw)
|
||||
js_parse_error(s, "malformed escape sequence in string literal");
|
||||
goto fail;
|
||||
invalid_char:
|
||||
if (do_throw)
|
||||
js_parse_error(s, "unexpected end of string");
|
||||
|
@ -19439,6 +19432,107 @@ static __exception int next_token(JSParseState *s)
|
|||
return -1;
|
||||
}
|
||||
|
||||
static int json_parse_string(JSParseState *s, const uint8_t **pp)
|
||||
{
|
||||
const uint8_t *p = *pp;
|
||||
int ret, i;
|
||||
uint32_t c;
|
||||
StringBuffer b_s, *b = &b_s;
|
||||
|
||||
if (string_buffer_init(s->ctx, b, 32))
|
||||
goto fail;
|
||||
|
||||
for(;;) {
|
||||
if (p >= s->buf_end) {
|
||||
js_parse_error(s, "Unexpected end of JSON input");
|
||||
goto fail;
|
||||
}
|
||||
c = *p++;
|
||||
if (c == '"')
|
||||
break;
|
||||
if (c < 0x20) {
|
||||
js_parse_error(s, "Bad control character in string literal in JSON");
|
||||
goto fail;
|
||||
}
|
||||
if (c == '\\') {
|
||||
c = *p++;
|
||||
switch(c) {
|
||||
case 'b': c = '\b'; break;
|
||||
case 'f': c = '\f'; break;
|
||||
case 'n': c = '\n'; break;
|
||||
case 'r': c = '\r'; break;
|
||||
case 't': c = '\t'; break;
|
||||
case '\"': break;
|
||||
case '\\': break;
|
||||
case '/': break; /* for v8 compatibility */
|
||||
case 'u':
|
||||
c = 0;
|
||||
for(i = 0; i < 4; i++) {
|
||||
int h = from_hex(*p++);
|
||||
if (h < 0)
|
||||
goto invalid_escape;
|
||||
c = (c << 4) | h;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
invalid_escape:
|
||||
js_parse_error(s, "Bad escaped character in JSON");
|
||||
goto fail;
|
||||
}
|
||||
}
|
||||
if (string_buffer_putc(b, c))
|
||||
goto fail;
|
||||
}
|
||||
s->token.val = TOK_STRING;
|
||||
s->token.u.str.sep = '"';
|
||||
s->token.u.str.str = string_buffer_end(b);
|
||||
*pp = p;
|
||||
return 0;
|
||||
|
||||
fail:
|
||||
string_buffer_free(b);
|
||||
return -1;
|
||||
}
|
||||
|
||||
static int json_parse_number(JSParseState *s, const uint8_t **pp)
|
||||
{
|
||||
const uint8_t *p = *pp;
|
||||
const uint8_t *p_start = p;
|
||||
|
||||
if (*p == '+' || *p == '-')
|
||||
p++;
|
||||
|
||||
if (!is_digit(*p))
|
||||
return js_parse_error(s, "Unexpected token '%c'", *p_start);
|
||||
|
||||
if (p[0] == '0' && is_digit(p[1]))
|
||||
return js_parse_error(s, "Unexpected number in JSON");
|
||||
|
||||
while (is_digit(*p))
|
||||
p++;
|
||||
|
||||
if (*p == '.') {
|
||||
p++;
|
||||
if (!is_digit(*p))
|
||||
return js_parse_error(s, "Unterminated fractional number in JSON");
|
||||
while (is_digit(*p))
|
||||
p++;
|
||||
}
|
||||
if (*p == 'e' || *p == 'E') {
|
||||
p++;
|
||||
if (*p == '+' || *p == '-')
|
||||
p++;
|
||||
if (!is_digit(*p))
|
||||
return js_parse_error(s, "Exponent part is missing a number in JSON");
|
||||
while (is_digit(*p))
|
||||
p++;
|
||||
}
|
||||
s->token.val = TOK_NUMBER;
|
||||
s->token.u.num.val = js_float64(strtod((const char *)p_start, NULL));
|
||||
*pp = p;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* 'c' is the first character. Return JS_ATOM_NULL in case of error */
|
||||
static JSAtom json_parse_ident(JSParseState *s, const uint8_t **pp, int c)
|
||||
{
|
||||
|
@ -19506,7 +19600,8 @@ static __exception int json_next_token(JSParseState *s)
|
|||
/* JSON does not accept single quoted strings */
|
||||
goto def_token;
|
||||
case '\"':
|
||||
if (js_parse_string(s, c, TRUE, p + 1, &s->token, &p))
|
||||
p++;
|
||||
if (json_parse_string(s, &p))
|
||||
goto fail;
|
||||
break;
|
||||
case '\r': /* accept DOS and MAC newline sequences */
|
||||
|
@ -19574,13 +19669,8 @@ static __exception int json_next_token(JSParseState *s)
|
|||
case '9':
|
||||
/* number */
|
||||
parse_number:
|
||||
{
|
||||
JSValue ret = js_atof(s->ctx, (const char *)p, (const char **)&p, 10, 0);
|
||||
if (JS_IsException(ret))
|
||||
if (json_parse_number(s, &p))
|
||||
goto fail;
|
||||
s->token.val = TOK_NUMBER;
|
||||
s->token.u.num.val = ret;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
if (c >= 128) {
|
||||
|
@ -42719,30 +42809,22 @@ static int js_json_to_str(JSContext *ctx, JSONStringifyContext *jsc,
|
|||
tab = JS_UNDEFINED;
|
||||
prop = JS_UNDEFINED;
|
||||
|
||||
switch (JS_VALUE_GET_NORM_TAG(val)) {
|
||||
case JS_TAG_OBJECT:
|
||||
if (JS_IsObject(val)) {
|
||||
p = JS_VALUE_GET_OBJ(val);
|
||||
cl = p->class_id;
|
||||
if (cl == JS_CLASS_STRING) {
|
||||
val = JS_ToStringFree(ctx, val);
|
||||
if (JS_IsException(val))
|
||||
goto exception;
|
||||
val = JS_ToQuotedStringFree(ctx, val);
|
||||
if (JS_IsException(val))
|
||||
goto exception;
|
||||
return string_buffer_concat_value_free(jsc->b, val);
|
||||
goto concat_primitive;
|
||||
} else if (cl == JS_CLASS_NUMBER) {
|
||||
val = JS_ToNumberFree(ctx, val);
|
||||
if (JS_IsException(val))
|
||||
goto exception;
|
||||
return string_buffer_concat_value_free(jsc->b, val);
|
||||
} else if (cl == JS_CLASS_BOOLEAN) {
|
||||
ret = string_buffer_concat_value(jsc->b, p->u.object_data);
|
||||
JS_FreeValue(ctx, val);
|
||||
return ret;
|
||||
} else if (cl == JS_CLASS_BIG_INT) {
|
||||
JS_ThrowTypeError(ctx, "BigInt are forbidden in JSON.stringify");
|
||||
goto exception;
|
||||
goto concat_primitive;
|
||||
} else if (cl == JS_CLASS_BOOLEAN || cl == JS_CLASS_BIG_INT) {
|
||||
set_value(ctx, &val, js_dup(p->u.object_data));
|
||||
goto concat_primitive;
|
||||
}
|
||||
v = js_array_includes(ctx, jsc->stack, 1, &val);
|
||||
if (JS_IsException(v))
|
||||
|
@ -42855,6 +42937,9 @@ static int js_json_to_str(JSContext *ctx, JSONStringifyContext *jsc,
|
|||
JS_FreeValue(ctx, indent1);
|
||||
JS_FreeValue(ctx, prop);
|
||||
return 0;
|
||||
}
|
||||
concat_primitive:
|
||||
switch (JS_VALUE_GET_NORM_TAG(val)) {
|
||||
case JS_TAG_STRING:
|
||||
val = JS_ToQuotedStringFree(ctx, val);
|
||||
if (JS_IsException(val))
|
||||
|
|
32
v8.js
32
v8.js
|
@ -39,10 +39,28 @@ for (const file of files) {
|
|||
if (source.includes('Realm.create()')) continue // TODO support Realm object
|
||||
if (source.includes('// MODULE')) continue // TODO support modules
|
||||
if (source.includes('// Files:')) continue // TODO support includes
|
||||
|
||||
// the default --stack-size is necessary to keep output of stack overflowing
|
||||
// tests stable; it will be overridden by a Flags comment
|
||||
let flags = { '--stack-size': 2048 }, flagstr = ""
|
||||
// parse command line flags
|
||||
for (let s of source.matchAll(/\/\/ Flags:(.+)/g)) {
|
||||
for (let m of s[1].matchAll(/\s*([\S]+)/g)) {
|
||||
const v = m[1].match(/([\S]+)=([\S]+)/)
|
||||
if (v) {
|
||||
flags[v[1]] = v[2]
|
||||
flagstr += ` ${v[1]}=${v[2]}`
|
||||
} else {
|
||||
flags[m[1]] = true
|
||||
flagstr += ` ${m[1]}`
|
||||
}
|
||||
}
|
||||
}
|
||||
// exclude tests that use V8 intrinsics like %OptimizeFunctionOnNextCall
|
||||
if (source.includes ("--allow-natives-syntax")) continue
|
||||
if (flags["--allow-natives-syntax"]) continue
|
||||
// exclude tests that use V8 extensions
|
||||
if (source.includes ("--expose-externalize-string")) continue
|
||||
if (flags["--expose-externalize-string"]) continue
|
||||
// parse environment variables
|
||||
let env = {}, envstr = ""
|
||||
for (let s of source.matchAll(/environment variables:(.+)/ig)) {
|
||||
for (let m of s[1].matchAll(/\s*([\S]+)=([\S]+)/g)) {
|
||||
|
@ -50,11 +68,13 @@ for (const file of files) {
|
|||
envstr += ` ${m[1]}=${m[2]}`
|
||||
}
|
||||
}
|
||||
//print(`=== ${file}${envstr}${flagstr}`)
|
||||
print(`=== ${file}${envstr}`)
|
||||
// the fixed --stack-size is necessary to keep output of stack overflowing
|
||||
// tests stable; their stack traces are somewhat arbitrary otherwise
|
||||
const args = [argv0, "--stack-size", `${2048 * 1024}`,
|
||||
"-I", "mjsunit.js", "-I", tweak, file]
|
||||
const args = [argv0,
|
||||
"--stack-size", `${flags["--stack-size"]*1024}`,
|
||||
"-I", "mjsunit.js",
|
||||
"-I", tweak,
|
||||
file]
|
||||
const opts = {block:true, cwd:dir, env:env, usePath:false}
|
||||
os.exec(args, opts)
|
||||
}
|
||||
|
|
25
v8.txt
25
v8.txt
|
@ -452,20 +452,16 @@ Object <Error(SyntaxError: invalid assignment left-hand side)> is not an instanc
|
|||
Object <Error(SyntaxError: invalid increment/decrement operand)> is not an instance of <ReferenceError> but of <SyntaxError>
|
||||
=== invalid-source-element.js
|
||||
=== json-errors.js
|
||||
Failure: expected <"Unexpected end of JSON input"> found <"unexpected end of string">
|
||||
Failure: expected <"Unexpected end of JSON input"> found <"unexpected end of string">
|
||||
Failure: expected <"Unexpected end of JSON input"> found <"unexpected end of string">
|
||||
Failure: expected <"Unexpected end of JSON input"> found <"unexpected end of string">
|
||||
Failure:
|
||||
expected:
|
||||
"Unexpected token \n in JSON at position 3"
|
||||
found:
|
||||
"invalid character in a JSON string"
|
||||
"Bad control character in string literal in JSON"
|
||||
Failure:
|
||||
expected:
|
||||
"Unexpected token \n in JSON at position 3"
|
||||
found:
|
||||
"invalid character in a JSON string"
|
||||
"Bad control character in string literal in JSON"
|
||||
=== json-parser-recursive.js
|
||||
=== json-replacer-number-wrapper-tostring.js
|
||||
=== json-replacer-order.js
|
||||
|
@ -475,23 +471,6 @@ Did not throw exception
|
|||
Did not throw exception
|
||||
=== json-stringify-stack.js
|
||||
=== json.js
|
||||
Did not throw exception
|
||||
Did not throw exception
|
||||
Did not throw exception
|
||||
Did not throw exception
|
||||
Did not throw exception
|
||||
Did not throw exception
|
||||
Did not throw exception
|
||||
Failure:
|
||||
expected:
|
||||
"[37,null,1,\"foo\",\"37\",\"true\",null,\"has toJSON\",{},\"has toJSON\"]"
|
||||
found:
|
||||
"[37,NaN,1,\"foo\",\"37\",
|
||||
Failure:
|
||||
expected:
|
||||
"[37,null,1,\"foo\",\"37\",\"true\",null,\"has toJSON\",{},\"has toJSON\"]"
|
||||
found:
|
||||
"[37,NaN,1,\"foo\",\"37\",
|
||||
=== keyed-array-call.js
|
||||
=== keyed-call-generic.js
|
||||
=== keyed-call-ic.js
|
||||
|
|
Loading…
Reference in a new issue