Improve internal string allocation methods (#398)

String values are allocated as temporary or final results. This commit
attempts to improve the consistency and performance of this step.

- define `JS_NewString` as an inline function to allow simple expansion
  of `strlen()` for string literals
- document string content constraints regarding UTF-8 encoding
- rename `js_new_string8` to `js_new_string8_len`; it now takes a `const char *`
- new inline function `js_new_string8` takes a `const char *`, computes the
  string length with `strlen` and calls `js_new_string8_len`, so there is no
  overhead for string literals
- rename `js_new_string16` to `js_new_string16_len`
- use internal string allocation functions where appropriate, remove overhead
- allocate extra byte for null terminator in source code string
This commit is contained in:
Charlie Gordon 2024-05-10 12:43:35 +02:00 committed by GitHub
parent f9ecc1a598
commit b81d4deee4
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 108 additions and 70 deletions

161
quickjs.c
View file

@ -1098,6 +1098,7 @@ static int JS_ToBoolFree(JSContext *ctx, JSValue val);
static int JS_ToInt32Free(JSContext *ctx, int32_t *pres, JSValue val); static int JS_ToInt32Free(JSContext *ctx, int32_t *pres, JSValue val);
static int JS_ToFloat64Free(JSContext *ctx, double *pres, JSValue val); static int JS_ToFloat64Free(JSContext *ctx, double *pres, JSValue val);
static int JS_ToUint8ClampFree(JSContext *ctx, int32_t *pres, JSValue val); static int JS_ToUint8ClampFree(JSContext *ctx, int32_t *pres, JSValue val);
static JSValue js_new_string8_len(JSContext *ctx, const char *buf, int len);
static JSValue js_compile_regexp(JSContext *ctx, JSValue pattern, static JSValue js_compile_regexp(JSContext *ctx, JSValue pattern,
JSValue flags); JSValue flags);
static JSValue js_regexp_constructor_internal(JSContext *ctx, JSValue ctor, static JSValue js_regexp_constructor_internal(JSContext *ctx, JSValue ctor,
@ -2820,7 +2821,8 @@ static JSAtom __JS_NewAtom(JSRuntime *rt, JSString *str, int atom_type)
return i; return i;
} }
/* only works with zero terminated 8 bit strings */ // XXX: `str` must be pure ASCII. No UTF-8 encoded strings
// XXX: `str` must not be the string representation of a small integer
static JSAtom __JS_NewAtomInit(JSRuntime *rt, const char *str, int len, static JSAtom __JS_NewAtomInit(JSRuntime *rt, const char *str, int len,
int atom_type) int atom_type)
{ {
@ -2833,6 +2835,7 @@ static JSAtom __JS_NewAtomInit(JSRuntime *rt, const char *str, int len,
return __JS_NewAtom(rt, p, atom_type); return __JS_NewAtom(rt, p, atom_type);
} }
// XXX: `str` must be raw 8-bit contents. No UTF-8 encoded strings
static JSAtom __JS_FindAtom(JSRuntime *rt, const char *str, size_t len, static JSAtom __JS_FindAtom(JSRuntime *rt, const char *str, size_t len,
int atom_type) int atom_type)
{ {
@ -2924,11 +2927,14 @@ static JSAtom JS_NewAtomStr(JSContext *ctx, JSString *p)
return __JS_NewAtom(rt, p, JS_ATOM_TYPE_STRING); return __JS_NewAtom(rt, p, JS_ATOM_TYPE_STRING);
} }
/* `str` may be pure ASCII or UTF-8 encoded */
JSAtom JS_NewAtomLen(JSContext *ctx, const char *str, size_t len) JSAtom JS_NewAtomLen(JSContext *ctx, const char *str, size_t len)
{ {
JSValue val; JSValue val;
if (len == 0 || !is_digit(*str)) { if (len == 0 || !is_digit(*str)) {
// TODO(chqrlie): this does not work if `str` has UTF-8 encoded contents
// bug example: `({ "\u00c3\u00a9": 1 }).\u00e9` evaluates to `1`.
JSAtom atom = __JS_FindAtom(ctx->rt, str, len, JS_ATOM_TYPE_STRING); JSAtom atom = __JS_FindAtom(ctx->rt, str, len, JS_ATOM_TYPE_STRING);
if (atom) if (atom)
return atom; return atom;
@ -2939,6 +2945,7 @@ JSAtom JS_NewAtomLen(JSContext *ctx, const char *str, size_t len)
return JS_NewAtomStr(ctx, JS_VALUE_GET_STRING(val)); return JS_NewAtomStr(ctx, JS_VALUE_GET_STRING(val));
} }
/* `str` may be pure ASCII or UTF-8 encoded */
JSAtom JS_NewAtom(JSContext *ctx, const char *str) JSAtom JS_NewAtom(JSContext *ctx, const char *str)
{ {
return JS_NewAtomLen(ctx, str, strlen(str)); return JS_NewAtomLen(ctx, str, strlen(str));
@ -2951,7 +2958,7 @@ JSAtom JS_NewAtomUInt32(JSContext *ctx, uint32_t n)
} else { } else {
char buf[16]; char buf[16];
size_t len = u32toa(buf, n); size_t len = u32toa(buf, n);
JSValue val = JS_NewStringLen(ctx, buf, len); JSValue val = js_new_string8_len(ctx, buf, len);
if (JS_IsException(val)) if (JS_IsException(val))
return JS_ATOM_NULL; return JS_ATOM_NULL;
return __JS_NewAtom(ctx->rt, JS_VALUE_GET_STRING(val), return __JS_NewAtom(ctx->rt, JS_VALUE_GET_STRING(val),
@ -2966,7 +2973,7 @@ static JSAtom JS_NewAtomInt64(JSContext *ctx, int64_t n)
} else { } else {
char buf[24]; char buf[24];
size_t len = i64toa(buf, n); size_t len = i64toa(buf, n);
JSValue val = JS_NewStringLen(ctx, buf, len); JSValue val = js_new_string8_len(ctx, buf, len);
if (JS_IsException(val)) if (JS_IsException(val))
return JS_ATOM_NULL; return JS_ATOM_NULL;
return __JS_NewAtom(ctx->rt, JS_VALUE_GET_STRING(val), return __JS_NewAtom(ctx->rt, JS_VALUE_GET_STRING(val),
@ -2999,6 +3006,7 @@ static JSValue JS_NewSymbolFromAtom(JSContext *ctx, JSAtom descr,
return JS_NewSymbolInternal(ctx, p, atom_type); return JS_NewSymbolInternal(ctx, p, atom_type);
} }
/* `description` may be pure ASCII or UTF-8 encoded */
JSValue JS_NewSymbol(JSContext *ctx, const char *description, JS_BOOL is_global) JSValue JS_NewSymbol(JSContext *ctx, const char *description, JS_BOOL is_global)
{ {
JSAtom atom = JS_NewAtom(ctx, description); JSAtom atom = JS_NewAtom(ctx, description);
@ -3070,7 +3078,7 @@ static JSValue __JS_AtomToValue(JSContext *ctx, JSAtom atom, BOOL force_string)
if (__JS_AtomIsTaggedInt(atom)) { if (__JS_AtomIsTaggedInt(atom)) {
size_t len = u32toa(buf, __JS_AtomToUInt32(atom)); size_t len = u32toa(buf, __JS_AtomToUInt32(atom));
return JS_NewStringLen(ctx, buf, len); return js_new_string8_len(ctx, buf, len);
} else { } else {
JSRuntime *rt = ctx->rt; JSRuntime *rt = ctx->rt;
JSAtomStruct *p; JSAtomStruct *p;
@ -3302,6 +3310,7 @@ const char *JS_AtomToCString(JSContext *ctx, JSAtom atom)
} }
/* return a string atom containing name concatenated with str1 */ /* return a string atom containing name concatenated with str1 */
/* `str1` may be pure ASCII or UTF-8 encoded */
static JSAtom js_atom_concat_str(JSContext *ctx, JSAtom name, const char *str1) static JSAtom js_atom_concat_str(JSContext *ctx, JSAtom name, const char *str1)
{ {
JSValue str; JSValue str;
@ -3430,6 +3439,7 @@ int JS_NewClass(JSRuntime *rt, JSClassID class_id, const JSClassDef *class_def)
int ret, len; int ret, len;
JSAtom name; JSAtom name;
// XXX: class_def->class_name must be raw 8-bit contents. No UTF-8 encoded strings
len = strlen(class_def->class_name); len = strlen(class_def->class_name);
name = __JS_FindAtom(rt, class_def->class_name, len, JS_ATOM_TYPE_STRING); name = __JS_FindAtom(rt, class_def->class_name, len, JS_ATOM_TYPE_STRING);
if (name == JS_ATOM_NULL) { if (name == JS_ATOM_NULL) {
@ -3442,13 +3452,11 @@ int JS_NewClass(JSRuntime *rt, JSClassID class_id, const JSClassDef *class_def)
return ret; return ret;
} }
static JSValue js_new_string8(JSContext *ctx, const uint8_t *buf, int len) // XXX: `buf` contains raw 8-bit data, no UTF-8 decoding is performed
// XXX: no special case for len == 0
static JSValue js_new_string8_len(JSContext *ctx, const char *buf, int len)
{ {
JSString *str; JSString *str;
if (len <= 0) {
return JS_AtomToString(ctx, JS_ATOM_empty_string);
}
str = js_alloc_string(ctx, len, 0); str = js_alloc_string(ctx, len, 0);
if (!str) if (!str)
return JS_EXCEPTION; return JS_EXCEPTION;
@ -3457,7 +3465,14 @@ static JSValue js_new_string8(JSContext *ctx, const uint8_t *buf, int len)
return JS_MKPTR(JS_TAG_STRING, str); return JS_MKPTR(JS_TAG_STRING, str);
} }
static JSValue js_new_string16(JSContext *ctx, const uint16_t *buf, int len) // XXX: `buf` contains raw 8-bit data, no UTF-8 decoding is performed
// XXX: no special case for the empty string
/* Build a string value from the null-terminated buffer `str`.
   The length is measured with strlen() and the bytes are handed to
   js_new_string8_len() unchanged. */
static inline JSValue js_new_string8(JSContext *ctx, const char *str)
{
    size_t nbytes = strlen(str);

    return js_new_string8_len(ctx, str, nbytes);
}
static JSValue js_new_string16_len(JSContext *ctx, const uint16_t *buf, int len)
{ {
JSString *str; JSString *str;
str = js_alloc_string(ctx, len, 1); str = js_alloc_string(ctx, len, 1);
@ -3470,11 +3485,11 @@ static JSValue js_new_string16(JSContext *ctx, const uint16_t *buf, int len)
static JSValue js_new_string_char(JSContext *ctx, uint16_t c) static JSValue js_new_string_char(JSContext *ctx, uint16_t c)
{ {
if (c < 0x100) { if (c < 0x100) {
uint8_t ch8 = c; char ch8 = c;
return js_new_string8(ctx, &ch8, 1); return js_new_string8_len(ctx, &ch8, 1);
} else { } else {
uint16_t ch16 = c; uint16_t ch16 = c;
return js_new_string16(ctx, &ch16, 1); return js_new_string16_len(ctx, &ch16, 1);
} }
} }
@ -3484,7 +3499,10 @@ static JSValue js_sub_string(JSContext *ctx, JSString *p, int start, int end)
if (start == 0 && end == p->len) { if (start == 0 && end == p->len) {
return js_dup(JS_MKPTR(JS_TAG_STRING, p)); return js_dup(JS_MKPTR(JS_TAG_STRING, p));
} }
if (p->is_wide_char && len > 0) { if (len <= 0) {
return JS_AtomToString(ctx, JS_ATOM_empty_string);
}
if (p->is_wide_char) {
JSString *str; JSString *str;
int i; int i;
uint16_t c = 0; uint16_t c = 0;
@ -3492,7 +3510,7 @@ static JSValue js_sub_string(JSContext *ctx, JSString *p, int start, int end)
c |= p->u.str16[i]; c |= p->u.str16[i];
} }
if (c > 0xFF) if (c > 0xFF)
return js_new_string16(ctx, p->u.str16 + start, len); return js_new_string16_len(ctx, p->u.str16 + start, len);
str = js_alloc_string(ctx, len, 0); str = js_alloc_string(ctx, len, 0);
if (!str) if (!str)
@ -3503,7 +3521,7 @@ static JSValue js_sub_string(JSContext *ctx, JSString *p, int start, int end)
str->u.str8[len] = '\0'; str->u.str8[len] = '\0';
return JS_MKPTR(JS_TAG_STRING, str); return JS_MKPTR(JS_TAG_STRING, str);
} else { } else {
return js_new_string8(ctx, p->u.str8 + start, len); return js_new_string8_len(ctx, (const char *)(p->u.str8 + start), len);
} }
} }
@ -3850,6 +3868,9 @@ JSValue JS_NewStringLen(JSContext *ctx, const char *buf, size_t buf_len)
StringBuffer b_s, *b = &b_s; StringBuffer b_s, *b = &b_s;
size_t len1; size_t len1;
if (buf_len <= 0) {
return JS_AtomToString(ctx, JS_ATOM_empty_string);
}
p_start = (const uint8_t *)buf; p_start = (const uint8_t *)buf;
p_end = p_start + buf_len; p_end = p_start + buf_len;
p = p_start; p = p_start;
@ -3860,7 +3881,7 @@ JSValue JS_NewStringLen(JSContext *ctx, const char *buf, size_t buf_len)
return JS_ThrowRangeError(ctx, "invalid string length"); return JS_ThrowRangeError(ctx, "invalid string length");
if (p == p_end) { if (p == p_end) {
/* ASCII string */ /* ASCII string */
return js_new_string8(ctx, (const uint8_t *)buf, buf_len); return js_new_string8_len(ctx, buf, buf_len);
} else { } else {
if (string_buffer_init(ctx, b, buf_len)) if (string_buffer_init(ctx, b, buf_len))
goto fail; goto fail;
@ -3933,11 +3954,7 @@ static JSValue JS_ConcatString3(JSContext *ctx, const char *str1,
return JS_EXCEPTION; return JS_EXCEPTION;
} }
JSValue JS_NewString(JSContext *ctx, const char *str) /* `str` may be pure ASCII or UTF-8 encoded */
{
return JS_NewStringLen(ctx, str, strlen(str));
}
JSValue JS_NewAtomString(JSContext *ctx, const char *str) JSValue JS_NewAtomString(JSContext *ctx, const char *str)
{ {
JSAtom atom = JS_NewAtom(ctx, str); JSAtom atom = JS_NewAtom(ctx, str);
@ -4976,6 +4993,7 @@ static int js_method_set_properties(JSContext *ctx, JSValue func_obj,
} }
/* Note: at least 'length' arguments will be readable in 'argv' */ /* Note: at least 'length' arguments will be readable in 'argv' */
/* `name` may be NULL, pure ASCII or UTF-8 encoded */
static JSValue JS_NewCFunction3(JSContext *ctx, JSCFunction *func, static JSValue JS_NewCFunction3(JSContext *ctx, JSCFunction *func,
const char *name, const char *name,
int length, JSCFunctionEnum cproto, int magic, int length, JSCFunctionEnum cproto, int magic,
@ -6508,6 +6526,7 @@ static void build_backtrace(JSContext *ctx, JSValue error_obj,
if (has_prepare) { if (has_prepare) {
js_new_callsite_data(ctx, &csd[i], sf); js_new_callsite_data(ctx, &csd[i], sf);
} else { } else {
/* func_name_str is UTF-8 encoded if needed */
func_name_str = get_func_name(ctx, sf->cur_func); func_name_str = get_func_name(ctx, sf->cur_func);
if (!func_name_str || func_name_str[0] == '\0') if (!func_name_str || func_name_str[0] == '\0')
str1 = "<anonymous>"; str1 = "<anonymous>";
@ -6581,11 +6600,10 @@ static void build_backtrace(JSContext *ctx, JSValue error_obj,
JS_FreeValue(ctx, rt->current_exception); JS_FreeValue(ctx, rt->current_exception);
rt->current_exception = saved_exception; rt->current_exception = saved_exception;
} else { } else {
dbuf_putc(&dbuf, '\0');
if (dbuf_error(&dbuf)) if (dbuf_error(&dbuf))
stack = JS_NULL; stack = JS_NULL;
else else
stack = JS_NewString(ctx, (char *)dbuf.buf); stack = JS_NewStringLen(ctx, (char *)dbuf.buf, dbuf.size);
dbuf_free(&dbuf); dbuf_free(&dbuf);
} }
@ -6612,6 +6630,7 @@ JSValue JS_NewError(JSContext *ctx)
return JS_NewObjectClass(ctx, JS_CLASS_ERROR); return JS_NewObjectClass(ctx, JS_CLASS_ERROR);
} }
/* fmt and arguments may be pure ASCII or UTF-8 encoded contents */
static JSValue JS_ThrowError2(JSContext *ctx, JSErrorEnum error_num, static JSValue JS_ThrowError2(JSContext *ctx, JSErrorEnum error_num,
const char *fmt, va_list ap, BOOL add_backtrace) const char *fmt, va_list ap, BOOL add_backtrace)
{ {
@ -8069,6 +8088,7 @@ JSValue JS_GetPropertyInt64(JSContext *ctx, JSValue obj, int64_t idx)
return val; return val;
} }
/* `prop` may be pure ASCII or UTF-8 encoded */
JSValue JS_GetPropertyStr(JSContext *ctx, JSValue this_obj, JSValue JS_GetPropertyStr(JSContext *ctx, JSValue this_obj,
const char *prop) const char *prop)
{ {
@ -8874,6 +8894,7 @@ int JS_SetPropertyInt64(JSContext *ctx, JSValue this_obj,
return res; return res;
} }
/* `prop` may be pure ASCII or UTF-8 encoded */
int JS_SetPropertyStr(JSContext *ctx, JSValue this_obj, int JS_SetPropertyStr(JSContext *ctx, JSValue this_obj,
const char *prop, JSValue val) const char *prop, JSValue val)
{ {
@ -9428,6 +9449,7 @@ int JS_DefinePropertyValueInt64(JSContext *ctx, JSValue this_obj,
val, flags); val, flags);
} }
/* `prop` may be pure ASCII or UTF-8 encoded */
int JS_DefinePropertyValueStr(JSContext *ctx, JSValue this_obj, int JS_DefinePropertyValueStr(JSContext *ctx, JSValue this_obj,
const char *prop, JSValue val, int flags) const char *prop, JSValue val, int flags)
{ {
@ -11000,13 +11022,14 @@ static JSValue js_bigint_to_string1(JSContext *ctx, JSValue val, int radix)
saved_sign = a->sign; saved_sign = a->sign;
if (a->expn == BF_EXP_ZERO) if (a->expn == BF_EXP_ZERO)
a->sign = 0; a->sign = 0;
// TODO(chqrlie) bf_ftoa should return the string length to the caller
str = bf_ftoa(NULL, a, radix, 0, BF_RNDZ | BF_FTOA_FORMAT_FRAC | str = bf_ftoa(NULL, a, radix, 0, BF_RNDZ | BF_FTOA_FORMAT_FRAC |
BF_FTOA_JS_QUIRKS); BF_FTOA_JS_QUIRKS);
a->sign = saved_sign; a->sign = saved_sign;
JS_FreeBigInt(ctx, a, &a_s); JS_FreeBigInt(ctx, a, &a_s);
if (!str) if (!str)
return JS_ThrowOutOfMemory(ctx); return JS_ThrowOutOfMemory(ctx);
ret = JS_NewString(ctx, str); ret = js_new_string8(ctx, str);
bf_free(ctx->bf_ctx, str); bf_free(ctx->bf_ctx, str);
return ret; return ret;
} }
@ -11258,7 +11281,7 @@ static JSValue js_dtoa(JSContext *ctx,
{ {
char buf[JS_DTOA_BUF_SIZE]; char buf[JS_DTOA_BUF_SIZE];
size_t len = js_dtoa1(&buf, d, radix, n_digits, flags); size_t len = js_dtoa1(&buf, d, radix, n_digits, flags);
return JS_NewStringLen(ctx, buf, len); return js_new_string8_len(ctx, buf, len);
} }
/* d is guaranteed to be finite */ /* d is guaranteed to be finite */
@ -11345,7 +11368,7 @@ static JSValue js_dtoa_radix(JSContext *ctx, double d, int radix)
done: done:
ptr[-1] = '-'; ptr[-1] = '-';
ptr -= sign; ptr -= sign;
return js_new_string8(ctx, (uint8_t *)ptr, ptr2 - ptr); return js_new_string8_len(ctx, ptr, ptr2 - ptr);
} }
JSValue JS_ToStringInternal(JSContext *ctx, JSValue val, BOOL is_ToPropertyKey) JSValue JS_ToStringInternal(JSContext *ctx, JSValue val, BOOL is_ToPropertyKey)
@ -11361,8 +11384,7 @@ JSValue JS_ToStringInternal(JSContext *ctx, JSValue val, BOOL is_ToPropertyKey)
return js_dup(val); return js_dup(val);
case JS_TAG_INT: case JS_TAG_INT:
len = i32toa(buf, JS_VALUE_GET_INT(val)); len = i32toa(buf, JS_VALUE_GET_INT(val));
str = buf; return js_new_string8_len(ctx, buf, len);
goto new_string;
case JS_TAG_BOOL: case JS_TAG_BOOL:
return JS_AtomToString(ctx, JS_VALUE_GET_BOOL(val) ? return JS_AtomToString(ctx, JS_VALUE_GET_BOOL(val) ?
JS_ATOM_true : JS_ATOM_false); JS_ATOM_true : JS_ATOM_false);
@ -11384,9 +11406,7 @@ JSValue JS_ToStringInternal(JSContext *ctx, JSValue val, BOOL is_ToPropertyKey)
} }
break; break;
case JS_TAG_FUNCTION_BYTECODE: case JS_TAG_FUNCTION_BYTECODE:
str = "[function bytecode]"; return js_new_string8(ctx, "[function bytecode]");
len = sizeof("[function bytecode]") - 1;
goto new_string;
case JS_TAG_SYMBOL: case JS_TAG_SYMBOL:
if (is_ToPropertyKey) { if (is_ToPropertyKey) {
return js_dup(val); return js_dup(val);
@ -11399,10 +11419,7 @@ JSValue JS_ToStringInternal(JSContext *ctx, JSValue val, BOOL is_ToPropertyKey)
case JS_TAG_BIG_INT: case JS_TAG_BIG_INT:
return js_bigint_to_string(ctx, val); return js_bigint_to_string(ctx, val);
default: default:
str = "[unsupported type]"; return js_new_string8(ctx, "[unsupported type]");
len = sizeof("[unsupported type]") - 1;
new_string:
return JS_NewStringLen(ctx, str, len);
} }
} }
@ -19013,6 +19030,7 @@ static JSAtom parse_ident(JSParseState *s, const uint8_t **pp,
} }
} }
} }
/* buf is pure ASCII or UTF-8 encoded */
atom = JS_NewAtomLen(s->ctx, buf, ident_pos); atom = JS_NewAtomLen(s->ctx, buf, ident_pos);
done: done:
if (unlikely(buf != ident_buf)) if (unlikely(buf != ident_buf))
@ -19667,6 +19685,7 @@ static JSAtom json_parse_ident(JSParseState *s, const uint8_t **pp, int c)
} }
} }
} }
/* buf contains pure ASCII */
atom = JS_NewAtomLen(s->ctx, buf, ident_pos); atom = JS_NewAtomLen(s->ctx, buf, ident_pos);
done: done:
if (unlikely(buf != ident_buf)) if (unlikely(buf != ident_buf))
@ -19926,6 +19945,7 @@ static void skip_shebang(const uint8_t **pp, const uint8_t *buf_end)
Heuristic: skip comments and expect 'import' keyword not followed Heuristic: skip comments and expect 'import' keyword not followed
by '(' or '.' or export keyword. by '(' or '.' or export keyword.
*/ */
/* input is pure ASCII or UTF-8 encoded source code */
BOOL JS_DetectModule(const char *input, size_t input_len) BOOL JS_DetectModule(const char *input, size_t input_len)
{ {
const uint8_t *p = (const uint8_t *)input; const uint8_t *p = (const uint8_t *)input;
@ -25734,6 +25754,7 @@ static int add_star_export_entry(JSContext *ctx, JSModuleDef *m,
} }
/* create a C module */ /* create a C module */
/* `name_str` may be pure ASCII or UTF-8 encoded */
JSModuleDef *JS_NewCModule(JSContext *ctx, const char *name_str, JSModuleDef *JS_NewCModule(JSContext *ctx, const char *name_str,
JSModuleInitFunc *func) JSModuleInitFunc *func)
{ {
@ -25747,6 +25768,7 @@ JSModuleDef *JS_NewCModule(JSContext *ctx, const char *name_str,
return m; return m;
} }
/* `export_name` may be pure ASCII or UTF-8 encoded */
int JS_AddModuleExport(JSContext *ctx, JSModuleDef *m, const char *export_name) int JS_AddModuleExport(JSContext *ctx, JSModuleDef *m, const char *export_name)
{ {
JSExportEntry *me; JSExportEntry *me;
@ -25763,6 +25785,7 @@ int JS_AddModuleExport(JSContext *ctx, JSModuleDef *m, const char *export_name)
return 0; return 0;
} }
/* `export_name` may be pure ASCII or UTF-8 encoded */
int JS_SetModuleExport(JSContext *ctx, JSModuleDef *m, const char *export_name, int JS_SetModuleExport(JSContext *ctx, JSModuleDef *m, const char *export_name,
JSValue val) JSValue val)
{ {
@ -25866,6 +25889,7 @@ static JSModuleDef *js_find_loaded_module(JSContext *ctx, JSAtom name)
} }
/* return NULL in case of exception (e.g. module could not be loaded) */ /* return NULL in case of exception (e.g. module could not be loaded) */
/* `base_cname` and `cname1` may be pure ASCII or UTF-8 encoded */
static JSModuleDef *js_host_resolve_imported_module(JSContext *ctx, static JSModuleDef *js_host_resolve_imported_module(JSContext *ctx,
const char *base_cname, const char *base_cname,
const char *cname1) const char *cname1)
@ -27338,6 +27362,7 @@ static __exception int js_parse_source_element(JSParseState *s)
return 0; return 0;
} }
/* `filename` may be pure ASCII or UTF-8 encoded */
static JSFunctionDef *js_new_function_def(JSContext *ctx, static JSFunctionDef *js_new_function_def(JSContext *ctx,
JSFunctionDef *parent, JSFunctionDef *parent,
BOOL is_eval, BOOL is_eval,
@ -32273,6 +32298,7 @@ JSValue JS_EvalFunction(JSContext *ctx, JSValue fun_obj)
} }
/* 'input' must be zero terminated i.e. input[input_len] = '\0'. */ /* 'input' must be zero terminated i.e. input[input_len] = '\0'. */
/* `filename` and `input` may be pure ASCII or UTF-8 encoded */
static JSValue __JS_EvalInternal(JSContext *ctx, JSValue this_obj, static JSValue __JS_EvalInternal(JSContext *ctx, JSValue this_obj,
const char *input, size_t input_len, const char *input, size_t input_len,
const char *filename, int flags, int scope_idx) const char *filename, int flags, int scope_idx)
@ -34063,7 +34089,8 @@ static JSValue JS_ReadFunctionTag(BCReaderState *s)
if (b->source_len) { if (b->source_len) {
bc_read_trace(s, "source: %d bytes\n", b->source_len); bc_read_trace(s, "source: %d bytes\n", b->source_len);
s->ptr_last += b->source_len; // omit source code hex dump s->ptr_last += b->source_len; // omit source code hex dump
b->source = js_mallocz(ctx, b->source_len); /* b->source is a UTF-8 encoded null terminated C string */
b->source = js_mallocz(ctx, b->source_len + 1);
if (!b->source) if (!b->source)
goto fail; goto fail;
if (bc_get_buf(s, b->source, b->source_len)) if (bc_get_buf(s, b->source, b->source_len))
@ -34662,6 +34689,7 @@ static int check_exception_free(JSContext *ctx, JSValue obj)
return JS_IsException(obj); return JS_IsException(obj);
} }
/* `name` may be pure ASCII or UTF-8 encoded */
static JSAtom find_atom(JSContext *ctx, const char *name) static JSAtom find_atom(JSContext *ctx, const char *name)
{ {
JSAtom atom; JSAtom atom;
@ -34840,6 +34868,7 @@ int JS_SetModuleExportList(JSContext *ctx, JSModuleDef *m,
e->name, e->u.func.length, e->u.func.cproto, e->magic); e->name, e->u.func.length, e->u.func.cproto, e->magic);
break; break;
case JS_DEF_PROP_STRING: case JS_DEF_PROP_STRING:
/* `e->u.str` may be pure ASCII or UTF-8 encoded */
val = JS_NewString(ctx, e->u.str); val = JS_NewString(ctx, e->u.str);
break; break;
case JS_DEF_PROP_INT32: case JS_DEF_PROP_INT32:
@ -35746,9 +35775,9 @@ static JSValue js_object_toString(JSContext *ctx, JSValue this_val,
JSObject *p; JSObject *p;
if (JS_IsNull(this_val)) { if (JS_IsNull(this_val)) {
tag = JS_NewString(ctx, "Null"); tag = js_new_string8(ctx, "Null");
} else if (JS_IsUndefined(this_val)) { } else if (JS_IsUndefined(this_val)) {
tag = JS_NewString(ctx, "Undefined"); tag = js_new_string8(ctx, "Undefined");
} else { } else {
obj = JS_ToObject(ctx, this_val); obj = JS_ToObject(ctx, this_val);
if (JS_IsException(obj)) if (JS_IsException(obj))
@ -36505,6 +36534,7 @@ static JSValue js_function_toString(JSContext *ctx, JSValue this_val,
p = JS_VALUE_GET_OBJ(this_val); p = JS_VALUE_GET_OBJ(this_val);
if (js_class_has_bytecode(p->class_id)) { if (js_class_has_bytecode(p->class_id)) {
JSFunctionBytecode *b = p->u.func.function_bytecode; JSFunctionBytecode *b = p->u.func.function_bytecode;
/* `b->source` must be pure ASCII or UTF-8 encoded */
if (b->source) if (b->source)
return JS_NewStringLen(ctx, b->source, b->source_len); return JS_NewStringLen(ctx, b->source, b->source_len);
} }
@ -38999,7 +39029,7 @@ static JSValue js_number_toString(JSContext *ctx, JSValue this_val,
if (magic || JS_IsUndefined(argv[0])) { if (magic || JS_IsUndefined(argv[0])) {
if (JS_VALUE_GET_TAG(val) == JS_TAG_INT) { if (JS_VALUE_GET_TAG(val) == JS_TAG_INT) {
size_t len = i32toa(buf, JS_VALUE_GET_INT(val)); size_t len = i32toa(buf, JS_VALUE_GET_INT(val));
return JS_NewStringLen(ctx, buf, len); return js_new_string8_len(ctx, buf, len);
} }
base = 10; base = 10;
} else { } else {
@ -39009,7 +39039,7 @@ static JSValue js_number_toString(JSContext *ctx, JSValue this_val,
} }
if (JS_VALUE_GET_TAG(val) == JS_TAG_INT) { if (JS_VALUE_GET_TAG(val) == JS_TAG_INT) {
size_t len = i64toa_radix(buf, JS_VALUE_GET_INT(val), base); size_t len = i64toa_radix(buf, JS_VALUE_GET_INT(val), base);
return JS_NewStringLen(ctx, buf, len); return js_new_string8_len(ctx, buf, len);
} }
if (JS_ToFloat64Free(ctx, &d, val)) if (JS_ToFloat64Free(ctx, &d, val))
return JS_EXCEPTION; return JS_EXCEPTION;
@ -39353,12 +39383,7 @@ static JSValue js_string_fromCharCode(JSContext *ctx, JSValue this_val,
// shortcut for single argument common case // shortcut for single argument common case
if (argc == 1 && JS_VALUE_GET_TAG(argv[0]) == JS_TAG_INT) { if (argc == 1 && JS_VALUE_GET_TAG(argv[0]) == JS_TAG_INT) {
uint16_t c16 = JS_VALUE_GET_INT(argv[0]); uint16_t c16 = JS_VALUE_GET_INT(argv[0]);
if (c16 < 128) { return js_new_string_char(ctx, c16);
uint8_t c8 = c16;
return js_new_string8(ctx, &c8, 1);
} else {
return js_new_string16(ctx, &c16, 1);
}
} }
string_buffer_init(ctx, b, argc); string_buffer_init(ctx, b, argc);
@ -39385,18 +39410,13 @@ static JSValue js_string_fromCodePoint(JSContext *ctx, JSValue this_val,
c = JS_VALUE_GET_INT(argv[0]); c = JS_VALUE_GET_INT(argv[0]);
if (c < 0 || c > 0x10ffff) if (c < 0 || c > 0x10ffff)
goto range_error; goto range_error;
if (c < 128) { if (c <= 0xffff) {
uint8_t c8 = c; return js_new_string_char(ctx, c);
return js_new_string8(ctx, &c8, 1);
}
if (c < 0x10000) {
uint16_t c16 = c;
return js_new_string16(ctx, &c16, 1);
} else { } else {
uint16_t c16[2]; uint16_t c16[2];
c16[0] = get_hi_surrogate(c); c16[0] = get_hi_surrogate(c);
c16[1] = get_lo_surrogate(c); c16[1] = get_lo_surrogate(c);
return js_new_string16(ctx, c16, 2); return js_new_string16_len(ctx, c16, 2);
} }
} }
@ -39562,7 +39582,7 @@ static JSValue js_string_charAt(JSContext *ctx, JSValue this_val,
return JS_EXCEPTION; return JS_EXCEPTION;
} }
if (idx < 0 || idx >= p->len) { if (idx < 0 || idx >= p->len) {
ret = js_new_string8(ctx, NULL, 0); ret = JS_AtomToString(ctx, JS_ATOM_empty_string);
} else { } else {
c = string_get(p, idx); c = string_get(p, idx);
ret = js_new_string_char(ctx, c); ret = js_new_string_char(ctx, c);
@ -39725,7 +39745,7 @@ static JSValue js_string_toWellFormed(JSContext *ctx, JSValue this_val,
return str; // by definition well-formed return str; // by definition well-formed
// TODO(bnoordhuis) don't clone when input is well-formed // TODO(bnoordhuis) don't clone when input is well-formed
ret = js_new_string16(ctx, p->u.str16, p->len); ret = js_new_string16_len(ctx, p->u.str16, p->len);
JS_FreeValue(ctx, str); JS_FreeValue(ctx, str);
if (JS_IsException(ret)) if (JS_IsException(ret))
return JS_EXCEPTION; return JS_EXCEPTION;
@ -39941,7 +39961,7 @@ static JSValue js_string_match(JSContext *ctx, JSValue this_val,
args[0] = regexp; args[0] = regexp;
str = JS_UNDEFINED; str = JS_UNDEFINED;
if (atom == JS_ATOM_Symbol_matchAll) { if (atom == JS_ATOM_Symbol_matchAll) {
str = JS_NewString(ctx, "g"); str = js_new_string8(ctx, "g");
if (JS_IsException(str)) if (JS_IsException(str))
goto fail; goto fail;
args[args_len++] = str; args[args_len++] = str;
@ -40796,7 +40816,7 @@ static JSValue js_string_iterator_next(JSContext *ctx, JSValue this_val,
if (c <= 0xffff) { if (c <= 0xffff) {
return js_new_string_char(ctx, c); return js_new_string_char(ctx, c);
} else { } else {
return js_new_string16(ctx, p->u.str16 + start, 2); return js_new_string16_len(ctx, p->u.str16 + start, 2);
} }
} }
@ -41365,7 +41385,7 @@ static JSValue js_compile_regexp(JSContext *ctx, JSValue pattern,
return JS_EXCEPTION; return JS_EXCEPTION;
} }
ret = js_new_string8(ctx, re_bytecode_buf, re_bytecode_len); ret = js_new_string8_len(ctx, (char *)re_bytecode_buf, re_bytecode_len);
js_free(ctx, re_bytecode_buf); js_free(ctx, re_bytecode_buf);
return ret; return ret;
} }
@ -41572,7 +41592,7 @@ static JSValue js_regexp_get_source(JSContext *ctx, JSValue this_val)
if (p->len == 0) { if (p->len == 0) {
empty_regex: empty_regex:
return JS_NewString(ctx, "(?:)"); return js_new_string8(ctx, "(?:)");
} }
string_buffer_init2(ctx, b, p->len, p->is_wide_char); string_buffer_init2(ctx, b, p->len, p->is_wide_char);
@ -41685,7 +41705,9 @@ static JSValue js_regexp_get_flags(JSContext *ctx, JSValue this_val)
goto exception; goto exception;
if (res) if (res)
*p++ = 'y'; *p++ = 'y';
return JS_NewStringLen(ctx, str, p - str); if (p == str)
return JS_AtomToString(ctx, JS_ATOM_empty_string);
return js_new_string8_len(ctx, str, p - str);
exception: exception:
return JS_EXCEPTION; return JS_EXCEPTION;
@ -42943,6 +42965,7 @@ static JSValue json_parse_value(JSParseState *s)
return JS_EXCEPTION; return JS_EXCEPTION;
} }
/* 'buf' must be zero terminated i.e. buf[buf_len] = '\0'. */
JSValue JS_ParseJSON(JSContext *ctx, const char *buf, size_t buf_len, const char *filename) JSValue JS_ParseJSON(JSContext *ctx, const char *buf, size_t buf_len, const char *filename)
{ {
JSParseState s1, *s = &s1; JSParseState s1, *s = &s1;
@ -43184,7 +43207,7 @@ static int js_json_to_str(JSContext *ctx, JSONStringifyContext *jsc,
sep = JS_ConcatString3(ctx, "\n", js_dup(indent1), ""); sep = JS_ConcatString3(ctx, "\n", js_dup(indent1), "");
if (JS_IsException(sep)) if (JS_IsException(sep))
goto exception; goto exception;
sep1 = JS_NewString(ctx, " "); sep1 = js_new_string8(ctx, " ");
if (JS_IsException(sep1)) if (JS_IsException(sep1))
goto exception; goto exception;
} else { } else {
@ -46153,7 +46176,7 @@ static JSValue js_promise_all_resolve_element(JSContext *ctx,
obj = JS_NewObject(ctx); obj = JS_NewObject(ctx);
if (JS_IsException(obj)) if (JS_IsException(obj))
return JS_EXCEPTION; return JS_EXCEPTION;
str = JS_NewString(ctx, is_reject ? "rejected" : "fulfilled"); str = js_new_string8(ctx, is_reject ? "rejected" : "fulfilled");
if (JS_IsException(str)) if (JS_IsException(str))
goto fail1; goto fail1;
if (JS_DefinePropertyValue(ctx, obj, JS_ATOM_status, if (JS_DefinePropertyValue(ctx, obj, JS_ATOM_status,
@ -47488,7 +47511,7 @@ static JSValue get_date_string(JSContext *ctx, JSValue this_val,
if (fmt == 2) if (fmt == 2)
return JS_ThrowRangeError(ctx, "Date value is NaN"); return JS_ThrowRangeError(ctx, "Date value is NaN");
else else
return JS_NewString(ctx, "Invalid Date"); return js_new_string8(ctx, "Invalid Date");
} }
y = fields[0]; y = fields[0];
@ -47572,7 +47595,11 @@ static JSValue get_date_string(JSContext *ctx, JSValue this_val,
break; break;
} }
} }
return JS_NewStringLen(ctx, buf, pos); if (!pos) {
// XXX: should throw exception?
return JS_AtomToString(ctx, JS_ATOM_empty_string);
}
return js_new_string8_len(ctx, buf, pos);
} }
/* OS dependent: return the UTC time in ms since 1970. */ /* OS dependent: return the UTC time in ms since 1970. */
@ -52547,6 +52574,7 @@ static void js_new_callsite_data(JSContext *ctx, JSCallSiteData *csd, JSStackFra
JSObject *p; JSObject *p;
csd->func = js_dup(sf->cur_func); csd->func = js_dup(sf->cur_func);
/* func_name_str is UTF-8 encoded if needed */
func_name_str = get_func_name(ctx, sf->cur_func); func_name_str = get_func_name(ctx, sf->cur_func);
if (!func_name_str || func_name_str[0] == '\0') if (!func_name_str || func_name_str[0] == '\0')
csd->func_name = JS_NULL; csd->func_name = JS_NULL;
@ -52586,6 +52614,7 @@ static void js_new_callsite_data2(JSContext *ctx, JSCallSiteData *csd, const cha
csd->native = FALSE; csd->native = FALSE;
csd->line_num = line_num; csd->line_num = line_num;
csd->col_num = col_num; csd->col_num = col_num;
/* filename is UTF-8 encoded if needed (original argument to __JS_EvalInternal()) */
csd->filename = JS_NewString(ctx, filename); csd->filename = JS_NewString(ctx, filename);
if (JS_IsException(csd->filename)) { if (JS_IsException(csd->filename)) {
csd->filename = JS_NULL; csd->filename = JS_NULL;

View file

@ -29,6 +29,7 @@
#include <stdio.h> #include <stdio.h>
#include <stdint.h> #include <stdint.h>
#include <string.h>
#ifdef __cplusplus #ifdef __cplusplus
extern "C" { extern "C" {
@ -53,6 +54,12 @@ typedef struct JSClass JSClass;
typedef uint32_t JSClassID; typedef uint32_t JSClassID;
typedef uint32_t JSAtom; typedef uint32_t JSAtom;
/* Unless documented otherwise, C string pointers (`char *` or `const char *`)
are assumed to satisfy these constraints:
- unless a length is passed separately, the string has a null terminator
- the string contents are either pure ASCII or UTF-8 encoded.
*/
#if INTPTR_MAX < INT64_MAX #if INTPTR_MAX < INT64_MAX
/* Use NAN boxing for 32bit builds. */ /* Use NAN boxing for 32bit builds. */
#define JS_NAN_BOXING #define JS_NAN_BOXING
@ -447,7 +454,7 @@ typedef JSValue JSClassCall(JSContext *ctx, JSValue func_obj,
int flags); int flags);
typedef struct JSClassDef { typedef struct JSClassDef {
const char *class_name; const char *class_name; /* pure ASCII only! */
JSClassFinalizer *finalizer; JSClassFinalizer *finalizer;
JSClassGCMark *gc_mark; JSClassGCMark *gc_mark;
/* if call != NULL, the object is a function. If (flags & /* if call != NULL, the object is a function. If (flags &
@ -634,7 +641,9 @@ JS_EXTERN int JS_ToBigInt64(JSContext *ctx, int64_t *pres, JSValue val);
JS_EXTERN int JS_ToInt64Ext(JSContext *ctx, int64_t *pres, JSValue val); JS_EXTERN int JS_ToInt64Ext(JSContext *ctx, int64_t *pres, JSValue val);
JS_EXTERN JSValue JS_NewStringLen(JSContext *ctx, const char *str1, size_t len1); JS_EXTERN JSValue JS_NewStringLen(JSContext *ctx, const char *str1, size_t len1);
JS_EXTERN JSValue JS_NewString(JSContext *ctx, const char *str); static inline JSValue JS_NewString(JSContext *ctx, const char *str) {
return JS_NewStringLen(ctx, str, strlen(str));
}
JS_EXTERN JSValue JS_NewAtomString(JSContext *ctx, const char *str); JS_EXTERN JSValue JS_NewAtomString(JSContext *ctx, const char *str);
JS_EXTERN JSValue JS_ToString(JSContext *ctx, JSValue val); JS_EXTERN JSValue JS_ToString(JSContext *ctx, JSValue val);
JS_EXTERN JSValue JS_ToPropertyKey(JSContext *ctx, JSValue val); JS_EXTERN JSValue JS_ToPropertyKey(JSContext *ctx, JSValue val);
@ -902,7 +911,7 @@ JS_EXTERN void JS_SetConstructor(JSContext *ctx, JSValue func_obj,
/* C property definition */ /* C property definition */
typedef struct JSCFunctionListEntry { typedef struct JSCFunctionListEntry {
const char *name; const char *name; /* pure ASCII or UTF-8 encoded */
uint8_t prop_flags; uint8_t prop_flags;
uint8_t def_type; uint8_t def_type;
int16_t magic; int16_t magic;
@ -924,7 +933,7 @@ typedef struct JSCFunctionListEntry {
const struct JSCFunctionListEntry *tab; const struct JSCFunctionListEntry *tab;
int len; int len;
} prop_list; } prop_list;
const char *str; const char *str; /* pure ASCII or UTF-8 encoded */
int32_t i32; int32_t i32;
int64_t i64; int64_t i64;
double f64; double f64;