Add utility functions for string to integer conversions (#366)
* Add utility functions, improve integer conversion functions
  - move `is_be()` to cutils.h
  - add `is_upper_ascii()` and `to_upper_ascii()`
  - add extensive benchmark for integer conversion variants in **tests/test_conv.c**
  - add `u32toa()`, `i32toa()`, `u64toa()`, `i64toa()` based on register shift variant
  - add `u32toa_radix()`, `u64toa_radix()`, `i64toa_radix()` based on length_loop variant
  - use direct converters instead of `snprintf()`
  - copy NaN and Infinity directly in `js_dtoa1()`
  - optimize `js_number_toString()` for small integers
  - use `JS_NewStringLen()` instead of `JS_NewString()` when possible
  - add more precise conversion tests in microbench.js
  - disable some benchmark tests for gcc (they cause ASAN failures)
This commit is contained in:
7 changed files with 2079 additions and 92 deletions
@ -324,6 +324,9 @@ if(BUILD_EXAMPLES AND NOT WIN32)
target_link_libraries(test_fib ${qjs_libs})
# Install target
@ -53,6 +53,9 @@ $(QJS): $(BUILD_DIR)
cmake --build $(BUILD_DIR) --target qjsc -j $(JOBS)
$(BUILD_DIR)/test_conv: $(BUILD_DIR) tests/test_conv.c
cmake --build $(BUILD_DIR) --target test_conv
install: $(QJS) $(QJSC)
cmake --build $(BUILD_DIR) --target install
@ -86,6 +89,9 @@ test: $(QJS)
$(QJS) tests/test_worker.js
$(QJS) tests/test_queue_microtask.js
testconv: $(BUILD_DIR)/test_conv
test262: $(QJS)
$(RUN262) -m -c test262.conf -a
@ -213,6 +213,8 @@ void dbuf_free(DynBuf *s)
memset(s, 0, sizeof(*s));
/*--- Unicode / UTF-8 utility functions --*/
/* Note: at most 31 bits are encoded. At most UTF8_CHAR_LEN_MAX bytes
are output. */
int unicode_to_utf8(uint8_t *buf, unsigned int c)
@ -315,6 +317,231 @@ int unicode_from_utf8(const uint8_t *p, int max_len, const uint8_t **pp)
return c;
/*--- integer to string conversions --*/
/* All conversion functions:
   - require a destination array `buf` of sufficient length
   - write the string representation at the beginning of `buf`
   - null terminate the string
   - return the string length (not counting the null terminator)
 */
/* Digit characters indexed by digit value, enough for any base with
   2 <= base <= 36. The array holds exactly 36 bytes, so it is NOT null
   terminated and must not be used as a C string. */
char const digits36[36] = "0123456789abcdefghijklmnopqrstuvwxyz";
/* using u32toa_shift variant */
/* Push character `c` into the integer accumulator `buf` so that, once `buf`
   is copied to memory, the most recently pushed character is the first byte:
   existing bytes are shifted toward the high-order end on little-endian hosts
   and toward the low-order end on big-endian ones.
   Wrapped in do/while(0) so the expansion is a single statement and cannot
   capture a following `else`. */
#define gen_digit(buf, c) do { \
        if (is_be()) \
            (buf) = ((buf) >> 8) | ((uint64_t)(c) << ((sizeof(buf) - 1) * 8)); \
        else \
            (buf) = ((buf) << 8) | (c); \
    } while (0)
/* Convert `n` to decimal without leading zeros.
   `n` must have at most 7 digits (n < 10^7): the digits are packed into a
   64-bit accumulator whose remaining byte(s) stay zero and act as the null
   terminator. Exactly 8 bytes are stored at `dest`.
   Returns the number of digits written. */
size_t u7toa_shift(char dest[minimum_length(8)], uint32_t n)
{
    size_t len = 1;
    uint64_t buf = 0;

    /* Digits come out least significant first; gen_digit() keeps the most
       recently pushed one at the front of the in-memory image. */
    while (n >= 10) {
        uint32_t quo = n % 10;
        n /= 10;
        gen_digit(buf, '0' + quo);
        len++;
    }
    gen_digit(buf, '0' + n);
    memcpy(dest, &buf, sizeof buf);
    return len;
}
/* Append `n` as exactly 7 zero-padded decimal digits after the `len`
   characters already present in `dest`, then null terminate.
   `n` must be less than 10^7 so that the leading digit is a single digit.
   Returns the new string length, `len + 7`. */
size_t u07toa_shift(char dest[minimum_length(8)], uint32_t n, size_t len)
{
    size_t i;

    dest += len;
    dest[7] = '\0';
    /* Fill positions 6 down to 1; the loop exits with i == 0. */
    for (i = 7; i-- > 1;) {
        uint32_t quo = n % 10;
        n /= 10;
        dest[i] = (char)('0' + quo);
    }
    dest[i] = (char)('0' + n);
    return len + 7;
}
/* Decimal numbers are emitted in groups of at most 7 digits. */
#define TEN_POW_7 10000000

/* Write the decimal representation of `n` at the start of `buf`, null
   terminate it and return its length (1 to 10). */
size_t u32toa(char buf[minimum_length(11)], uint32_t n)
{
    /* fast path for single digits */
    if (n < 10) {
        buf[0] = (char)('0' + n);
        buf[1] = '\0';
        return 1;
    }
    if (n >= TEN_POW_7) {
        /* leading group without padding, then 7 zero-padded digits */
        uint32_t quo = n / TEN_POW_7;
        n %= TEN_POW_7;
        size_t len = u7toa_shift(buf, quo);
        return u07toa_shift(buf, n, len);
    }
    return u7toa_shift(buf, n);
}
size_t u64toa(char buf[minimum_length(21)], uint64_t n)
if (likely(n < 0x100000000))
return u32toa(buf, n);
size_t len;
if (n >= TEN_POW_7) {
uint64_t n1 = n / TEN_POW_7;
n %= TEN_POW_7;
if (n1 >= TEN_POW_7) {
uint32_t quo = n1 / TEN_POW_7;
n1 %= TEN_POW_7;
len = u7toa_shift(buf, quo);
len = u07toa_shift(buf, n1, len);
} else {
len = u7toa_shift(buf, n1);
return u07toa_shift(buf, n, len);
return u7toa_shift(buf, n);
/* Write the decimal representation of the signed value `n` at the start of
   `buf`, with a leading '-' for negative values, null terminate it and
   return its length. */
size_t i32toa(char buf[minimum_length(12)], int32_t n)
{
    if (likely(n >= 0))
        return u32toa(buf, (uint32_t)n);

    buf[0] = '-';
    /* negate in unsigned arithmetic so that INT32_MIN does not overflow */
    return 1 + u32toa(buf + 1, -(uint32_t)n);
}
/* Write the decimal representation of the signed value `n` at the start of
   `buf`, with a leading '-' for negative values, null terminate it and
   return its length. */
size_t i64toa(char buf[minimum_length(22)], int64_t n)
{
    if (likely(n >= 0))
        return u64toa(buf, (uint64_t)n);

    buf[0] = '-';
    /* negate in unsigned arithmetic so that INT64_MIN does not overflow */
    return 1 + u64toa(buf + 1, -(uint64_t)n);
}
/* using u32toa_radix_length variant */
/* radix_shift[b] is log2(b) when b is a power of two in 2..32 and 0 for
   every other index below 64. A non-zero entry means digits can be
   extracted with shifts and masks instead of divisions. */
static uint8_t const radix_shift[64] = {
    [2] = 1, [4] = 2, [8] = 3, [16] = 4, [32] = 5,
};
size_t u32toa_radix(char buf[minimum_length(33)], uint32_t n, unsigned base)
if (likely(base == 10))
return u32toa(buf, n);
if (n < base) {
buf[0] = digits36[n];
buf[1] = '\0';
return 1;
int shift = radix_shift[base & 63];
if (shift) {
uint32_t mask = (1 << shift) - 1;
size_t len = (32 - clz32(n) + shift - 1) / shift;
size_t last = n & mask;
n /= base;
char *end = buf + len;
*end-- = '\0';
*end-- = digits36[last];
while (n >= base) {
size_t quo = n & mask;
n >>= shift;
*end-- = digits36[quo];
*end = digits36[n];
return len;
} else {
size_t len = 2;
size_t last = n % base;
n /= base;
uint32_t nbase = base;
while (n >= nbase) {
nbase *= base;
char *end = buf + len;
*end-- = '\0';
*end-- = digits36[last];
while (n >= base) {
size_t quo = n % base;
n /= base;
*end-- = digits36[quo];
*end = digits36[n];
return len;
size_t u64toa_radix(char buf[minimum_length(65)], uint64_t n, unsigned base)
if (likely(base == 10))
return u64toa(buf, n);
int shift = radix_shift[base & 63];
if (shift) {
if (n < base) {
buf[0] = digits36[n];
buf[1] = '\0';
return 1;
uint64_t mask = (1 << shift) - 1;
size_t len = (64 - clz64(n) + shift - 1) / shift;
size_t last = n & mask;
n /= base;
char *end = buf + len;
*end-- = '\0';
*end-- = digits36[last];
while (n >= base) {
size_t quo = n & mask;
n >>= shift;
*end-- = digits36[quo];
*end = digits36[n];
return len;
} else {
if (likely(n < 0x100000000))
return u32toa_radix(buf, n, base);
size_t last = n % base;
n /= base;
uint64_t nbase = base;
size_t len = 2;
while (n >= nbase) {
nbase *= base;
char *end = buf + len;
*end-- = '\0';
*end-- = digits36[last];
while (n >= base) {
size_t quo = n % base;
n /= base;
*end-- = digits36[quo];
*end = digits36[n];
return len;
/* Write the representation of the signed value `n` in base `base` at the
   start of `buf`, with a leading '-' for negative values, null terminate it
   and return its length. */
size_t i64toa_radix(char buf[minimum_length(66)], int64_t n, unsigned int base)
{
    if (likely(n >= 0))
        return u64toa_radix(buf, (uint64_t)n, base);

    buf[0] = '-';
    /* negate in unsigned arithmetic so that INT64_MIN does not overflow */
    return 1 + u64toa_radix(buf + 1, -(uint64_t)n, base);
}
/*---- sorting with opaque argument ----*/
typedef void (*exchange_f)(void *a, void *b, size_t size);
typedef int (*cmp_f)(const void *, const void *, void *opaque);
@ -614,6 +841,8 @@ void rqsort(void *base, size_t nmemb, size_t size, cmp_f cmp, void *opaque)
/*---- Portable time functions ----*/
#if defined(_MSC_VER)
// From:
static int gettimeofday_msvc(struct timeval *tp, struct timezone *tzp)
@ -677,7 +906,7 @@ int64_t js__gettimeofday_us(void) {
return ((int64_t)tv.tv_sec * 1000000) + tv.tv_usec;
/* Cross-platform threading APIs. */
/*--- Cross-platform threading APIs. ----*/
#if !defined(EMSCRIPTEN) && !defined(__wasi__)
@ -131,6 +131,14 @@ char *pstrcat(char *buf, int buf_size, const char *s);
int strstart(const char *str, const char *val, const char **ptr);
int has_suffix(const char *str, const char *suffix);
/* Return 1 on a big-endian host and 0 on a little-endian one.
   The 16-bit value 0x100 is stored in a union and its first byte is read
   back: that byte is the high-order byte (1) only when the most significant
   byte comes first in memory. */
static inline uint8_t is_be(void) {
    union {
        uint16_t a;
        uint8_t b;
    } u = { 0x100 };
    return u.b;
}
static inline int max_int(int a, int b)
if (a > b)
@ -426,6 +434,23 @@ static inline int from_hex(int c)
return -1;
/* Return 1 if `c` is an ASCII upper case letter ('A'..'Z'), 0 otherwise.
   Locale independent. */
static inline uint8_t is_upper_ascii(uint8_t c) {
    return c >= 'A' && c <= 'Z';
}
/* Map ASCII lower case letters ('a'..'z') to upper case and return every
   other byte value unchanged. Locale independent. */
static inline uint8_t to_upper_ascii(uint8_t c) {
    return c >= 'a' && c <= 'z' ? c - 'a' + 'A' : c;
}
/* Digit characters for bases up to 36; exactly 36 bytes, no terminator. */
extern char const digits36[36];
/* Integer to string converters. Each one takes the destination array first
   and returns a size_t; the `buf` parameter is annotated with the array
   length the converter expects (e.g. 11 for u32toa: up to 10 decimal digits
   plus one more byte). */
size_t u32toa(char buf[minimum_length(11)], uint32_t n);
size_t i32toa(char buf[minimum_length(12)], int32_t n);
size_t u64toa(char buf[minimum_length(21)], uint64_t n);
size_t i64toa(char buf[minimum_length(22)], int64_t n);
/* Same, with an explicit numeric base as the last argument. */
size_t u32toa_radix(char buf[minimum_length(33)], uint32_t n, unsigned int base);
size_t u64toa_radix(char buf[minimum_length(65)], uint64_t n, unsigned int base);
size_t i64toa_radix(char buf[minimum_length(66)], int64_t n, unsigned int base);
void rqsort(void *base, size_t nmemb, size_t size,
int (*cmp)(const void *, const void *, void *),
void *arg);
@ -1519,15 +1519,6 @@ static inline int string_get(const JSString *p, int idx) {
return p->is_wide_char ? p->u.str16[idx] : p->u.str8[idx];
static inline BOOL is_be(void)
union {
uint16_t a;
uint8_t b;
} u = {0x100};
return u.b;
typedef struct JSClassShortDef {
JSAtom class_name;
JSClassFinalizer *finalizer;
@ -2956,10 +2947,9 @@ JSAtom JS_NewAtomUInt32(JSContext *ctx, uint32_t n)
if (n <= JS_ATOM_MAX_INT) {
return __JS_AtomFromUInt32(n);
} else {
char buf[11];
JSValue val;
snprintf(buf, sizeof(buf), "%u", n);
val = JS_NewString(ctx, buf);
char buf[16];
size_t len = u32toa(buf, n);
JSValue val = JS_NewStringLen(ctx, buf, len);
if (JS_IsException(val))
return JS_ATOM_NULL;
return __JS_NewAtom(ctx->rt, JS_VALUE_GET_STRING(val),
@ -2973,9 +2963,8 @@ static JSAtom JS_NewAtomInt64(JSContext *ctx, int64_t n)
return __JS_AtomFromUInt32((uint32_t)n);
} else {
char buf[24];
JSValue val;
snprintf(buf, sizeof(buf), "%" PRId64 , n);
val = JS_NewString(ctx, buf);
size_t len = i64toa(buf, n);
JSValue val = JS_NewStringLen(ctx, buf, len);
if (JS_IsException(val))
return JS_ATOM_NULL;
return __JS_NewAtom(ctx->rt, JS_VALUE_GET_STRING(val),
@ -3078,8 +3067,8 @@ static JSValue __JS_AtomToValue(JSContext *ctx, JSAtom atom, BOOL force_string)
if (__JS_AtomIsTaggedInt(atom)) {
snprintf(buf, sizeof(buf), "%u", __JS_AtomToUInt32(atom));
return JS_NewString(ctx, buf);
size_t len = u32toa(buf, __JS_AtomToUInt32(atom));
return JS_NewStringLen(ctx, buf, len);
} else {
JSRuntime *rt = ctx->rt;
JSAtomStruct *p;
@ -3346,7 +3335,7 @@ static JSAtom js_atom_concat_str(JSContext *ctx, JSAtom name, const char *str1)
static JSAtom js_atom_concat_num(JSContext *ctx, JSAtom name, uint32_t n)
char buf[16];
snprintf(buf, sizeof(buf), "%u", n);
u32toa(buf, n);
return js_atom_concat_str(ctx, name, buf);
@ -11025,39 +11014,6 @@ static JSValue js_bigint_to_string(JSContext *ctx, JSValue val)
return js_bigint_to_string1(ctx, val, 10);
/* 2 <= base <= 36 */
static char const digits[36] = "0123456789abcdefghijklmnopqrstuvwxyz";
static char *i64toa(char *buf_end, int64_t n, unsigned int base)
char *q = buf_end;
int digit, is_neg;
is_neg = 0;
if (n < 0) {
is_neg = 1;
n = -n;
*--q = '\0';
if (base == 10) {
/* division by known base uses multiplication */
do {
digit = (uint64_t)n % 10;
n = (uint64_t)n / 10;
*--q = '0' + digit;
} while (n != 0);
} else {
do {
digit = (uint64_t)n % base;
n = (uint64_t)n / base;
*--q = digits[digit];
} while (n != 0);
if (is_neg)
*--q = '-';
return q;
/* buf1 contains the printf result */
static void js_ecvt1(double d, int n_digits, int *decpt, int *sign, char *buf,
int rounding_mode, char *buf1, int buf1_size)
@ -11142,10 +11098,10 @@ static int js_ecvt(double d, int n_digits, int *decpt, int *sign, char *buf,
return n_digits;
static int js_fcvt1(char (*buf)[JS_DTOA_BUF_SIZE], double d, int n_digits,
int rounding_mode)
static size_t js_fcvt1(char (*buf)[JS_DTOA_BUF_SIZE], double d, int n_digits,
int rounding_mode)
int n;
size_t n;
if (rounding_mode != FE_TONEAREST)
n = snprintf(*buf, sizeof(*buf), "%.*f", n_digits, d);
@ -11155,7 +11111,7 @@ static int js_fcvt1(char (*buf)[JS_DTOA_BUF_SIZE], double d, int n_digits,
return n;
static void js_fcvt(char (*buf)[JS_DTOA_BUF_SIZE], double d, int n_digits)
static size_t js_fcvt(char (*buf)[JS_DTOA_BUF_SIZE], double d, int n_digits)
int rounding_mode;
rounding_mode = FE_TONEAREST;
@ -11185,7 +11141,7 @@ static void js_fcvt(char (*buf)[JS_DTOA_BUF_SIZE], double d, int n_digits)
js_fcvt1(buf, d, n_digits, rounding_mode);
return js_fcvt1(buf, d, n_digits, rounding_mode);
/* radix != 10 is only supported with flags = JS_DTOA_VAR_FORMAT */
@ -11201,35 +11157,37 @@ static void js_fcvt(char (*buf)[JS_DTOA_BUF_SIZE], double d, int n_digits)
/* XXX: slow and maybe not fully correct. Use libbf when it is fast enough.
XXX: radix != 10 is only supported for small integers
static void js_dtoa1(char (*buf)[JS_DTOA_BUF_SIZE], double d,
int radix, int n_digits, int flags)
static size_t js_dtoa1(char (*buf)[JS_DTOA_BUF_SIZE], double d,
int radix, int n_digits, int flags)
char *q;
if (!isfinite(d)) {
if (isnan(d)) {
pstrcpy(*buf, sizeof(*buf), "NaN");
memcpy(*buf, "NaN", sizeof "NaN");
return sizeof("NaN") - 1;
} else if (d < 0) {
pstrcpy(*buf, sizeof(*buf), "-Infinity");
memcpy(*buf, "-Infinity", sizeof "-Infinity");
return sizeof("-Infinity") - 1;
} else {
pstrcpy(*buf, sizeof(*buf), "Infinity");
memcpy(*buf, "Infinity", sizeof "Infinity");
return sizeof("Infinity") - 1;
} else if (flags == JS_DTOA_VAR_FORMAT) {
int64_t i64;
char buf1[70], *ptr;
char buf1[72], *ptr;
if (d > (double)MAX_SAFE_INTEGER || d < (double)-MAX_SAFE_INTEGER)
goto generic_conv;
i64 = (int64_t)d;
if (d != i64)
goto generic_conv;
/* fast path for integers */
ptr = i64toa(buf1 + sizeof(buf1), i64, radix);
pstrcpy(*buf, sizeof(*buf), ptr);
return i64toa_radix(*buf, i64, radix);
} else {
if (d == 0.0)
d = 0.0; /* convert -0 to 0 */
if (flags == JS_DTOA_FRAC_FORMAT) {
js_fcvt(buf, d, n_digits);
return js_fcvt(buf, d, n_digits);
} else {
char buf1[JS_DTOA_BUF_SIZE];
int sign, decpt, k, n, i, p, n_max;
@ -11286,8 +11244,9 @@ static void js_dtoa1(char (*buf)[JS_DTOA_BUF_SIZE], double d,
p = n - 1;
if (p >= 0)
*q++ = '+';
snprintf(q, *buf + sizeof(*buf) - q, "%d", p);
q += snprintf(q, *buf + sizeof(*buf) - q, "%d", p);
return q - *buf;
@ -11296,8 +11255,8 @@ static JSValue js_dtoa(JSContext *ctx,
double d, int radix, int n_digits, int flags)
char buf[JS_DTOA_BUF_SIZE];
js_dtoa1(&buf, d, radix, n_digits, flags);
return JS_NewString(ctx, buf);
size_t len = js_dtoa1(&buf, d, radix, n_digits, flags);
return JS_NewStringLen(ctx, buf, len);
static JSValue js_dtoa_radix(JSContext *ctx, double d, int radix)
@ -11318,9 +11277,9 @@ static JSValue js_dtoa_radix(JSContext *ctx, double d, int radix)
while (n >= radix) {
digit = n % radix;
n = n / radix;
*--ptr = digits[digit];
*--ptr = digits36[digit];
*--ptr = digits[(int)n];
*--ptr = digits36[(size_t)n];
} else {
/* no decimals */
while (d0 >= radix) {
@ -11328,9 +11287,9 @@ static JSValue js_dtoa_radix(JSContext *ctx, double d, int radix)
d0 = trunc(d0 / radix);
digit = 0;
*--ptr = digits[digit];
*--ptr = digits36[digit];
*--ptr = digits[(int)d0];
*--ptr = digits36[(size_t)d0];
goto done;
if (frac != 0) {
@ -11342,13 +11301,13 @@ static JSValue js_dtoa_radix(JSContext *ctx, double d, int radix)
frac *= radix;
digit = trunc(frac);
frac -= digit;
*ptr2++ = digits[digit];
*ptr2++ = digits36[digit];
n0 = n0 * radix + digit;
prec -= log2_radix;
*ptr2 = '\0';
if (frac * radix >= radix / 2) {
char nine = digits[radix - 1];
char nine = digits36[radix - 1];
// round to closest
while (ptr2[-1] == nine)
*--ptr2 = '\0';
@ -11379,13 +11338,14 @@ JSValue JS_ToStringInternal(JSContext *ctx, JSValue val, BOOL is_ToPropertyKey)
uint32_t tag;
const char *str;
char buf[32];
size_t len;
switch(tag) {
return js_dup(val);
case JS_TAG_INT:
snprintf(buf, sizeof(buf), "%d", JS_VALUE_GET_INT(val));
len = i32toa(buf, JS_VALUE_GET_INT(val));
str = buf;
goto new_string;
@ -11410,6 +11370,7 @@ JSValue JS_ToStringInternal(JSContext *ctx, JSValue val, BOOL is_ToPropertyKey)
str = "[function bytecode]";
len = sizeof("[function bytecode]") - 1;
goto new_string;
if (is_ToPropertyKey) {
@ -11424,8 +11385,9 @@ JSValue JS_ToStringInternal(JSContext *ctx, JSValue val, BOOL is_ToPropertyKey)
return js_bigint_to_string(ctx, val);
str = "[unsupported type]";
len = sizeof("[unsupported type]") - 1;
return JS_NewString(ctx, str);
return JS_NewStringLen(ctx, str, len);
@ -11510,7 +11472,7 @@ static JSValue JS_ToQuotedString(JSContext *ctx, JSValue val1)
if (c < 32 || is_surrogate(c)) {
snprintf(buf, sizeof(buf), "\\u%04x", c);
if (string_buffer_puts8(b, buf))
if (string_buffer_write8(b, (uint8_t*)buf, 6))
goto fail;
} else {
if (string_buffer_putc(b, c))
@ -39011,6 +38973,7 @@ static int js_get_radix(JSContext *ctx, JSValue val)
static JSValue js_number_toString(JSContext *ctx, JSValue this_val,
int argc, JSValue *argv, int magic)
char buf[72];
JSValue val;
int base;
double d;
@ -39019,6 +38982,10 @@ static JSValue js_number_toString(JSContext *ctx, JSValue this_val,
if (JS_IsException(val))
return val;
if (magic || JS_IsUndefined(argv[0])) {
if (JS_VALUE_GET_TAG(val) == JS_TAG_INT) {
size_t len = i32toa(buf, JS_VALUE_GET_INT(val));
return JS_NewStringLen(ctx, buf, len);
base = 10;
} else {
base = js_get_radix(ctx, argv[0]);
@ -39026,9 +38993,8 @@ static JSValue js_number_toString(JSContext *ctx, JSValue this_val,
goto fail;
if (JS_VALUE_GET_TAG(val) == JS_TAG_INT) {
char buf1[70], *ptr;
ptr = i64toa(buf1 + sizeof(buf1), JS_VALUE_GET_INT(val), base);
return JS_NewString(ctx, ptr);
size_t len = i64toa_radix(buf, JS_VALUE_GET_INT(val), base);
return JS_NewStringLen(ctx, buf, len);
if (JS_ToFloat64Free(ctx, &d, val))
@ -47742,10 +47708,6 @@ static BOOL string_get_milliseconds(const uint8_t *sp, int *pp, int *pval) {
return TRUE;
static uint8_t upper_ascii(uint8_t c) {
return c >= 'a' && c <= 'z' ? c - 'a' + 'A' : c;
static BOOL string_get_tzoffset(const uint8_t *sp, int *pp, int *tzp, BOOL strict) {
int tz = 0, sgn, hh, mm, p = *pp;
@ -47787,7 +47749,7 @@ static BOOL string_get_tzoffset(const uint8_t *sp, int *pp, int *tzp, BOOL stric
static BOOL string_match(const uint8_t *sp, int *pp, const char *s) {
int p = *pp;
while (*s != '\0') {
if (upper_ascii(sp[p]) != upper_ascii(*s++))
if (to_upper_ascii(sp[p]) != to_upper_ascii(*s++))
return FALSE;
@ -47800,7 +47762,7 @@ static int find_abbrev(const uint8_t *sp, int p, const char *list, int count) {
for (n = 0; n < count; n++) {
for (i = 0;; i++) {
if (upper_ascii(sp[p + i]) != upper_ascii(list[n * 3 + i]))
if (to_upper_ascii(sp[p + i]) != to_upper_ascii(list[n * 3 + i]))
if (i == 2)
return n;
@ -876,9 +876,23 @@ function int_to_string(n)
var s, r, j;
r = 0;
for(j = 0; j < n; j++) {
s = (j + 1).toString();
s = (j % 10) + '';
s = (j % 100) + '';
s = (j) + '';
return n;
return n * 3;
/* Benchmark Number.prototype.toString() on integers of 1 digit, up to 2
   digits and arbitrary size. Returns the number of conversions performed. */
function int_toString(n)
{
    var s, j;
    for(j = 0; j < n; j++) {
        s = (j % 10).toString();
        s = (j % 100).toString();
        s = (j).toString();
    }
    return n * 3;
}
function float_to_string(n)
@ -886,9 +900,23 @@ function float_to_string(n)
var s, r, j;
r = 0;
for(j = 0; j < n; j++) {
s = (j + 0.1).toString();
s = (j % 10 + 0.1) + '';
s = (j + 0.1) + '';
s = (j * 12345678 + 0.1) + '';
return n;
return n * 3;
/* Benchmark Number.prototype.toString() on non-integer values of small,
   medium and large magnitude. Returns the number of conversions performed. */
function float_toString(n)
{
    var s, j;
    for(j = 0; j < n; j++) {
        s = (j % 10 + 0.1).toString();
        s = (j + 0.1).toString();
        s = (j * 12345678 + 0.1).toString();
    }
    return n * 3;
}
function string_to_int(n)
@ -983,7 +1011,9 @@ function main(argc, argv, g)
Normal file
Normal file
File diff suppressed because it is too large
Load diff
Add table
Reference in a new issue