From 708dbcbf5b7bf0e59018d4d534e12d2e198eab9c Mon Sep 17 00:00:00 2001 From: Charlie Gordon Date: Sat, 2 Mar 2024 18:38:29 +0100 Subject: [PATCH] Fix big endian serialization (#269) * Fix big endian serialization Big endian serialization was broken because: - it partially relied on `WORDS_BIGENDIAN` (unconditionally undef'd in cutils.h) - endianness was not handled at all in the bc reader. - `bc_tag_str` was missing the `"RegExp"` string - `lre_byte_swap()` was broken for `REOP_range` and `REOP_range32` Modifications: - remove `WORDS_BIGENDIAN` - use `bc_put_u32()` / `bc_put_u64()` in `JS_WriteBigInt()` - use `bc_get_u32()` / `bc_get_u64()` in `JS_ReadBigInt()` - handle host endianness in `bc_get_u16()`, `bc_get_u32()`, `bc_get_u64()` and `JS_ReadFunctionBytecode()` - handle optional littleEndian argument as specified in `js_dataview_getValue()` and `js_dataview_setValue()` - fix `bc_tag_str` and `lre_byte_swap()` --- cutils.h | 3 --- libregexp.c | 13 ++++++++--- quickjs.c | 62 ++++++++++++++++++++++++++--------------------------- 3 files changed, 40 insertions(+), 38 deletions(-) diff --git a/cutils.h b/cutils.h index 60ee650..13f75cd 100644 --- a/cutils.h +++ b/cutils.h @@ -29,9 +29,6 @@ #include #include -/* set if CPU is big endian */ -#undef WORDS_BIGENDIAN - #if defined(_MSC_VER) #include #include diff --git a/libregexp.c b/libregexp.c index 64213fc..18272e0 100644 --- a/libregexp.c +++ b/libregexp.c @@ -2598,7 +2598,7 @@ const char *lre_get_groupnames(const uint8_t *bc_buf) void lre_byte_swap(uint8_t *buf, size_t len, BOOL is_byte_swapped) { uint8_t *p, *pe; - uint32_t n, r; + uint32_t n, r, nw; p = buf; if (len < RE_HEADER_LEN) @@ -2633,16 +2633,23 @@ void lre_byte_swap(uint8_t *buf, size_t len, BOOL is_byte_swapped) case REOP_save_reset: // has two 8 bit arguments break; case REOP_range32: // variable length - for (r = 3 + 4 * get_u16(&p[1]); n < r; n += 4) + nw = get_u16(&p[1]); // number of pairs of uint32_t + if (is_byte_swapped) + n = bswap16(n); + for (r = 
3 + 8 * nw; n < r; n += 4) inplace_bswap32(&p[n]); goto doswap16; case REOP_range: // variable length - for (r = 3 + 2 * get_u16(&p[1]); n < r; n += 2) + nw = get_u16(&p[1]); // number of pairs of uint16_t + if (is_byte_swapped) + n = bswap16(n); + for (r = 3 + 4 * nw; n < r; n += 2) inplace_bswap16(&p[n]); goto doswap16; default: doswap16: inplace_bswap16(&p[1]); + break; } break; case 5: diff --git a/quickjs.c b/quickjs.c index bd80e3c..c6efc21 100644 --- a/quickjs.c +++ b/quickjs.c @@ -32325,6 +32325,7 @@ static const char * const bc_tag_str[] = { "TypedArray", "ArrayBuffer", "SharedArrayBuffer", + "RegExp", "Date", "ObjectValue", "ObjectReference", @@ -32613,20 +32614,14 @@ static int JS_WriteBigInt(BCWriterState *s, JSValue obj) bc_put_leb128(s, len); /* always saved in byte based little endian representation */ for(j = 0; j < n1; j++) { - dbuf_putc(&s->dbuf, v >> (j * 8)); + bc_put_u8(s, v >> (j * 8)); } for(; i < a->len; i++) { limb_t v = a->tab[i]; #if LIMB_BITS == 32 -#ifdef WORDS_BIGENDIAN - v = bswap32(v); -#endif - dbuf_put_u32(&s->dbuf, v); + bc_put_u32(s, v); #else -#ifdef WORDS_BIGENDIAN - v = bswap64(v); -#endif - dbuf_put_u64(&s->dbuf, v); + bc_put_u64(s, v); #endif } } @@ -33218,33 +33213,45 @@ static int bc_get_u8(BCReaderState *s, uint8_t *pval) static int bc_get_u16(BCReaderState *s, uint16_t *pval) { + uint16_t v; if (unlikely(s->buf_end - s->ptr < 2)) { *pval = 0; /* avoid warning */ return bc_read_error_end(s); } - *pval = get_u16(s->ptr); + v = get_u16(s->ptr); + if (is_be()) + v = bswap16(v); + *pval = v; s->ptr += 2; return 0; } static __maybe_unused int bc_get_u32(BCReaderState *s, uint32_t *pval) { + uint32_t v; if (unlikely(s->buf_end - s->ptr < 4)) { *pval = 0; /* avoid warning */ return bc_read_error_end(s); } - *pval = get_u32(s->ptr); + v = get_u32(s->ptr); + if (is_be()) + v = bswap32(v); + *pval = v; s->ptr += 4; return 0; } static int bc_get_u64(BCReaderState *s, uint64_t *pval) { + uint64_t v; if (unlikely(s->buf_end - s->ptr < 
8)) { *pval = 0; /* avoid warning */ return bc_read_error_end(s); } - *pval = get_u64(s->ptr); + v = get_u64(s->ptr); + if (is_be()) + v = bswap64(v); + *pval = v; s->ptr += 8; return 0; } @@ -33387,6 +33394,9 @@ static int JS_ReadFunctionBytecode(BCReaderState *s, JSFunctionBytecode *b, return -1; b->byte_code_buf = bc_buf; + if (is_be()) + bc_byte_swap(bc_buf, bc_len); + pos = 0; while (pos < bc_len) { op = bc_buf[pos]; @@ -33481,15 +33491,9 @@ static JSValue JS_ReadBigInt(BCReaderState *s) #if LIMB_BITS == 32 if (bc_get_u32(s, &v)) goto fail; -#ifdef WORDS_BIGENDIAN - v = bswap32(v); -#endif #else if (bc_get_u64(s, &v)) goto fail; -#ifdef WORDS_BIGENDIAN - v = bswap64(v); -#endif #endif a->tab[i] = v; } @@ -50561,7 +50565,8 @@ static JSValue js_dataview_getValue(JSContext *ctx, { JSTypedArray *ta; JSArrayBuffer *abuf; - int is_swap, size; + BOOL littleEndian, is_swap; + int size; uint8_t *ptr; uint32_t v; uint64_t pos; @@ -50572,12 +50577,8 @@ static JSValue js_dataview_getValue(JSContext *ctx, size = 1 << typed_array_size_log2(class_id); if (JS_ToIndex(ctx, &pos, argv[0])) return JS_EXCEPTION; - is_swap = FALSE; - if (argc > 1) - is_swap = JS_ToBool(ctx, argv[1]); -#ifndef WORDS_BIGENDIAN - is_swap ^= 1; -#endif + littleEndian = argc > 1 && JS_ToBool(ctx, argv[1]); + is_swap = littleEndian ^ !is_be(); abuf = ta->buffer->u.array_buffer; if (abuf->detached) return JS_ThrowTypeErrorDetachedArrayBuffer(ctx); @@ -50663,7 +50664,8 @@ static JSValue js_dataview_setValue(JSContext *ctx, { JSTypedArray *ta; JSArrayBuffer *abuf; - int is_swap, size; + BOOL littleEndian, is_swap; + int size; uint8_t *ptr; uint64_t v64; uint32_t v; @@ -50703,12 +50705,8 @@ static JSValue js_dataview_setValue(JSContext *ctx, v64 = u.u64; } } - is_swap = FALSE; - if (argc > 2) - is_swap = JS_ToBool(ctx, argv[2]); -#ifndef WORDS_BIGENDIAN - is_swap ^= 1; -#endif + littleEndian = argc > 2 && JS_ToBool(ctx, argv[2]); + is_swap = littleEndian ^ !is_be(); abuf = ta->buffer->u.array_buffer; if 
(abuf->detached) return JS_ThrowTypeErrorDetachedArrayBuffer(ctx);