From 5c3077e0912128380e91bb680362eec65b4b2f79 Mon Sep 17 00:00:00 2001
From: Ben Noordhuis
Date: Wed, 29 Nov 2023 08:50:53 +0100
Subject: [PATCH] Implement RegExp serialization (#153)

JS_WriteObject() and JS_ReadObject() now support RegExp objects.
---
 cutils.h            |  8 +++++
 libregexp.c         | 75 +++++++++++++++++++++++++++++++++++++++++++++
 libregexp.h         |  2 ++
 quickjs.c           | 56 +++++++++++++++++++++++++++++++++
 tests/test_bjson.js | 13 ++++++++
 5 files changed, 154 insertions(+)

diff --git a/cutils.h b/cutils.h
index 4435e38..1231500 100644
--- a/cutils.h
+++ b/cutils.h
@@ -230,6 +230,14 @@ static inline uint64_t bswap64(uint64_t v)
         ((v & ((uint64_t)0xff << (0 * 8))) << (7 * 8));
 }
 
+static inline void inplace_bswap16(uint8_t *tab) {
+    put_u16(tab, bswap16(get_u16(tab)));
+}
+
+static inline void inplace_bswap32(uint8_t *tab) {
+    put_u32(tab, bswap32(get_u32(tab)));
+}
+
 /* XXX: should take an extra argument to pass slack information to the
    caller */
 typedef void *DynBufReallocFunc(void *opaque, void *ptr, size_t size);
diff --git a/libregexp.c b/libregexp.c
index 20b8139..d4df2f8 100644
--- a/libregexp.c
+++ b/libregexp.c
@@ -2557,6 +2557,81 @@ const char *lre_get_groupnames(const uint8_t *bc_buf)
     return (const char *)(bc_buf + 7 + re_bytecode_len);
 }
 
+/* Byte swap, in place, the multi-byte fields of the compiled regexp
+   stored in `buf` (`len` bytes). `is_byte_swapped` must be TRUE when
+   those fields are currently byte swapped relative to what get_u16()
+   and get_u32() return. Aborts when the header is inconsistent with `len`. */
+void lre_byte_swap(uint8_t *buf, size_t len, BOOL is_byte_swapped)
+{
+    uint8_t *p, *pe;
+    uint32_t n, r, nw;
+
+    p = buf;
+    if (len < RE_HEADER_LEN)
+        abort();
+
+    // format is:
+    //  <header>
+    //  <bytecode>
+    //  <capture group name 1>
+    //  <capture group name 2>
+    //  etc.
+    n = get_u32(&p[3]); // bytecode size
+    inplace_bswap32(&p[3]);
+    if (is_byte_swapped)
+        n = bswap32(n);
+    if (n > len - RE_HEADER_LEN)
+        abort();
+
+    p = &buf[RE_HEADER_LEN];
+    pe = &p[n];
+
+    while (p < pe) {
+        n = reopcode_info[*p].size;
+        switch (n) {
+        case 1:
+        case 2:
+            break;
+        case 3:
+            switch (*p) {
+            case REOP_save_reset: // has two 8 bit arguments
+                break;
+            case REOP_range32: // variable length
+                nw = get_u16(&p[1]); // number of pairs of uint32_t
+                if (is_byte_swapped)
+                    nw = bswap16(nw);
+                for (r = 3 + 8 * nw; n < r; n += 4)
+                    inplace_bswap32(&p[n]);
+                goto doswap16;
+            case REOP_range: // variable length
+                nw = get_u16(&p[1]); // number of pairs of uint16_t
+                if (is_byte_swapped)
+                    nw = bswap16(nw);
+                for (r = 3 + 4 * nw; n < r; n += 2)
+                    inplace_bswap16(&p[n]);
+                goto doswap16;
+            default:
+            doswap16:
+                inplace_bswap16(&p[1]);
+            }
+            break;
+        case 5:
+            inplace_bswap32(&p[1]);
+            break;
+        case 17:
+            assert(*p == REOP_simple_greedy_quant);
+            inplace_bswap32(&p[1]);
+            inplace_bswap32(&p[5]);
+            inplace_bswap32(&p[9]);
+            inplace_bswap32(&p[13]);
+            break;
+        default:
+            abort();
+        }
+        p = &p[n];
+    }
+}
+
 #ifdef TEST
 
 BOOL lre_check_stack_overflow(void *opaque, size_t alloca_size)
diff --git a/libregexp.h b/libregexp.h
index 97eac5c..46e0004 100644
--- a/libregexp.h
+++ b/libregexp.h
@@ -53,6 +53,8 @@ int lre_exec(uint8_t **capture,
 int lre_parse_escape(const uint8_t **pp, int allow_utf16);
 LRE_BOOL lre_is_space(int c);
 
+void lre_byte_swap(uint8_t *buf, size_t len, BOOL is_byte_swapped);
+
 /* must be provided by the user */
 LRE_BOOL lre_check_stack_overflow(void *opaque, size_t alloca_size);
 void *lre_realloc(void *opaque, void *ptr, size_t size);
diff --git a/quickjs.c b/quickjs.c
index c139360..3fb63e6 100644
--- a/quickjs.c
+++ b/quickjs.c
@@ -31659,6 +31659,7 @@ typedef enum BCTagEnum {
     BC_TAG_TYPED_ARRAY,
     BC_TAG_ARRAY_BUFFER,
     BC_TAG_SHARED_ARRAY_BUFFER,
+    BC_TAG_REGEXP,
     BC_TAG_DATE,
     BC_TAG_OBJECT_VALUE,
     BC_TAG_OBJECT_REFERENCE,
@@ -32272,6 +32273,27 @@ static int JS_WriteSharedArrayBuffer(BCWriterState *s, JSValueConst obj)
     return 0;
 }
 
+/* Write a RegExp as its pattern string followed by its bytecode string.
+   On big endian hosts the bytecode is byte swapped in place before it is
+   written and swapped back afterwards. Always returns 0. */
+static int JS_WriteRegExp(BCWriterState *s, JSRegExp regexp)
+{
+    JSString *bc = regexp.bytecode;
+    assert(!bc->is_wide_char);
+
+    JS_WriteString(s, regexp.pattern);
+
+    if (is_be())
+        lre_byte_swap(bc->u.str8, bc->len, /*is_byte_swapped*/FALSE);
+
+    JS_WriteString(s, bc);
+
+    if (is_be())
+        lre_byte_swap(bc->u.str8, bc->len, /*is_byte_swapped*/TRUE);
+
+    return 0;
+}
+
 static int JS_WriteObjectRec(BCWriterState *s, JSValueConst obj)
 {
     uint32_t tag;
@@ -32360,6 +32382,10 @@ static int JS_WriteObjectRec(BCWriterState *s, JSValueConst obj)
                 goto invalid_tag;
             ret = JS_WriteSharedArrayBuffer(s, obj);
             break;
+        case JS_CLASS_REGEXP:
+            bc_put_u8(s, BC_TAG_REGEXP);
+            ret = JS_WriteRegExp(s, p->u.regexp);
+            break;
         case JS_CLASS_DATE:
             bc_put_u8(s, BC_TAG_DATE);
             ret = JS_WriteObjectRec(s, p->u.object_data);
@@ -33357,6 +33383,33 @@ static JSValue JS_ReadSharedArrayBuffer(BCReaderState *s)
     return JS_EXCEPTION;
 }
 
+/* Read the pattern and bytecode strings of a RegExp, byte swap the
+   bytecode on big endian hosts, then build the RegExp object from them. */
+static JSValue JS_ReadRegExp(BCReaderState *s)
+{
+    JSContext *ctx = s->ctx;
+    JSString *pattern;
+    JSString *bc;
+
+    pattern = JS_ReadString(s);
+    if (!pattern)
+        return JS_EXCEPTION;
+
+    bc = JS_ReadString(s);
+    if (!bc) {
+        js_free_string(ctx->rt, pattern);
+        return JS_EXCEPTION;
+    }
+
+    assert(!bc->is_wide_char);
+    if (is_be())
+        lre_byte_swap(bc->u.str8, bc->len, /*is_byte_swapped*/TRUE);
+
+    return js_regexp_constructor_internal(ctx, JS_UNDEFINED,
+                                          JS_MKPTR(JS_TAG_STRING, pattern),
+                                          JS_MKPTR(JS_TAG_STRING, bc));
+}
+
 static JSValue JS_ReadDate(BCReaderState *s)
 {
     JSContext *ctx = s->ctx;
@@ -33484,6 +33537,9 @@ static JSValue JS_ReadObjectRec(BCReaderState *s)
             goto invalid_tag;
         obj = JS_ReadSharedArrayBuffer(s);
         break;
+    case BC_TAG_REGEXP:
+        obj = JS_ReadRegExp(s);
+        break;
     case BC_TAG_DATE:
         obj = JS_ReadDate(s);
         break;
diff --git a/tests/test_bjson.js b/tests/test_bjson.js
index 9b2f44f..48097d0 100644
--- a/tests/test_bjson.js
+++ b/tests/test_bjson.js
@@ -143,6 +143,18 @@ function bjson_test_reference()
     }
 }
 
+function bjson_test_regexp()
+{
+    var buf, r;
+
+    bjson_test(/xyzzy/);
+    bjson_test(/xyzzy/digu);
+
+    buf = bjson.write(/(?<𝓓𝓸𝓰>dog)/);
+    r = bjson.read(buf, 0, buf.byteLength);
+    assert("sup dog".match(r).groups["𝓓𝓸𝓰"], "dog");
+}
+
 function bjson_test_all()
 {
     var obj;
@@ -171,6 +183,7 @@ function bjson_test_all()
     }
 
     bjson_test_reference();
+    bjson_test_regexp();
 }
 
 bjson_test_all();