Implement RegExp serialization (#153)

JS_WriteObject() and JS_ReadObject() now support RegExp objects.
This commit is contained in:
Ben Noordhuis 2023-11-29 08:50:53 +01:00 committed by GitHub
parent a6e73ca73c
commit 5c3077e091
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 139 additions and 0 deletions

View file

@ -230,6 +230,14 @@ static inline uint64_t bswap64(uint64_t v)
((v & ((uint64_t)0xff << (0 * 8))) << (7 * 8)); ((v & ((uint64_t)0xff << (0 * 8))) << (7 * 8));
} }
/* Overwrite the 16-bit value stored at tab with its bswap16() image. */
static inline void inplace_bswap16(uint8_t *tab)
{
    uint16_t swapped = bswap16(get_u16(tab));

    put_u16(tab, swapped);
}
/* Overwrite the 32-bit value stored at tab with its bswap32() image. */
static inline void inplace_bswap32(uint8_t *tab)
{
    uint32_t swapped = bswap32(get_u32(tab));

    put_u32(tab, swapped);
}
/* XXX: should take an extra argument to pass slack information to the caller */ /* XXX: should take an extra argument to pass slack information to the caller */
typedef void *DynBufReallocFunc(void *opaque, void *ptr, size_t size); typedef void *DynBufReallocFunc(void *opaque, void *ptr, size_t size);

View file

@ -2557,6 +2557,71 @@ const char *lre_get_groupnames(const uint8_t *bc_buf)
return (const char *)(bc_buf + 7 + re_bytecode_len); return (const char *)(bc_buf + 7 + re_bytecode_len);
} }
/*
 * Byte-swap, in place, every multi-byte field of a compiled regexp.
 *
 * buf, len        - the compiled regexp: header, bytecode, then the capture
 *                   group names (which are left untouched).
 * is_byte_swapped - FALSE when buf is currently in host byte order, TRUE when
 *                   it is currently in the opposite order. Needed because
 *                   length fields must be interpreted while walking the buffer.
 *
 * Calls abort() when the buffer is shorter than the header, when the bytecode
 * length does not fit in the buffer, when an instruction would extend past the
 * end of the bytecode, or when an opcode has an unexpected size.
 */
void lre_byte_swap(uint8_t *buf, size_t len, BOOL is_byte_swapped)
{
    uint8_t *p, *pe;
    uint32_t n, r, count, width;

    p = buf;
    if (len < RE_HEADER_LEN)
        abort();

    // format is:
    //  <header>
    //  <bytecode>
    //  <capture group name 1>
    //  <capture group name 2>
    //  etc.
    n = get_u32(&p[3]); // bytecode size
    inplace_bswap32(&p[3]);
    if (is_byte_swapped)
        n = bswap32(n);
    if (n > len - RE_HEADER_LEN)
        abort();

    p = &buf[RE_HEADER_LEN];
    pe = &p[n];

    while (p < pe) {
        n = reopcode_info[*p].size;
        // an instruction must never be swapped past the end of the bytecode
        if (n > (size_t)(pe - p))
            abort();
        switch (n) {
        case 1:
        case 2:
            break;
        case 3:
            switch (*p) {
            case REOP_save_reset: // has two 8 bit arguments
                break;
            case REOP_range32: // variable length
            case REOP_range:   // variable length
                width = (*p == REOP_range32) ? 4 : 2;
                // Read the pair count while the field is in host order:
                // after swapping when the buffer arrived swapped, before
                // swapping when it arrived in host order.
                if (is_byte_swapped) {
                    inplace_bswap16(&p[1]);
                    count = get_u16(&p[1]);
                } else {
                    count = get_u16(&p[1]);
                    inplace_bswap16(&p[1]);
                }
                // the operand counts (low, high) pairs: two values per entry
                r = 3 + 2 * width * count;
                if (r > (size_t)(pe - p))
                    abort();
                // n ends up equal to r, the full length of the instruction
                for (; n < r; n += width) {
                    if (width == 4)
                        inplace_bswap32(&p[n]);
                    else
                        inplace_bswap16(&p[n]);
                }
                break;
            default: // single 16 bit argument
                inplace_bswap16(&p[1]);
                break;
            }
            break;
        case 5:
            inplace_bswap32(&p[1]);
            break;
        case 17:
            assert(*p == REOP_simple_greedy_quant);
            inplace_bswap32(&p[1]);
            inplace_bswap32(&p[5]);
            inplace_bswap32(&p[9]);
            inplace_bswap32(&p[13]);
            break;
        default:
            abort();
        }
        p = &p[n];
    }
}
#ifdef TEST #ifdef TEST
BOOL lre_check_stack_overflow(void *opaque, size_t alloca_size) BOOL lre_check_stack_overflow(void *opaque, size_t alloca_size)

View file

@ -53,6 +53,8 @@ int lre_exec(uint8_t **capture,
int lre_parse_escape(const uint8_t **pp, int allow_utf16); int lre_parse_escape(const uint8_t **pp, int allow_utf16);
LRE_BOOL lre_is_space(int c); LRE_BOOL lre_is_space(int c);
void lre_byte_swap(uint8_t *buf, size_t len, BOOL is_byte_swapped);
/* must be provided by the user */ /* must be provided by the user */
LRE_BOOL lre_check_stack_overflow(void *opaque, size_t alloca_size); LRE_BOOL lre_check_stack_overflow(void *opaque, size_t alloca_size);
void *lre_realloc(void *opaque, void *ptr, size_t size); void *lre_realloc(void *opaque, void *ptr, size_t size);

View file

@ -31659,6 +31659,7 @@ typedef enum BCTagEnum {
BC_TAG_TYPED_ARRAY, BC_TAG_TYPED_ARRAY,
BC_TAG_ARRAY_BUFFER, BC_TAG_ARRAY_BUFFER,
BC_TAG_SHARED_ARRAY_BUFFER, BC_TAG_SHARED_ARRAY_BUFFER,
BC_TAG_REGEXP,
BC_TAG_DATE, BC_TAG_DATE,
BC_TAG_OBJECT_VALUE, BC_TAG_OBJECT_VALUE,
BC_TAG_OBJECT_REFERENCE, BC_TAG_OBJECT_REFERENCE,
@ -32272,6 +32273,24 @@ static int JS_WriteSharedArrayBuffer(BCWriterState *s, JSValueConst obj)
return 0; return 0;
} }
/*
 * Serialize a RegExp as two strings: its pattern, then its compiled bytecode.
 * On a big-endian host the bytecode is byte-swapped in place around the
 * write and swapped back afterwards, so the live object is left unchanged.
 * Always returns 0.
 */
static int JS_WriteRegExp(BCWriterState *s, JSRegExp regexp)
{
    JSString *bytecode = regexp.bytecode;

    assert(!bytecode->is_wide_char);

    JS_WriteString(s, regexp.pattern);

    if (is_be()) {
        lre_byte_swap(bytecode->u.str8, bytecode->len,
                      /*is_byte_swapped*/FALSE);
        JS_WriteString(s, bytecode);
        lre_byte_swap(bytecode->u.str8, bytecode->len,
                      /*is_byte_swapped*/TRUE);
    } else {
        JS_WriteString(s, bytecode);
    }
    return 0;
}
static int JS_WriteObjectRec(BCWriterState *s, JSValueConst obj) static int JS_WriteObjectRec(BCWriterState *s, JSValueConst obj)
{ {
uint32_t tag; uint32_t tag;
@ -32360,6 +32379,10 @@ static int JS_WriteObjectRec(BCWriterState *s, JSValueConst obj)
goto invalid_tag; goto invalid_tag;
ret = JS_WriteSharedArrayBuffer(s, obj); ret = JS_WriteSharedArrayBuffer(s, obj);
break; break;
case JS_CLASS_REGEXP:
bc_put_u8(s, BC_TAG_REGEXP);
ret = JS_WriteRegExp(s, p->u.regexp);
break;
case JS_CLASS_DATE: case JS_CLASS_DATE:
bc_put_u8(s, BC_TAG_DATE); bc_put_u8(s, BC_TAG_DATE);
ret = JS_WriteObjectRec(s, p->u.object_data); ret = JS_WriteObjectRec(s, p->u.object_data);
@ -33357,6 +33380,31 @@ static JSValue JS_ReadSharedArrayBuffer(BCReaderState *s)
return JS_EXCEPTION; return JS_EXCEPTION;
} }
/*
 * Deserialize a RegExp: read the pattern string, then the compiled bytecode
 * string, and hand both to js_regexp_constructor_internal().
 *
 * Returns JS_EXCEPTION when either string cannot be read, and throws an
 * internal error when the bytecode string is not an 8-bit string. The data
 * comes from the serialized stream, so it is rejected with an exception
 * rather than checked with assert().
 */
static JSValue JS_ReadRegExp(BCReaderState *s)
{
    JSContext *ctx = s->ctx;
    JSString *pattern;
    JSString *bc;

    pattern = JS_ReadString(s);
    if (!pattern)
        return JS_EXCEPTION;

    bc = JS_ReadString(s);
    if (!bc) {
        js_free_string(ctx->rt, pattern);
        return JS_EXCEPTION;
    }

    // regexp bytecode is raw bytes; a wide-char string cannot be valid
    if (bc->is_wide_char) {
        js_free_string(ctx->rt, pattern);
        js_free_string(ctx->rt, bc);
        return JS_ThrowInternalError(ctx, "bad regexp bytecode");
    }

    // convert the bytecode to host order on big-endian hosts
    if (is_be())
        lre_byte_swap(bc->u.str8, bc->len, /*is_byte_swapped*/TRUE);

    return js_regexp_constructor_internal(ctx, JS_UNDEFINED,
                                          JS_MKPTR(JS_TAG_STRING, pattern),
                                          JS_MKPTR(JS_TAG_STRING, bc));
}
static JSValue JS_ReadDate(BCReaderState *s) static JSValue JS_ReadDate(BCReaderState *s)
{ {
JSContext *ctx = s->ctx; JSContext *ctx = s->ctx;
@ -33484,6 +33532,9 @@ static JSValue JS_ReadObjectRec(BCReaderState *s)
goto invalid_tag; goto invalid_tag;
obj = JS_ReadSharedArrayBuffer(s); obj = JS_ReadSharedArrayBuffer(s);
break; break;
case BC_TAG_REGEXP:
obj = JS_ReadRegExp(s);
break;
case BC_TAG_DATE: case BC_TAG_DATE:
obj = JS_ReadDate(s); obj = JS_ReadDate(s);
break; break;

View file

@ -143,6 +143,18 @@ function bjson_test_reference()
} }
} }
// Exercise bjson serialization of RegExp objects.
function bjson_test_regexp()
{
    var serialized, regexp, match;

    // A plain literal and one carrying the d, i, g and u flags go through
    // the shared bjson_test() helper.
    bjson_test(/xyzzy/);
    bjson_test(/xyzzy/digu);

    // A named capture group whose name lies outside the BMP must still be
    // usable after a write/read cycle.
    serialized = bjson.write(/(?<𝓓𝓸𝓰>dog)/);
    regexp = bjson.read(serialized, 0, serialized.byteLength);
    match = "sup dog".match(regexp);
    assert(match.groups["𝓓𝓸𝓰"], "dog");
}
function bjson_test_all() function bjson_test_all()
{ {
var obj; var obj;
@ -171,6 +183,7 @@ function bjson_test_all()
} }
bjson_test_reference(); bjson_test_reference();
bjson_test_regexp();
} }
bjson_test_all(); bjson_test_all();