diff --git a/libregexp.c b/libregexp.c index c04765f..3e6fcaa 100644 --- a/libregexp.c +++ b/libregexp.c @@ -2582,25 +2582,26 @@ void *lre_realloc(void *opaque, void *ptr, size_t size) int main(int argc, char **argv) { - int len, ret, i; + int len, flags, ret, i; uint8_t *bc; char error_msg[64]; uint8_t *capture[CAPTURE_COUNT_MAX * 2]; const char *input; int input_len, capture_count; - if (argc < 3) { - printf("usage: %s regexp input\n", argv[0]); + if (argc < 4) { + printf("usage: %s regexp flags input\n", argv[0]); exit(1); } + flags = atoi(argv[2]); bc = lre_compile(&len, error_msg, sizeof(error_msg), argv[1], - strlen(argv[1]), 0, NULL); + strlen(argv[1]), flags, NULL); if (!bc) { fprintf(stderr, "error: %s\n", error_msg); exit(1); } - input = argv[2]; + input = argv[3]; input_len = strlen(input); ret = lre_exec(capture, bc, (uint8_t *)input, 0, input_len, 0, NULL); diff --git a/libregexp.h b/libregexp.h index cbf339e..7c03b1a 100644 --- a/libregexp.h +++ b/libregexp.h @@ -36,6 +36,7 @@ #define LRE_FLAG_DOTALL (1 << 3) #define LRE_FLAG_UTF16 (1 << 4) #define LRE_FLAG_STICKY (1 << 5) +#define LRE_FLAG_INDICES (1 << 6) /* Unused by libregexp, just recorded. */ #define LRE_FLAG_NAMED_GROUPS (1 << 7) /* named groups are present in the regexp */ diff --git a/quickjs-atom.h b/quickjs-atom.h index 5a0cf63..72db41f 100644 --- a/quickjs-atom.h +++ b/quickjs-atom.h @@ -166,6 +166,7 @@ DEF(revoke, "revoke") DEF(async, "async") DEF(exec, "exec") DEF(groups, "groups") +DEF(indices, "indices") DEF(status, "status") DEF(reason, "reason") DEF(globalThis, "globalThis") diff --git a/quickjs.c b/quickjs.c index 660b03d..5cdee41 100644 --- a/quickjs.c +++ b/quickjs.c @@ -1151,6 +1151,11 @@ static const JSClassExoticMethods js_proxy_exotic_methods; static const JSClassExoticMethods js_module_ns_exotic_methods; static JSClassID js_class_id_alloc = JS_CLASS_INIT_COUNT; +static JSValue js_int32(int32_t v) +{ + return JS_MKVAL(JS_TAG_INT, v); +} + static void js_trigger_gc(JSRuntime *rt, size_t size) { BOOL force_gc; @@ -39831,6 +39836,9 @@ static JSValue js_compile_regexp(JSContext *ctx, JSValueConst pattern, /* XXX: re_flags = LRE_FLAG_OCTAL unless strict mode? */ for (i = 0; i < len; i++) { switch(str[i]) { + case 'd': + mask = LRE_FLAG_INDICES; + break; case 'g': mask = LRE_FLAG_GLOBAL; break; @@ -40153,6 +40161,11 @@ static JSValue js_regexp_get_flags(JSContext *ctx, JSValueConst this_val) if (JS_VALUE_GET_TAG(this_val) != JS_TAG_OBJECT) return JS_ThrowTypeErrorNotAnObject(ctx); + res = JS_ToBoolFree(ctx, JS_GetPropertyStr(ctx, this_val, "hasIndices")); + if (res < 0) + goto exception; + if (res) + *p++ = 'd'; res = JS_ToBoolFree(ctx, JS_GetProperty(ctx, this_val, JS_ATOM_global)); if (res < 0) goto exception; @@ -40232,25 +40245,32 @@ static JSValue js_regexp_exec(JSContext *ctx, JSValueConst this_val, { JSRegExp *re = js_get_regexp(ctx, this_val, TRUE); JSString *str; - JSValue str_val, obj, val, groups = JS_UNDEFINED; + JSValue t, ret, str_val, obj, val, groups; + JSValue indices, indices_groups; uint8_t *re_bytecode; - int ret; uint8_t **capture, *str_buf; - int capture_count, shift, i, re_flags; + int rc, capture_count, shift, i, re_flags; int64_t last_index; const char *group_name_ptr; if (!re) return JS_EXCEPTION; + str_val = JS_ToString(ctx, argv[0]); if (JS_IsException(str_val)) - return str_val; - val = JS_GetProperty(ctx, this_val, JS_ATOM_lastIndex); - if (JS_IsException(val) || - JS_ToLengthFree(ctx, &last_index, val)) { - JS_FreeValue(ctx, str_val); return JS_EXCEPTION; - } + + ret = JS_EXCEPTION; + obj = JS_NULL; + groups = JS_UNDEFINED; + indices = JS_UNDEFINED; + indices_groups = JS_UNDEFINED; + capture = NULL; + + val = JS_GetProperty(ctx, this_val, JS_ATOM_lastIndex); + if (JS_IsException(val) || JS_ToLengthFree(ctx, &last_index, val)) + goto fail; + re_bytecode = re->bytecode->u.str8; re_flags = lre_get_flags(re_bytecode); if ((re_flags & (LRE_FLAG_GLOBAL | LRE_FLAG_STICKY)) == 0) { @@ -40258,27 +40278,23 @@ static JSValue js_regexp_exec(JSContext *ctx, JSValueConst this_val, } str = JS_VALUE_GET_STRING(str_val); capture_count = lre_get_capture_count(re_bytecode); - capture = NULL; if (capture_count > 0) { capture = js_malloc(ctx, sizeof(capture[0]) * capture_count * 2); - if (!capture) { - JS_FreeValue(ctx, str_val); - return JS_EXCEPTION; - } + if (!capture) + goto fail; } shift = str->is_wide_char; str_buf = str->u.str8; if (last_index > str->len) { - ret = 2; + rc = 2; } else { - ret = lre_exec(capture, re_bytecode, - str_buf, last_index, str->len, - shift, ctx); + rc = lre_exec(capture, re_bytecode, + str_buf, last_index, str->len, + shift, ctx); } - obj = JS_NULL; - if (ret != 1) { - if (ret >= 0) { - if (ret == 2 || (re_flags & (LRE_FLAG_GLOBAL | LRE_FLAG_STICKY))) { + if (rc != 1) { + if (rc >= 0) { + if (rc == 2 || (re_flags & (LRE_FLAG_GLOBAL | LRE_FLAG_STICKY))) { if (JS_SetProperty(ctx, this_val, JS_ATOM_lastIndex, JS_NewInt32(ctx, 0)) < 0) goto fail; @@ -40287,7 +40303,6 @@ static JSValue js_regexp_exec(JSContext *ctx, JSValueConst this_val, JS_ThrowInternalError(ctx, "out of memory in regexp execution"); goto fail; } - JS_FreeValue(ctx, str_val); } else { int prop_flags; if (re_flags & (LRE_FLAG_GLOBAL | LRE_FLAG_STICKY)) { @@ -40305,52 +40320,123 @@ static JSValue js_regexp_exec(JSContext *ctx, JSValueConst this_val, if (JS_IsException(groups)) goto fail; } - - for(i = 0; i < capture_count; i++) { - int start, end; - JSValue val; - if (capture[2 * i] == NULL || - capture[2 * i + 1] == NULL) { - val = JS_UNDEFINED; - } else { - start = (capture[2 * i] - str_buf) >> shift; - end = (capture[2 * i + 1] - str_buf) >> shift; - val = js_sub_string(ctx, str, start, end); - if (JS_IsException(val)) + if (re_flags & LRE_FLAG_INDICES) { + indices = JS_NewArray(ctx); + if (JS_IsException(indices)) + goto fail; + if (group_name_ptr) { + indices_groups = JS_NewObjectProto(ctx, JS_NULL); + if (JS_IsException(indices_groups)) goto fail; } + } + + for(i = 0; i < capture_count; i++) { + const char *name = NULL; + uint8_t **match = &capture[2 * i]; + int start = -1; + int end = -1; + if (group_name_ptr && i > 0) { - if (*group_name_ptr) { - if (JS_DefinePropertyValueStr(ctx, groups, group_name_ptr, - JS_DupValue(ctx, val), - prop_flags) < 0) { + if (*group_name_ptr) name = group_name_ptr; + group_name_ptr += strlen(group_name_ptr) + 1; + } + + if (match[0] && match[1]) { + start = (match[0] - str_buf) >> shift; + end = (match[1] - str_buf) >> shift; + } + + if (!JS_IsUndefined(indices)) { + JSValue val = JS_UNDEFINED; + if (start != -1) { + val = JS_NewArray(ctx); + if (JS_IsException(val)) + goto fail; + if (JS_DefinePropertyValueUint32(ctx, val, 0, + js_int32(start), + prop_flags) < 0) { + JS_FreeValue(ctx, val); + goto fail; + } + if (JS_DefinePropertyValueUint32(ctx, val, 1, + js_int32(end), + prop_flags) < 0) { JS_FreeValue(ctx, val); goto fail; } } - group_name_ptr += strlen(group_name_ptr) + 1; + if (name && !JS_IsUndefined(indices_groups)) { + val = JS_DupValue(ctx, val); + if (JS_DefinePropertyValueStr(ctx, indices_groups, + name, val, prop_flags) < 0) { + JS_FreeValue(ctx, val); + goto fail; + } + } + if (JS_DefinePropertyValueUint32(ctx, indices, i, val, + prop_flags) < 0) { + goto fail; + } } + + JSValue val = JS_UNDEFINED; + if (start != -1) { + val = js_sub_string(ctx, str, start, end); + if (JS_IsException(val)) + goto fail; + } + + if (name) { + if (JS_DefinePropertyValueStr(ctx, groups, name, + JS_DupValue(ctx, val), + prop_flags) < 0) { + JS_FreeValue(ctx, val); + goto fail; + } + } + if (JS_DefinePropertyValueUint32(ctx, obj, i, val, prop_flags) < 0) goto fail; } + + t = groups, groups = JS_UNDEFINED; if (JS_DefinePropertyValue(ctx, obj, JS_ATOM_groups, - groups, prop_flags) < 0) + t, prop_flags) < 0) { goto fail; - if (JS_DefinePropertyValue(ctx, obj, JS_ATOM_index, - JS_NewInt32(ctx, (capture[0] - str_buf) >> shift), prop_flags) < 0) + } + + t = js_int32((capture[0] - str_buf) >> shift); + if (JS_DefinePropertyValue(ctx, obj, JS_ATOM_index, t, prop_flags) < 0) goto fail; - if (JS_DefinePropertyValue(ctx, obj, JS_ATOM_input, str_val, prop_flags) < 0) - goto fail1; + + t = str_val, str_val = JS_UNDEFINED; + if (JS_DefinePropertyValue(ctx, obj, JS_ATOM_input, t, prop_flags) < 0) + goto fail; + + if (!JS_IsUndefined(indices)) { + t = indices_groups, indices_groups = JS_UNDEFINED; + if (JS_DefinePropertyValue(ctx, indices, JS_ATOM_groups, + t, prop_flags) < 0) { + goto fail; + } + t = indices, indices = JS_UNDEFINED; + if (JS_DefinePropertyValue(ctx, obj, JS_ATOM_indices, + t, prop_flags) < 0) { + goto fail; + } + } } - js_free(ctx, capture); - return obj; + ret = obj; + obj = JS_UNDEFINED; fail: - JS_FreeValue(ctx, groups); + JS_FreeValue(ctx, indices_groups); + JS_FreeValue(ctx, indices); JS_FreeValue(ctx, str_val); -fail1: + JS_FreeValue(ctx, groups); JS_FreeValue(ctx, obj); js_free(ctx, capture); - return JS_EXCEPTION; + return ret; } /* delete portions of a string that match a given regex */ @@ -41185,12 +41271,13 @@ static const JSCFunctionListEntry js_regexp_funcs[] = { static const JSCFunctionListEntry js_regexp_proto_funcs[] = { JS_CGETSET_DEF("flags", js_regexp_get_flags, NULL ), JS_CGETSET_DEF("source", js_regexp_get_source, NULL ), - JS_CGETSET_MAGIC_DEF("global", js_regexp_get_flag, NULL, 1 ), - JS_CGETSET_MAGIC_DEF("ignoreCase", js_regexp_get_flag, NULL, 2 ), - JS_CGETSET_MAGIC_DEF("multiline", js_regexp_get_flag, NULL, 4 ), - JS_CGETSET_MAGIC_DEF("dotAll", js_regexp_get_flag, NULL, 8 ), - JS_CGETSET_MAGIC_DEF("unicode", js_regexp_get_flag, NULL, 16 ), - JS_CGETSET_MAGIC_DEF("sticky", js_regexp_get_flag, NULL, 32 ), + JS_CGETSET_MAGIC_DEF("global", js_regexp_get_flag, NULL, LRE_FLAG_GLOBAL ), + JS_CGETSET_MAGIC_DEF("ignoreCase", js_regexp_get_flag, NULL, LRE_FLAG_IGNORECASE ), + JS_CGETSET_MAGIC_DEF("multiline", js_regexp_get_flag, NULL, LRE_FLAG_MULTILINE ), + JS_CGETSET_MAGIC_DEF("dotAll", js_regexp_get_flag, NULL, LRE_FLAG_DOTALL ), + JS_CGETSET_MAGIC_DEF("unicode", js_regexp_get_flag, NULL, LRE_FLAG_UTF16 ), + JS_CGETSET_MAGIC_DEF("sticky", js_regexp_get_flag, NULL, LRE_FLAG_STICKY ), + JS_CGETSET_MAGIC_DEF("hasIndices", js_regexp_get_flag, NULL, LRE_FLAG_INDICES ), JS_CFUNC_DEF("exec", 1, js_regexp_exec ), JS_CFUNC_DEF("compile", 2, js_regexp_compile ), JS_CFUNC_DEF("test", 1, js_regexp_test ), diff --git a/test262.conf b/test262.conf index cdf622f..1ad0980 100644 --- a/test262.conf +++ b/test262.conf @@ -154,7 +154,7 @@ Reflect.setPrototypeOf regexp-dotall regexp-duplicate-named-groups=skip regexp-lookbehind -regexp-match-indices=skip +regexp-match-indices regexp-named-groups regexp-unicode-property-escapes regexp-v-flag=skip