Implement RegExp 'v' flag, part 1 (#229)

This commit implements the flag itself and teaches the regex engine to
reject previously accepted patterns when in unicodeSets mode.

Refs: https://github.com/quickjs-ng/quickjs/issues/228
This commit is contained in:
Ben Noordhuis 2023-12-21 19:37:31 +01:00 committed by GitHub
parent d1852b5ea2
commit f0ef9e1593
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 199 additions and 2 deletions

View file

@ -63,6 +63,7 @@ typedef enum {
#define TMP_BUF_SIZE 128
// invariant: is_unicode and unicode_sets are mutually exclusive (at most one is set, possibly neither)
typedef struct {
DynBuf byte_code;
const uint8_t *buf_ptr;
@ -70,6 +71,7 @@ typedef struct {
const uint8_t *buf_start;
int re_flags;
BOOL is_unicode;
BOOL unicode_sets;
BOOL ignore_case;
BOOL dotall;
int capture_count;
@ -853,6 +855,8 @@ static int re_emit_range(REParseState *s, const CharRange *cr)
return 0;
}
// s->is_unicode turns patterns like []] into syntax errors
// s->unicode_sets turns more patterns into errors, like [a-] or [[]
static int re_parse_char_class(REParseState *s, const uint8_t **pp)
{
const uint8_t *p;
@ -864,17 +868,43 @@ static int re_parse_char_class(REParseState *s, const uint8_t **pp)
cr_init(cr, s->opaque, lre_realloc);
p = *pp;
p++; /* skip '[' */
if (s->unicode_sets) {
static const char verboten[] =
"()[{}/-|" "\0"
"&&!!##$$%%**++,,..::;;<<==>>??@@``~~" "\0"
"^^^_^^";
const char *s = verboten;
int n = 1;
do {
if (!memcmp(s, p, n))
if (p[n] == ']')
goto invalid_class_range;
s += n;
if (!*s) {
s++;
n++;
}
} while (n < 4);
}
invert = FALSE;
if (*p == '^') {
p++;
invert = TRUE;
}
for(;;) {
if (*p == ']')
break;
c1 = get_class_atom(s, cr1, &p, TRUE);
if ((int)c1 < 0)
goto fail;
if (*p == '-' && p[1] == ']' && s->unicode_sets) {
if (c1 >= CLASS_RANGE_BASE)
cr_free(cr1);
goto invalid_class_range;
}
if (*p == '-' && p[1] != ']') {
const uint8_t *p0 = p + 1;
if (c1 >= CLASS_RANGE_BASE) {
@ -1843,6 +1873,7 @@ uint8_t *lre_compile(int *plen, char *error_msg, int error_msg_size,
is_sticky = ((re_flags & LRE_FLAG_STICKY) != 0);
s->ignore_case = ((re_flags & LRE_FLAG_IGNORECASE) != 0);
s->dotall = ((re_flags & LRE_FLAG_DOTALL) != 0);
s->unicode_sets = ((re_flags & LRE_FLAG_UNICODE_SETS) != 0);
s->capture_count = 1;
s->total_capture_count = -1;
s->has_named_captures = -1;

View file

@ -37,8 +37,8 @@
#define LRE_FLAG_UNICODE (1 << 4)
#define LRE_FLAG_STICKY (1 << 5)
#define LRE_FLAG_INDICES (1 << 6) /* Unused by libregexp, just recorded. */
#define LRE_FLAG_NAMED_GROUPS (1 << 7) /* named groups are present in the regexp */
#define LRE_FLAG_UNICODE_SETS (1 << 8)
uint8_t *lre_compile(int *plen, char *error_msg, int error_msg_size,
const char *buf, size_t buf_len, int re_flags,

View file

@ -40730,6 +40730,9 @@ static JSValue js_compile_regexp(JSContext *ctx, JSValue pattern,
case 'u':
mask = LRE_FLAG_UNICODE;
break;
case 'v':
mask = LRE_FLAG_UNICODE_SETS;
break;
case 'y':
mask = LRE_FLAG_STICKY;
break;
@ -40746,6 +40749,10 @@ static JSValue js_compile_regexp(JSContext *ctx, JSValue pattern,
JS_FreeCString(ctx, str);
}
if (re_flags & LRE_FLAG_UNICODE)
if (re_flags & LRE_FLAG_UNICODE_SETS)
return JS_ThrowSyntaxError(ctx, "invalid regular expression flags");
str = JS_ToCStringLen2(ctx, &len, pattern, !(re_flags & LRE_FLAG_UNICODE));
if (!str)
return JS_EXCEPTION;
@ -41067,6 +41074,11 @@ static JSValue js_regexp_get_flags(JSContext *ctx, JSValue this_val)
goto exception;
if (res)
*p++ = 'u';
res = JS_ToBoolFree(ctx, JS_GetPropertyStr(ctx, this_val, "unicodeSets"));
if (res < 0)
goto exception;
if (res)
*p++ = 'v';
res = JS_ToBoolFree(ctx, JS_GetPropertyStr(ctx, this_val, "sticky"));
if (res < 0)
goto exception;
@ -42152,6 +42164,7 @@ static const JSCFunctionListEntry js_regexp_proto_funcs[] = {
JS_CGETSET_MAGIC_DEF("multiline", js_regexp_get_flag, NULL, LRE_FLAG_MULTILINE ),
JS_CGETSET_MAGIC_DEF("dotAll", js_regexp_get_flag, NULL, LRE_FLAG_DOTALL ),
JS_CGETSET_MAGIC_DEF("unicode", js_regexp_get_flag, NULL, LRE_FLAG_UNICODE ),
JS_CGETSET_MAGIC_DEF("unicodeSets", js_regexp_get_flag, NULL, LRE_FLAG_UNICODE_SETS ),
JS_CGETSET_MAGIC_DEF("sticky", js_regexp_get_flag, NULL, LRE_FLAG_STICKY ),
JS_CGETSET_MAGIC_DEF("hasIndices", js_regexp_get_flag, NULL, LRE_FLAG_INDICES ),
JS_CFUNC_DEF("exec", 1, js_regexp_exec ),

View file

@ -157,7 +157,7 @@ regexp-lookbehind
regexp-match-indices
regexp-named-groups
regexp-unicode-property-escapes
regexp-v-flag=skip
regexp-v-flag
resizable-arraybuffer=skip
rest-parameters
Set
@ -223,5 +223,147 @@ test262/test/built-ins/ThrowTypeError/unique-per-realm-function-proto.js
#test262/test/built-ins/RegExp/CharacterClassEscapes/
#test262/test/built-ins/RegExp/property-escapes/
# in progress regexp-v-flag support, see https://github.com/quickjs-ng/quickjs/issues/228
test262/test/built-ins/RegExp/property-escapes/generated/strings/Basic_Emoji-negative-CharacterClass.js
test262/test/built-ins/RegExp/property-escapes/generated/strings/Basic_Emoji-negative-P.js
test262/test/built-ins/RegExp/property-escapes/generated/strings/Basic_Emoji-negative-u.js
test262/test/built-ins/RegExp/property-escapes/generated/strings/Basic_Emoji.js
test262/test/built-ins/RegExp/property-escapes/generated/strings/Emoji_Keycap_Sequence-negative-CharacterClass.js
test262/test/built-ins/RegExp/property-escapes/generated/strings/Emoji_Keycap_Sequence-negative-P.js
test262/test/built-ins/RegExp/property-escapes/generated/strings/Emoji_Keycap_Sequence-negative-u.js
test262/test/built-ins/RegExp/property-escapes/generated/strings/Emoji_Keycap_Sequence.js
test262/test/built-ins/RegExp/property-escapes/generated/strings/RGI_Emoji-negative-CharacterClass.js
test262/test/built-ins/RegExp/property-escapes/generated/strings/RGI_Emoji-negative-P.js
test262/test/built-ins/RegExp/property-escapes/generated/strings/RGI_Emoji-negative-u.js
test262/test/built-ins/RegExp/property-escapes/generated/strings/RGI_Emoji.js
test262/test/built-ins/RegExp/property-escapes/generated/strings/RGI_Emoji_Flag_Sequence-negative-CharacterClass.js
test262/test/built-ins/RegExp/property-escapes/generated/strings/RGI_Emoji_Flag_Sequence-negative-P.js
test262/test/built-ins/RegExp/property-escapes/generated/strings/RGI_Emoji_Flag_Sequence-negative-u.js
test262/test/built-ins/RegExp/property-escapes/generated/strings/RGI_Emoji_Flag_Sequence.js
test262/test/built-ins/RegExp/property-escapes/generated/strings/RGI_Emoji_Modifier_Sequence-negative-CharacterClass.js
test262/test/built-ins/RegExp/property-escapes/generated/strings/RGI_Emoji_Modifier_Sequence-negative-P.js
test262/test/built-ins/RegExp/property-escapes/generated/strings/RGI_Emoji_Modifier_Sequence-negative-u.js
test262/test/built-ins/RegExp/property-escapes/generated/strings/RGI_Emoji_Modifier_Sequence.js
test262/test/built-ins/RegExp/property-escapes/generated/strings/RGI_Emoji_Tag_Sequence-negative-CharacterClass.js
test262/test/built-ins/RegExp/property-escapes/generated/strings/RGI_Emoji_Tag_Sequence-negative-P.js
test262/test/built-ins/RegExp/property-escapes/generated/strings/RGI_Emoji_Tag_Sequence-negative-u.js
test262/test/built-ins/RegExp/property-escapes/generated/strings/RGI_Emoji_Tag_Sequence.js
test262/test/built-ins/RegExp/property-escapes/generated/strings/RGI_Emoji_ZWJ_Sequence-negative-CharacterClass.js
test262/test/built-ins/RegExp/property-escapes/generated/strings/RGI_Emoji_ZWJ_Sequence-negative-P.js
test262/test/built-ins/RegExp/property-escapes/generated/strings/RGI_Emoji_ZWJ_Sequence-negative-u.js
test262/test/built-ins/RegExp/property-escapes/generated/strings/RGI_Emoji_ZWJ_Sequence.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-class-difference-character-class-escape.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-class-difference-character-class.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-class-difference-character-property-escape.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-class-difference-character.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-class-difference-property-of-strings-escape.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-class-difference-string-literal.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-class-escape-difference-character-class-escape.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-class-escape-difference-character-class.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-class-escape-difference-character-property-escape.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-class-escape-difference-character.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-class-escape-difference-property-of-strings-escape.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-class-escape-difference-string-literal.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-class-escape-intersection-character-class-escape.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-class-escape-intersection-character-class.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-class-escape-intersection-character-property-escape.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-class-escape-intersection-character.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-class-escape-intersection-property-of-strings-escape.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-class-escape-intersection-string-literal.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-class-escape-union-character-class-escape.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-class-escape-union-character-class.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-class-escape-union-character-property-escape.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-class-escape-union-character.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-class-escape-union-property-of-strings-escape.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-class-escape-union-string-literal.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-class-intersection-character-class-escape.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-class-intersection-character-class.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-class-intersection-character-property-escape.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-class-intersection-character.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-class-intersection-property-of-strings-escape.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-class-intersection-string-literal.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-class-union-character-class-escape.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-class-union-character-class.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-class-union-character-property-escape.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-class-union-character.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-class-union-property-of-strings-escape.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-class-union-string-literal.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-difference-character-class-escape.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-difference-character-class.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-difference-character-property-escape.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-difference-character.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-difference-property-of-strings-escape.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-difference-string-literal.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-intersection-character-class-escape.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-intersection-character-class.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-intersection-character-property-escape.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-intersection-character.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-intersection-property-of-strings-escape.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-intersection-string-literal.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-property-escape-difference-character-class-escape.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-property-escape-difference-character-class.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-property-escape-difference-character-property-escape.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-property-escape-difference-character.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-property-escape-difference-property-of-strings-escape.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-property-escape-difference-string-literal.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-property-escape-intersection-character-class-escape.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-property-escape-intersection-character-class.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-property-escape-intersection-character-property-escape.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-property-escape-intersection-character.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-property-escape-intersection-property-of-strings-escape.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-property-escape-intersection-string-literal.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-property-escape-union-character-class-escape.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-property-escape-union-character-class.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-property-escape-union-character-property-escape.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-property-escape-union-character.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-property-escape-union-property-of-strings-escape.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-property-escape-union-string-literal.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-union-character-class-escape.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-union-character-class.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-union-character-property-escape.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-union-character.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-union-property-of-strings-escape.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-union-string-literal.js
test262/test/built-ins/RegExp/unicodeSets/generated/property-of-strings-escape-difference-character-class-escape.js
test262/test/built-ins/RegExp/unicodeSets/generated/property-of-strings-escape-difference-character-class.js
test262/test/built-ins/RegExp/unicodeSets/generated/property-of-strings-escape-difference-character-property-escape.js
test262/test/built-ins/RegExp/unicodeSets/generated/property-of-strings-escape-difference-character.js
test262/test/built-ins/RegExp/unicodeSets/generated/property-of-strings-escape-difference-property-of-strings-escape.js
test262/test/built-ins/RegExp/unicodeSets/generated/property-of-strings-escape-difference-string-literal.js
test262/test/built-ins/RegExp/unicodeSets/generated/property-of-strings-escape-intersection-character-class-escape.js
test262/test/built-ins/RegExp/unicodeSets/generated/property-of-strings-escape-intersection-character-class.js
test262/test/built-ins/RegExp/unicodeSets/generated/property-of-strings-escape-intersection-character-property-escape.js
test262/test/built-ins/RegExp/unicodeSets/generated/property-of-strings-escape-intersection-character.js
test262/test/built-ins/RegExp/unicodeSets/generated/property-of-strings-escape-intersection-property-of-strings-escape.js
test262/test/built-ins/RegExp/unicodeSets/generated/property-of-strings-escape-intersection-string-literal.js
test262/test/built-ins/RegExp/unicodeSets/generated/property-of-strings-escape-union-character-class-escape.js
test262/test/built-ins/RegExp/unicodeSets/generated/property-of-strings-escape-union-character-class.js
test262/test/built-ins/RegExp/unicodeSets/generated/property-of-strings-escape-union-character-property-escape.js
test262/test/built-ins/RegExp/unicodeSets/generated/property-of-strings-escape-union-character.js
test262/test/built-ins/RegExp/unicodeSets/generated/property-of-strings-escape-union-property-of-strings-escape.js
test262/test/built-ins/RegExp/unicodeSets/generated/property-of-strings-escape-union-string-literal.js
test262/test/built-ins/RegExp/unicodeSets/generated/rgi-emoji-13.1.js
test262/test/built-ins/RegExp/unicodeSets/generated/rgi-emoji-14.0.js
test262/test/built-ins/RegExp/unicodeSets/generated/rgi-emoji-15.0.js
test262/test/built-ins/RegExp/unicodeSets/generated/rgi-emoji-15.1.js
test262/test/built-ins/RegExp/unicodeSets/generated/string-literal-difference-character-class-escape.js
test262/test/built-ins/RegExp/unicodeSets/generated/string-literal-difference-character-class.js
test262/test/built-ins/RegExp/unicodeSets/generated/string-literal-difference-character-property-escape.js
test262/test/built-ins/RegExp/unicodeSets/generated/string-literal-difference-character.js
test262/test/built-ins/RegExp/unicodeSets/generated/string-literal-difference-property-of-strings-escape.js
test262/test/built-ins/RegExp/unicodeSets/generated/string-literal-difference-string-literal.js
test262/test/built-ins/RegExp/unicodeSets/generated/string-literal-intersection-character-class-escape.js
test262/test/built-ins/RegExp/unicodeSets/generated/string-literal-intersection-character-class.js
test262/test/built-ins/RegExp/unicodeSets/generated/string-literal-intersection-character-property-escape.js
test262/test/built-ins/RegExp/unicodeSets/generated/string-literal-intersection-character.js
test262/test/built-ins/RegExp/unicodeSets/generated/string-literal-intersection-property-of-strings-escape.js
test262/test/built-ins/RegExp/unicodeSets/generated/string-literal-intersection-string-literal.js
test262/test/built-ins/RegExp/unicodeSets/generated/string-literal-union-character-class-escape.js
test262/test/built-ins/RegExp/unicodeSets/generated/string-literal-union-character-class.js
test262/test/built-ins/RegExp/unicodeSets/generated/string-literal-union-character-property-escape.js
test262/test/built-ins/RegExp/unicodeSets/generated/string-literal-union-character.js
test262/test/built-ins/RegExp/unicodeSets/generated/string-literal-union-property-of-strings-escape.js
test262/test/built-ins/RegExp/unicodeSets/generated/string-literal-union-string-literal.js
[tests]
# list test files or use config.testdir

View file

@ -646,6 +646,17 @@ function test_regexp()
assert(/{1a}/.toString(), "/{1a}/");
a = /a{1+/.exec("a{11");
assert(a, ["a{11"] );
eval("/[a-]/"); // accepted with no flag
eval("/[a-]/u"); // accepted with 'u' flag
let ex;
try {
eval("/[a-]/v"); // rejected with 'v' flag
} catch (_ex) {
ex = _ex;
}
assert(ex?.message, "invalid class range");
}
function test_symbol()