Fix bug in GET_PREV_CHAR macro (#278)

* Fix bug in `GET_PREV_CHAR` macro

- pass `cbuf_type` variable to `XXX_CHAR` macros in `lre_exec_backtrack()`
- improve readability of these macros
- fix `GET_PREV_CHAR` macro: `cptr` was decremented twice when a low surrogate was not preceded by a valid high surrogate (the `*--_p` in the test ran even when no pair was formed).
- minimize non-functional changes
This commit is contained in:
Charlie Gordon 2024-03-03 17:12:52 +01:00 committed by GitHub
parent d11f5f600d
commit 5abbeacc62
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -1964,86 +1964,86 @@ static BOOL is_word_char(uint32_t c)
(c == '_')); (c == '_'));
} }
#define GET_CHAR(c, cptr, cbuf_end) \ #define GET_CHAR(c, cptr, cbuf_end, cbuf_type) \
do { \ do { \
if (cbuf_type == 0) { \ if (cbuf_type == 0) { \
c = *cptr++; \ c = *cptr++; \
} else { \ } else { \
const uint16_t *_p = (uint16_t *)cptr; \ const uint16_t *_p = (const uint16_t *)cptr; \
const uint16_t *_end = (uint16_t *)cbuf_end; \ const uint16_t *_end = (const uint16_t *)cbuf_end; \
c = *_p++; \ c = *_p++; \
if (is_hi_surrogate(c)) \ if (is_hi_surrogate(c)) \
if (cbuf_type == 2) \ if (cbuf_type == 2) \
if (_p < _end) \ if (_p < _end) \
if (is_lo_surrogate(*_p)) \ if (is_lo_surrogate(*_p)) \
c = from_surrogate(c, *_p++); \ c = from_surrogate(c, *_p++); \
cptr = (void *) _p; \ cptr = (const void *)_p; \
} \ } \
} while (0) } while (0)
#define PEEK_CHAR(c, cptr, cbuf_end) \ #define PEEK_CHAR(c, cptr, cbuf_end, cbuf_type) \
do { \ do { \
if (cbuf_type == 0) { \ if (cbuf_type == 0) { \
c = cptr[0]; \ c = cptr[0]; \
} else { \ } else { \
const uint16_t *_p = (uint16_t *)cptr; \ const uint16_t *_p = (const uint16_t *)cptr; \
const uint16_t *_end = (uint16_t *)cbuf_end; \ const uint16_t *_end = (const uint16_t *)cbuf_end; \
c = *_p++; \ c = *_p++; \
if (is_hi_surrogate(c)) \ if (is_hi_surrogate(c)) \
if (cbuf_type == 2) \ if (cbuf_type == 2) \
if (_p < _end) \ if (_p < _end) \
if (is_lo_surrogate(*_p)) \ if (is_lo_surrogate(*_p)) \
c = from_surrogate(c, *_p++); \ c = from_surrogate(c, *_p); \
} \ } \
} while (0) } while (0)
#define PEEK_PREV_CHAR(c, cptr, cbuf_start) \ #define PEEK_PREV_CHAR(c, cptr, cbuf_start, cbuf_type) \
do { \ do { \
if (cbuf_type == 0) { \ if (cbuf_type == 0) { \
c = cptr[-1]; \ c = cptr[-1]; \
} else { \ } else { \
const uint16_t *_p = (uint16_t *)cptr - 1; \ const uint16_t *_p = (const uint16_t *)cptr - 1; \
const uint16_t *_start = (uint16_t *)cbuf_start; \ const uint16_t *_start = (const uint16_t *)cbuf_start; \
c = *_p; \ c = *_p; \
if (is_lo_surrogate(c)) \ if (is_lo_surrogate(c)) \
if (cbuf_type == 2) \ if (cbuf_type == 2) \
if (_p > _start) \ if (_p > _start) \
if (is_hi_surrogate(*--_p)) \ if (is_hi_surrogate(_p[-1])) \
c = from_surrogate(*_p, c); \ c = from_surrogate(*--_p, c); \
} \ } \
} while (0) } while (0)
#define GET_PREV_CHAR(c, cptr, cbuf_start) \ #define GET_PREV_CHAR(c, cptr, cbuf_start, cbuf_type) \
do { \ do { \
if (cbuf_type == 0) { \ if (cbuf_type == 0) { \
cptr--; \ cptr--; \
c = cptr[0]; \ c = cptr[0]; \
} else { \ } else { \
const uint16_t *_p = (uint16_t *)cptr - 1; \ const uint16_t *_p = (const uint16_t *)cptr - 1; \
const uint16_t *_start = (uint16_t *)cbuf_start; \ const uint16_t *_start = (const uint16_t *)cbuf_start; \
c = *_p; \ c = *_p; \
if (is_lo_surrogate(c)) \ if (is_lo_surrogate(c)) \
if (cbuf_type == 2) \ if (cbuf_type == 2) \
if (_p > _start) \ if (_p > _start) \
if (is_hi_surrogate(*--_p)) \ if (is_hi_surrogate(_p[-1])) \
c = from_surrogate(*_p, c); \ c = from_surrogate(*--_p, c); \
cptr = (void *) _p; \ cptr = (const void *)_p; \
} \ } \
} while (0) } while (0)
#define PREV_CHAR(cptr, cbuf_start) \ #define PREV_CHAR(cptr, cbuf_start, cbuf_type) \
do { \ do { \
if (cbuf_type == 0) { \ if (cbuf_type == 0) { \
cptr--; \ cptr--; \
} else { \ } else { \
const uint16_t *_p = (uint16_t *)cptr - 1; \ const uint16_t *_p = (const uint16_t *)cptr - 1; \
const uint16_t *_start = (uint16_t *)cbuf_start; \ const uint16_t *_start = (const uint16_t *)cbuf_start; \
if (is_lo_surrogate(*_p)) \ if (is_lo_surrogate(*_p)) \
if (cbuf_type == 2) \ if (cbuf_type == 2) \
if (_p > _start) \ if (_p > _start) \
if (is_hi_surrogate(_p[-1])) \ if (is_hi_surrogate(_p[-1])) \
_p--; \ _p--; \
cptr = (void *) _p; \ cptr = (const void *)_p; \
} \ } \
} while (0) } while (0)
@ -2183,7 +2183,7 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture,
/* go backward */ /* go backward */
char_count = get_u32(pc + 12); char_count = get_u32(pc + 12);
for(i = 0; i < char_count; i++) { for(i = 0; i < char_count; i++) {
PREV_CHAR(cptr, s->cbuf); PREV_CHAR(cptr, s->cbuf, cbuf_type);
} }
pc = (pc + 16) + (int)get_u32(pc); pc = (pc + 16) + (int)get_u32(pc);
rs->cptr = cptr; rs->cptr = cptr;
@ -2222,7 +2222,7 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture,
test_char: test_char:
if (cptr >= cbuf_end) if (cptr >= cbuf_end)
goto no_match; goto no_match;
GET_CHAR(c, cptr, cbuf_end); GET_CHAR(c, cptr, cbuf_end, cbuf_type);
if (s->ignore_case) { if (s->ignore_case) {
c = lre_canonicalize(c, s->is_unicode); c = lre_canonicalize(c, s->is_unicode);
} }
@ -2269,7 +2269,7 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture,
break; break;
if (!s->multi_line) if (!s->multi_line)
goto no_match; goto no_match;
PEEK_PREV_CHAR(c, cptr, s->cbuf); PEEK_PREV_CHAR(c, cptr, s->cbuf, cbuf_type);
if (!is_line_terminator(c)) if (!is_line_terminator(c))
goto no_match; goto no_match;
break; break;
@ -2278,21 +2278,21 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture,
break; break;
if (!s->multi_line) if (!s->multi_line)
goto no_match; goto no_match;
PEEK_CHAR(c, cptr, cbuf_end); PEEK_CHAR(c, cptr, cbuf_end, cbuf_type);
if (!is_line_terminator(c)) if (!is_line_terminator(c))
goto no_match; goto no_match;
break; break;
case REOP_dot: case REOP_dot:
if (cptr == cbuf_end) if (cptr == cbuf_end)
goto no_match; goto no_match;
GET_CHAR(c, cptr, cbuf_end); GET_CHAR(c, cptr, cbuf_end, cbuf_type);
if (is_line_terminator(c)) if (is_line_terminator(c))
goto no_match; goto no_match;
break; break;
case REOP_any: case REOP_any:
if (cptr == cbuf_end) if (cptr == cbuf_end)
goto no_match; goto no_match;
GET_CHAR(c, cptr, cbuf_end); GET_CHAR(c, cptr, cbuf_end, cbuf_type);
break; break;
case REOP_save_start: case REOP_save_start:
case REOP_save_end: case REOP_save_end:
@ -2346,14 +2346,14 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture,
if (cptr == s->cbuf) { if (cptr == s->cbuf) {
v1 = FALSE; v1 = FALSE;
} else { } else {
PEEK_PREV_CHAR(c, cptr, s->cbuf); PEEK_PREV_CHAR(c, cptr, s->cbuf, cbuf_type);
v1 = is_word_char(c); v1 = is_word_char(c);
} }
/* current char */ /* current char */
if (cptr >= cbuf_end) { if (cptr >= cbuf_end) {
v2 = FALSE; v2 = FALSE;
} else { } else {
PEEK_CHAR(c, cptr, cbuf_end); PEEK_CHAR(c, cptr, cbuf_end, cbuf_type);
v2 = is_word_char(c); v2 = is_word_char(c);
} }
if (v1 ^ v2 ^ (REOP_not_word_boundary - opcode)) if (v1 ^ v2 ^ (REOP_not_word_boundary - opcode))
@ -2378,8 +2378,8 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture,
while (cptr1 < cptr1_end) { while (cptr1 < cptr1_end) {
if (cptr >= cbuf_end) if (cptr >= cbuf_end)
goto no_match; goto no_match;
GET_CHAR(c1, cptr1, cptr1_end); GET_CHAR(c1, cptr1, cptr1_end, cbuf_type);
GET_CHAR(c2, cptr, cbuf_end); GET_CHAR(c2, cptr, cbuf_end, cbuf_type);
if (s->ignore_case) { if (s->ignore_case) {
c1 = lre_canonicalize(c1, s->is_unicode); c1 = lre_canonicalize(c1, s->is_unicode);
c2 = lre_canonicalize(c2, s->is_unicode); c2 = lre_canonicalize(c2, s->is_unicode);
@ -2392,8 +2392,8 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture,
while (cptr1 > cptr1_start) { while (cptr1 > cptr1_start) {
if (cptr == s->cbuf) if (cptr == s->cbuf)
goto no_match; goto no_match;
GET_PREV_CHAR(c1, cptr1, cptr1_start); GET_PREV_CHAR(c1, cptr1, cptr1_start, cbuf_type);
GET_PREV_CHAR(c2, cptr, s->cbuf); GET_PREV_CHAR(c2, cptr, s->cbuf, cbuf_type);
if (s->ignore_case) { if (s->ignore_case) {
c1 = lre_canonicalize(c1, s->is_unicode); c1 = lre_canonicalize(c1, s->is_unicode);
c2 = lre_canonicalize(c2, s->is_unicode); c2 = lre_canonicalize(c2, s->is_unicode);
@ -2413,7 +2413,7 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture,
pc += 2; pc += 2;
if (cptr >= cbuf_end) if (cptr >= cbuf_end)
goto no_match; goto no_match;
GET_CHAR(c, cptr, cbuf_end); GET_CHAR(c, cptr, cbuf_end, cbuf_type);
if (s->ignore_case) { if (s->ignore_case) {
c = lre_canonicalize(c, s->is_unicode); c = lre_canonicalize(c, s->is_unicode);
} }
@ -2453,7 +2453,7 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture,
pc += 2; pc += 2;
if (cptr >= cbuf_end) if (cptr >= cbuf_end)
goto no_match; goto no_match;
GET_CHAR(c, cptr, cbuf_end); GET_CHAR(c, cptr, cbuf_end, cbuf_type);
if (s->ignore_case) { if (s->ignore_case) {
c = lre_canonicalize(c, s->is_unicode); c = lre_canonicalize(c, s->is_unicode);
} }
@ -2485,7 +2485,7 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture,
/* go to the previous char */ /* go to the previous char */
if (cptr == s->cbuf) if (cptr == s->cbuf)
goto no_match; goto no_match;
PREV_CHAR(cptr, s->cbuf); PREV_CHAR(cptr, s->cbuf, cbuf_type);
break; break;
case REOP_simple_greedy_quant: case REOP_simple_greedy_quant:
{ {