Fix bug in GET_PREV_CHAR macro (#278)
* Fix bug in `GET_PREV_CHAR` macro
  - pass the `cbuf_type` variable to the `XXX_CHAR` macros in `lre_exec_backtrack()`
  - improve readability of these macros
  - fix `GET_PREV_CHAR` macro: `cptr` was decremented twice when a low surrogate was not preceded by a valid high surrogate
  - minimize non-functional changes
This commit is contained in:
parent
d11f5f600d
commit
5abbeacc62
1 changed file with 38 additions and 38 deletions
76
libregexp.c
76
libregexp.c
|
@ -1964,86 +1964,86 @@ static BOOL is_word_char(uint32_t c)
|
|||
(c == '_'));
|
||||
}
|
||||
|
||||
#define GET_CHAR(c, cptr, cbuf_end) \
|
||||
#define GET_CHAR(c, cptr, cbuf_end, cbuf_type) \
|
||||
do { \
|
||||
if (cbuf_type == 0) { \
|
||||
c = *cptr++; \
|
||||
} else { \
|
||||
const uint16_t *_p = (uint16_t *)cptr; \
|
||||
const uint16_t *_end = (uint16_t *)cbuf_end; \
|
||||
const uint16_t *_p = (const uint16_t *)cptr; \
|
||||
const uint16_t *_end = (const uint16_t *)cbuf_end; \
|
||||
c = *_p++; \
|
||||
if (is_hi_surrogate(c)) \
|
||||
if (cbuf_type == 2) \
|
||||
if (_p < _end) \
|
||||
if (is_lo_surrogate(*_p)) \
|
||||
c = from_surrogate(c, *_p++); \
|
||||
cptr = (void *) _p; \
|
||||
cptr = (const void *)_p; \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#define PEEK_CHAR(c, cptr, cbuf_end) \
|
||||
#define PEEK_CHAR(c, cptr, cbuf_end, cbuf_type) \
|
||||
do { \
|
||||
if (cbuf_type == 0) { \
|
||||
c = cptr[0]; \
|
||||
} else { \
|
||||
const uint16_t *_p = (uint16_t *)cptr; \
|
||||
const uint16_t *_end = (uint16_t *)cbuf_end; \
|
||||
const uint16_t *_p = (const uint16_t *)cptr; \
|
||||
const uint16_t *_end = (const uint16_t *)cbuf_end; \
|
||||
c = *_p++; \
|
||||
if (is_hi_surrogate(c)) \
|
||||
if (cbuf_type == 2) \
|
||||
if (_p < _end) \
|
||||
if (is_lo_surrogate(*_p)) \
|
||||
c = from_surrogate(c, *_p++); \
|
||||
c = from_surrogate(c, *_p); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#define PEEK_PREV_CHAR(c, cptr, cbuf_start) \
|
||||
#define PEEK_PREV_CHAR(c, cptr, cbuf_start, cbuf_type) \
|
||||
do { \
|
||||
if (cbuf_type == 0) { \
|
||||
c = cptr[-1]; \
|
||||
} else { \
|
||||
const uint16_t *_p = (uint16_t *)cptr - 1; \
|
||||
const uint16_t *_start = (uint16_t *)cbuf_start; \
|
||||
const uint16_t *_p = (const uint16_t *)cptr - 1; \
|
||||
const uint16_t *_start = (const uint16_t *)cbuf_start; \
|
||||
c = *_p; \
|
||||
if (is_lo_surrogate(c)) \
|
||||
if (cbuf_type == 2) \
|
||||
if (_p > _start) \
|
||||
if (is_hi_surrogate(*--_p)) \
|
||||
c = from_surrogate(*_p, c); \
|
||||
if (is_hi_surrogate(_p[-1])) \
|
||||
c = from_surrogate(*--_p, c); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#define GET_PREV_CHAR(c, cptr, cbuf_start) \
|
||||
#define GET_PREV_CHAR(c, cptr, cbuf_start, cbuf_type) \
|
||||
do { \
|
||||
if (cbuf_type == 0) { \
|
||||
cptr--; \
|
||||
c = cptr[0]; \
|
||||
} else { \
|
||||
const uint16_t *_p = (uint16_t *)cptr - 1; \
|
||||
const uint16_t *_start = (uint16_t *)cbuf_start; \
|
||||
const uint16_t *_p = (const uint16_t *)cptr - 1; \
|
||||
const uint16_t *_start = (const uint16_t *)cbuf_start; \
|
||||
c = *_p; \
|
||||
if (is_lo_surrogate(c)) \
|
||||
if (cbuf_type == 2) \
|
||||
if (_p > _start) \
|
||||
if (is_hi_surrogate(*--_p)) \
|
||||
c = from_surrogate(*_p, c); \
|
||||
cptr = (void *) _p; \
|
||||
if (is_hi_surrogate(_p[-1])) \
|
||||
c = from_surrogate(*--_p, c); \
|
||||
cptr = (const void *)_p; \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#define PREV_CHAR(cptr, cbuf_start) \
|
||||
#define PREV_CHAR(cptr, cbuf_start, cbuf_type) \
|
||||
do { \
|
||||
if (cbuf_type == 0) { \
|
||||
cptr--; \
|
||||
} else { \
|
||||
const uint16_t *_p = (uint16_t *)cptr - 1; \
|
||||
const uint16_t *_start = (uint16_t *)cbuf_start; \
|
||||
const uint16_t *_p = (const uint16_t *)cptr - 1; \
|
||||
const uint16_t *_start = (const uint16_t *)cbuf_start; \
|
||||
if (is_lo_surrogate(*_p)) \
|
||||
if (cbuf_type == 2) \
|
||||
if (_p > _start) \
|
||||
if (is_hi_surrogate(_p[-1])) \
|
||||
_p--; \
|
||||
cptr = (void *) _p; \
|
||||
cptr = (const void *)_p; \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
|
@ -2183,7 +2183,7 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture,
|
|||
/* go backward */
|
||||
char_count = get_u32(pc + 12);
|
||||
for(i = 0; i < char_count; i++) {
|
||||
PREV_CHAR(cptr, s->cbuf);
|
||||
PREV_CHAR(cptr, s->cbuf, cbuf_type);
|
||||
}
|
||||
pc = (pc + 16) + (int)get_u32(pc);
|
||||
rs->cptr = cptr;
|
||||
|
@ -2222,7 +2222,7 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture,
|
|||
test_char:
|
||||
if (cptr >= cbuf_end)
|
||||
goto no_match;
|
||||
GET_CHAR(c, cptr, cbuf_end);
|
||||
GET_CHAR(c, cptr, cbuf_end, cbuf_type);
|
||||
if (s->ignore_case) {
|
||||
c = lre_canonicalize(c, s->is_unicode);
|
||||
}
|
||||
|
@ -2269,7 +2269,7 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture,
|
|||
break;
|
||||
if (!s->multi_line)
|
||||
goto no_match;
|
||||
PEEK_PREV_CHAR(c, cptr, s->cbuf);
|
||||
PEEK_PREV_CHAR(c, cptr, s->cbuf, cbuf_type);
|
||||
if (!is_line_terminator(c))
|
||||
goto no_match;
|
||||
break;
|
||||
|
@ -2278,21 +2278,21 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture,
|
|||
break;
|
||||
if (!s->multi_line)
|
||||
goto no_match;
|
||||
PEEK_CHAR(c, cptr, cbuf_end);
|
||||
PEEK_CHAR(c, cptr, cbuf_end, cbuf_type);
|
||||
if (!is_line_terminator(c))
|
||||
goto no_match;
|
||||
break;
|
||||
case REOP_dot:
|
||||
if (cptr == cbuf_end)
|
||||
goto no_match;
|
||||
GET_CHAR(c, cptr, cbuf_end);
|
||||
GET_CHAR(c, cptr, cbuf_end, cbuf_type);
|
||||
if (is_line_terminator(c))
|
||||
goto no_match;
|
||||
break;
|
||||
case REOP_any:
|
||||
if (cptr == cbuf_end)
|
||||
goto no_match;
|
||||
GET_CHAR(c, cptr, cbuf_end);
|
||||
GET_CHAR(c, cptr, cbuf_end, cbuf_type);
|
||||
break;
|
||||
case REOP_save_start:
|
||||
case REOP_save_end:
|
||||
|
@ -2346,14 +2346,14 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture,
|
|||
if (cptr == s->cbuf) {
|
||||
v1 = FALSE;
|
||||
} else {
|
||||
PEEK_PREV_CHAR(c, cptr, s->cbuf);
|
||||
PEEK_PREV_CHAR(c, cptr, s->cbuf, cbuf_type);
|
||||
v1 = is_word_char(c);
|
||||
}
|
||||
/* current char */
|
||||
if (cptr >= cbuf_end) {
|
||||
v2 = FALSE;
|
||||
} else {
|
||||
PEEK_CHAR(c, cptr, cbuf_end);
|
||||
PEEK_CHAR(c, cptr, cbuf_end, cbuf_type);
|
||||
v2 = is_word_char(c);
|
||||
}
|
||||
if (v1 ^ v2 ^ (REOP_not_word_boundary - opcode))
|
||||
|
@ -2378,8 +2378,8 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture,
|
|||
while (cptr1 < cptr1_end) {
|
||||
if (cptr >= cbuf_end)
|
||||
goto no_match;
|
||||
GET_CHAR(c1, cptr1, cptr1_end);
|
||||
GET_CHAR(c2, cptr, cbuf_end);
|
||||
GET_CHAR(c1, cptr1, cptr1_end, cbuf_type);
|
||||
GET_CHAR(c2, cptr, cbuf_end, cbuf_type);
|
||||
if (s->ignore_case) {
|
||||
c1 = lre_canonicalize(c1, s->is_unicode);
|
||||
c2 = lre_canonicalize(c2, s->is_unicode);
|
||||
|
@ -2392,8 +2392,8 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture,
|
|||
while (cptr1 > cptr1_start) {
|
||||
if (cptr == s->cbuf)
|
||||
goto no_match;
|
||||
GET_PREV_CHAR(c1, cptr1, cptr1_start);
|
||||
GET_PREV_CHAR(c2, cptr, s->cbuf);
|
||||
GET_PREV_CHAR(c1, cptr1, cptr1_start, cbuf_type);
|
||||
GET_PREV_CHAR(c2, cptr, s->cbuf, cbuf_type);
|
||||
if (s->ignore_case) {
|
||||
c1 = lre_canonicalize(c1, s->is_unicode);
|
||||
c2 = lre_canonicalize(c2, s->is_unicode);
|
||||
|
@ -2413,7 +2413,7 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture,
|
|||
pc += 2;
|
||||
if (cptr >= cbuf_end)
|
||||
goto no_match;
|
||||
GET_CHAR(c, cptr, cbuf_end);
|
||||
GET_CHAR(c, cptr, cbuf_end, cbuf_type);
|
||||
if (s->ignore_case) {
|
||||
c = lre_canonicalize(c, s->is_unicode);
|
||||
}
|
||||
|
@ -2453,7 +2453,7 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture,
|
|||
pc += 2;
|
||||
if (cptr >= cbuf_end)
|
||||
goto no_match;
|
||||
GET_CHAR(c, cptr, cbuf_end);
|
||||
GET_CHAR(c, cptr, cbuf_end, cbuf_type);
|
||||
if (s->ignore_case) {
|
||||
c = lre_canonicalize(c, s->is_unicode);
|
||||
}
|
||||
|
@ -2485,7 +2485,7 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture,
|
|||
/* go to the previous char */
|
||||
if (cptr == s->cbuf)
|
||||
goto no_match;
|
||||
PREV_CHAR(cptr, s->cbuf);
|
||||
PREV_CHAR(cptr, s->cbuf, cbuf_type);
|
||||
break;
|
||||
case REOP_simple_greedy_quant:
|
||||
{
|
||||
|
|
Loading…
Reference in a new issue