mirror of
https://git.savannah.gnu.org/git/emacs.git
synced 2025-02-02 20:16:25 +00:00
More unsigned char' ->
re_char' changes.
Also change several `int' into `re_wchar_t'. (PATTERN_STACK_EMPTY, PUSH_PATTERN_OP, POP_PATTERN_OP): Remove. (PUSH_FAILURE_POINTER): Don't cast any more. (POP_FAILURE_REG_OR_COUNT): Remove the cast that strips `const'. We want GCC to complain, since this piece of code makes re_match non-reentrant, which *should* be fixed. (GET_BUFFER_SPACE): Use size_t rather than unsigned long. (EXTEND_BUFFER): Use RETALLOC. (SET_LIST_BIT): Don't cast. (re_wchar_t): New type. (re_iswctype, re_wctype_to_bit): Make it crystal clear to GCC that those two functions will always properly return. (IMMEDIATE_QUIT_CHECK): Cast to void. (analyse_first): Use recursion rather than an explicit stack. (re_compile_fastmap): Can't fail anymore. (re_search_2): Don't check re_compile_fastmap for failure. (PUSH_NUMBER): Renamed from PUSH_FAILURE_COUNT. Now also sets the new value (passed in a new argument). (re_match_2_internal): Use it. Also, use a new var `reg' of type size_t when looping through regs rather than reuse the inappropriate `mcnt'.
This commit is contained in:
parent
d97151cb57
commit
0161849810
360
src/regex.c
360
src/regex.c
@ -22,10 +22,9 @@
|
||||
/* TODO:
|
||||
- structure the opcode space into opcode+flag.
|
||||
- merge with glibc's regex.[ch].
|
||||
- replace succeed_n + jump_n with a combined operation so that the counter
|
||||
can simply be decremented when popping the failure_point without having
|
||||
to stack up failure_count entries.
|
||||
*/
|
||||
- replace (succeed_n + jump_n + set_number_at) with something that doesn't
|
||||
need to modify the compiled regexp.
|
||||
*/
|
||||
|
||||
/* AIX requires this to be the first thing in the file. */
|
||||
#if defined _AIX && !defined REGEX_MALLOC
|
||||
@ -553,7 +552,7 @@ typedef enum
|
||||
is followed by a range table:
|
||||
2 bytes of flags for character sets (low 8 bits, high 8 bits)
|
||||
See RANGE_TABLE_WORK_BITS below.
|
||||
2 bytes, the number of pairs that follow
|
||||
2 bytes, the number of pairs that follow (upto 32767)
|
||||
pairs, each 2 multibyte characters,
|
||||
each multibyte character represented as 3 bytes. */
|
||||
charset,
|
||||
@ -700,7 +699,7 @@ static void extract_number _RE_ARGS ((int *dest, re_char *source));
|
||||
static void
|
||||
extract_number (dest, source)
|
||||
int *dest;
|
||||
unsigned char *source;
|
||||
re_char *source;
|
||||
{
|
||||
int temp = SIGN_EXTEND_CHAR (*(source + 1));
|
||||
*dest = *source & 0377;
|
||||
@ -729,7 +728,7 @@ static void extract_number_and_incr _RE_ARGS ((int *destination,
|
||||
static void
|
||||
extract_number_and_incr (destination, source)
|
||||
int *destination;
|
||||
unsigned char **source;
|
||||
re_char **source;
|
||||
{
|
||||
extract_number (destination, *source);
|
||||
*source += 2;
|
||||
@ -803,9 +802,9 @@ extract_number_and_incr (destination, source)
|
||||
#define CHARSET_LOOKUP_RANGE_TABLE_RAW(not, c, range_table, count) \
|
||||
do \
|
||||
{ \
|
||||
int range_start, range_end; \
|
||||
unsigned char *p; \
|
||||
unsigned char *range_table_end \
|
||||
re_wchar_t range_start, range_end; \
|
||||
re_char *p; \
|
||||
re_char *range_table_end \
|
||||
= CHARSET_RANGE_TABLE_END ((range_table), (count)); \
|
||||
\
|
||||
for (p = (range_table); p < range_table_end; p += 2 * 3) \
|
||||
@ -829,8 +828,8 @@ extract_number_and_incr (destination, source)
|
||||
{ \
|
||||
/* Number of ranges in range table. */ \
|
||||
int count; \
|
||||
unsigned char *range_table = CHARSET_RANGE_TABLE (charset); \
|
||||
\
|
||||
re_char *range_table = CHARSET_RANGE_TABLE (charset); \
|
||||
\
|
||||
EXTRACT_NUMBER_AND_INCR (count, range_table); \
|
||||
CHARSET_LOOKUP_RANGE_TABLE_RAW ((not), (c), range_table, count); \
|
||||
} \
|
||||
@ -899,12 +898,12 @@ print_fastmap (fastmap)
|
||||
|
||||
void
|
||||
print_partial_compiled_pattern (start, end)
|
||||
unsigned char *start;
|
||||
unsigned char *end;
|
||||
re_char *start;
|
||||
re_char *end;
|
||||
{
|
||||
int mcnt, mcnt2;
|
||||
unsigned char *p = start;
|
||||
unsigned char *pend = end;
|
||||
re_char *p = start;
|
||||
re_char *pend = end;
|
||||
|
||||
if (start == NULL)
|
||||
{
|
||||
@ -1142,7 +1141,7 @@ void
|
||||
print_compiled_pattern (bufp)
|
||||
struct re_pattern_buffer *bufp;
|
||||
{
|
||||
unsigned char *buffer = bufp->buffer;
|
||||
re_char *buffer = bufp->buffer;
|
||||
|
||||
print_partial_compiled_pattern (buffer, buffer + bufp->used);
|
||||
printf ("%ld bytes used/%ld bytes allocated.\n",
|
||||
@ -1326,7 +1325,7 @@ size_t re_max_failures = 4000;
|
||||
|
||||
union fail_stack_elt
|
||||
{
|
||||
const unsigned char *pointer;
|
||||
re_char *pointer;
|
||||
/* This should be the biggest `int' that's no bigger than a pointer. */
|
||||
long integer;
|
||||
};
|
||||
@ -1341,7 +1340,6 @@ typedef struct
|
||||
size_t frame; /* Offset of the cur constructed frame. */
|
||||
} fail_stack_type;
|
||||
|
||||
#define PATTERN_STACK_EMPTY() (fail_stack.avail == 0)
|
||||
#define FAIL_STACK_EMPTY() (fail_stack.frame == 0)
|
||||
#define FAIL_STACK_FULL() (fail_stack.avail == fail_stack.size)
|
||||
|
||||
@ -1413,22 +1411,11 @@ typedef struct
|
||||
1)))
|
||||
|
||||
|
||||
/* Push pointer POINTER on FAIL_STACK.
|
||||
Return 1 if was able to do so and 0 if ran out of memory allocating
|
||||
space to do so. */
|
||||
#define PUSH_PATTERN_OP(POINTER, FAIL_STACK) \
|
||||
((FAIL_STACK_FULL () \
|
||||
&& !GROW_FAIL_STACK (FAIL_STACK)) \
|
||||
? 0 \
|
||||
: ((FAIL_STACK).stack[(FAIL_STACK).avail++].pointer = POINTER, \
|
||||
1))
|
||||
#define POP_PATTERN_OP() POP_FAILURE_POINTER ()
|
||||
|
||||
/* Push a pointer value onto the failure stack.
|
||||
Assumes the variable `fail_stack'. Probably should only
|
||||
be called from within `PUSH_FAILURE_POINT'. */
|
||||
#define PUSH_FAILURE_POINTER(item) \
|
||||
fail_stack.stack[fail_stack.avail++].pointer = (unsigned char *) (item)
|
||||
fail_stack.stack[fail_stack.avail++].pointer = (item)
|
||||
|
||||
/* This pushes an integer-valued item onto the failure stack.
|
||||
Assumes the variable `fail_stack'. Probably should only
|
||||
@ -1478,16 +1465,19 @@ do { \
|
||||
PUSH_FAILURE_INT (num); \
|
||||
} while (0)
|
||||
|
||||
#define PUSH_FAILURE_COUNT(ptr) \
|
||||
/* Change the counter's value to VAL, but make sure that it will
|
||||
be reset when backtracking. */
|
||||
#define PUSH_NUMBER(ptr,val) \
|
||||
do { \
|
||||
char *destination; \
|
||||
int c; \
|
||||
ENSURE_FAIL_STACK(3); \
|
||||
EXTRACT_NUMBER (c, ptr); \
|
||||
DEBUG_PRINT3 (" Push counter %p = %d\n", ptr, c); \
|
||||
DEBUG_PRINT4 (" Push number %p = %d -> %d\n", ptr, c, val); \
|
||||
PUSH_FAILURE_INT (c); \
|
||||
PUSH_FAILURE_POINTER (ptr); \
|
||||
PUSH_FAILURE_INT (-1); \
|
||||
STORE_NUMBER (ptr, val); \
|
||||
} while (0)
|
||||
|
||||
/* Pop a saved register off the stack. */
|
||||
@ -1497,7 +1487,9 @@ do { \
|
||||
if (reg == -1) \
|
||||
{ \
|
||||
/* It's a counter. */ \
|
||||
unsigned char *ptr = (unsigned char*) POP_FAILURE_POINTER (); \
|
||||
/* Here, we discard `const', which makes re_match non-reentrant. \
|
||||
Gcc gives a warning for it, which is good. */ \
|
||||
unsigned char *ptr = POP_FAILURE_POINTER (); \
|
||||
reg = POP_FAILURE_INT (); \
|
||||
STORE_NUMBER (ptr, reg); \
|
||||
DEBUG_PRINT3 (" Pop counter %p = %d\n", ptr, reg); \
|
||||
@ -1603,14 +1595,14 @@ do { \
|
||||
while (fail_stack.frame < fail_stack.avail) \
|
||||
POP_FAILURE_REG_OR_COUNT (); \
|
||||
\
|
||||
pat = (unsigned char *) POP_FAILURE_POINTER (); \
|
||||
pat = POP_FAILURE_POINTER (); \
|
||||
DEBUG_PRINT2 (" Popping pattern %p: ", pat); \
|
||||
DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend); \
|
||||
\
|
||||
/* If the saved string location is NULL, it came from an \
|
||||
on_failure_keep_string_jump opcode, and we want to throw away the \
|
||||
saved NULL, thus retaining our current position in the string. */ \
|
||||
str = (re_char *) POP_FAILURE_POINTER (); \
|
||||
str = POP_FAILURE_POINTER (); \
|
||||
DEBUG_PRINT2 (" Popping string %p: `", str); \
|
||||
DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2); \
|
||||
DEBUG_PRINT1 ("'\n"); \
|
||||
@ -1641,20 +1633,18 @@ static void insert_op1 _RE_ARGS ((re_opcode_t op, unsigned char *loc,
|
||||
int arg, unsigned char *end));
|
||||
static void insert_op2 _RE_ARGS ((re_opcode_t op, unsigned char *loc,
|
||||
int arg1, int arg2, unsigned char *end));
|
||||
static boolean at_begline_loc_p _RE_ARGS ((const unsigned char *pattern,
|
||||
const unsigned char *p,
|
||||
static boolean at_begline_loc_p _RE_ARGS ((re_char *pattern,
|
||||
re_char *p,
|
||||
reg_syntax_t syntax));
|
||||
static boolean at_endline_loc_p _RE_ARGS ((const unsigned char *p,
|
||||
const unsigned char *pend,
|
||||
static boolean at_endline_loc_p _RE_ARGS ((re_char *p,
|
||||
re_char *pend,
|
||||
reg_syntax_t syntax));
|
||||
static unsigned char *skip_one_char _RE_ARGS ((unsigned char *p));
|
||||
static int analyse_first _RE_ARGS ((unsigned char *p, unsigned char *pend,
|
||||
static re_char *skip_one_char _RE_ARGS ((re_char *p));
|
||||
static int analyse_first _RE_ARGS ((re_char *p, re_char *pend,
|
||||
char *fastmap, const int multibyte));
|
||||
|
||||
/* Fetch the next character in the uncompiled pattern---translating it
|
||||
if necessary. Also cast from a signed character in the constant
|
||||
string passed to us by the user to an unsigned char that we can use
|
||||
as an array index (in, e.g., `translate'). */
|
||||
if necessary. */
|
||||
#define PATFETCH(c) \
|
||||
do { \
|
||||
PATFETCH_RAW (c); \
|
||||
@ -1689,7 +1679,7 @@ static int analyse_first _RE_ARGS ((unsigned char *p, unsigned char *pend,
|
||||
|
||||
/* Make sure we have at least N more bytes of space in buffer. */
|
||||
#define GET_BUFFER_SPACE(n) \
|
||||
while ((unsigned long) (b - bufp->buffer + (n)) > bufp->allocated) \
|
||||
while ((size_t) (b - bufp->buffer + (n)) > bufp->allocated) \
|
||||
EXTEND_BUFFER ()
|
||||
|
||||
/* Make sure we have one more byte of buffer space and then add C to it. */
|
||||
@ -1778,13 +1768,13 @@ static int analyse_first _RE_ARGS ((unsigned char *p, unsigned char *pend,
|
||||
#endif
|
||||
#define EXTEND_BUFFER() \
|
||||
do { \
|
||||
unsigned char *old_buffer = bufp->buffer; \
|
||||
re_char *old_buffer = bufp->buffer; \
|
||||
if (bufp->allocated == MAX_BUF_SIZE) \
|
||||
return REG_ESIZE; \
|
||||
bufp->allocated <<= 1; \
|
||||
if (bufp->allocated > MAX_BUF_SIZE) \
|
||||
bufp->allocated = MAX_BUF_SIZE; \
|
||||
bufp->buffer = (unsigned char *) realloc (bufp->buffer, bufp->allocated);\
|
||||
RETALLOC (bufp->buffer, bufp->allocated, unsigned char); \
|
||||
if (bufp->buffer == NULL) \
|
||||
return REG_ESPACE; \
|
||||
/* If the buffer moved, move all the pointers into it. */ \
|
||||
@ -1907,9 +1897,7 @@ struct range_table_work_area
|
||||
|
||||
|
||||
/* Set the bit for character C in a list. */
|
||||
#define SET_LIST_BIT(c) \
|
||||
(b[((unsigned char) (c)) / BYTEWIDTH] \
|
||||
|= 1 << (((unsigned char) c) % BYTEWIDTH))
|
||||
#define SET_LIST_BIT(c) (b[((c)) / BYTEWIDTH] |= 1 << ((c) % BYTEWIDTH))
|
||||
|
||||
|
||||
/* Get the next unsigned number in the uncompiled pattern. */
|
||||
@ -1940,6 +1928,7 @@ struct range_table_work_area
|
||||
# define CHAR_CLASS_MAX_LENGTH 256
|
||||
# endif
|
||||
typedef wctype_t re_wctype_t;
|
||||
typedef wchar_t re_wchar_t;
|
||||
# define re_wctype wctype
|
||||
# define re_iswctype iswctype
|
||||
# define re_wctype_to_bit(cc) 0
|
||||
@ -1947,7 +1936,7 @@ typedef wctype_t re_wctype_t;
|
||||
# define CHAR_CLASS_MAX_LENGTH 9 /* Namely, `multibyte'. */
|
||||
# define btowc(c) c
|
||||
|
||||
/* Character classes' indices. */
|
||||
/* Character classes. */
|
||||
typedef enum { RECC_ERROR = 0,
|
||||
RECC_ALNUM, RECC_ALPHA, RECC_WORD,
|
||||
RECC_GRAPH, RECC_PRINT,
|
||||
@ -1959,10 +1948,12 @@ typedef enum { RECC_ERROR = 0,
|
||||
RECC_ASCII, RECC_UNIBYTE
|
||||
} re_wctype_t;
|
||||
|
||||
typedef int re_wchar_t;
|
||||
|
||||
/* Map a string to the char class it names (if any). */
|
||||
static re_wctype_t
|
||||
re_wctype (string)
|
||||
unsigned char *string;
|
||||
re_char *string;
|
||||
{
|
||||
if (STREQ (string, "alnum")) return RECC_ALNUM;
|
||||
else if (STREQ (string, "alpha")) return RECC_ALPHA;
|
||||
@ -1990,27 +1981,30 @@ re_iswctype (ch, cc)
|
||||
int ch;
|
||||
re_wctype_t cc;
|
||||
{
|
||||
boolean ret = false;
|
||||
|
||||
switch (cc)
|
||||
{
|
||||
case RECC_ALNUM: return ISALNUM (ch);
|
||||
case RECC_ALPHA: return ISALPHA (ch);
|
||||
case RECC_BLANK: return ISBLANK (ch);
|
||||
case RECC_CNTRL: return ISCNTRL (ch);
|
||||
case RECC_DIGIT: return ISDIGIT (ch);
|
||||
case RECC_GRAPH: return ISGRAPH (ch);
|
||||
case RECC_LOWER: return ISLOWER (ch);
|
||||
case RECC_PRINT: return ISPRINT (ch);
|
||||
case RECC_PUNCT: return ISPUNCT (ch);
|
||||
case RECC_SPACE: return ISSPACE (ch);
|
||||
case RECC_UPPER: return ISUPPER (ch);
|
||||
case RECC_XDIGIT: return ISXDIGIT (ch);
|
||||
case RECC_ASCII: return IS_REAL_ASCII (ch);
|
||||
case RECC_NONASCII: return !IS_REAL_ASCII (ch);
|
||||
case RECC_UNIBYTE: return ISUNIBYTE (ch);
|
||||
case RECC_MULTIBYTE: return !ISUNIBYTE (ch);
|
||||
case RECC_WORD: return ISWORD (ch);
|
||||
case RECC_ERROR: return false;
|
||||
case RECC_ALNUM: ret = ISALNUM (ch);
|
||||
case RECC_ALPHA: ret = ISALPHA (ch);
|
||||
case RECC_BLANK: ret = ISBLANK (ch);
|
||||
case RECC_CNTRL: ret = ISCNTRL (ch);
|
||||
case RECC_DIGIT: ret = ISDIGIT (ch);
|
||||
case RECC_GRAPH: ret = ISGRAPH (ch);
|
||||
case RECC_LOWER: ret = ISLOWER (ch);
|
||||
case RECC_PRINT: ret = ISPRINT (ch);
|
||||
case RECC_PUNCT: ret = ISPUNCT (ch);
|
||||
case RECC_SPACE: ret = ISSPACE (ch);
|
||||
case RECC_UPPER: ret = ISUPPER (ch);
|
||||
case RECC_XDIGIT: ret = ISXDIGIT (ch);
|
||||
case RECC_ASCII: ret = IS_REAL_ASCII (ch);
|
||||
case RECC_NONASCII: ret = !IS_REAL_ASCII (ch);
|
||||
case RECC_UNIBYTE: ret = ISUNIBYTE (ch);
|
||||
case RECC_MULTIBYTE: ret = !ISUNIBYTE (ch);
|
||||
case RECC_WORD: ret = ISWORD (ch);
|
||||
case RECC_ERROR: ret = false;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* Return a bit-pattern to use in the range-table bits to match multibyte
|
||||
@ -2019,18 +2013,21 @@ static int
|
||||
re_wctype_to_bit (cc)
|
||||
re_wctype_t cc;
|
||||
{
|
||||
int ret = 0;
|
||||
|
||||
switch (cc)
|
||||
{
|
||||
case RECC_NONASCII: case RECC_PRINT: case RECC_GRAPH:
|
||||
case RECC_MULTIBYTE: return BIT_MULTIBYTE;
|
||||
case RECC_ALPHA: case RECC_ALNUM: case RECC_WORD: return BIT_WORD;
|
||||
case RECC_LOWER: return BIT_LOWER;
|
||||
case RECC_UPPER: return BIT_UPPER;
|
||||
case RECC_PUNCT: return BIT_PUNCT;
|
||||
case RECC_SPACE: return BIT_SPACE;
|
||||
case RECC_MULTIBYTE: ret = BIT_MULTIBYTE;
|
||||
case RECC_ALPHA: case RECC_ALNUM: case RECC_WORD: ret = BIT_WORD;
|
||||
case RECC_LOWER: ret = BIT_LOWER;
|
||||
case RECC_UPPER: ret = BIT_UPPER;
|
||||
case RECC_PUNCT: ret = BIT_PUNCT;
|
||||
case RECC_SPACE: ret = BIT_SPACE;
|
||||
case RECC_ASCII: case RECC_DIGIT: case RECC_XDIGIT: case RECC_CNTRL:
|
||||
case RECC_BLANK: case RECC_UNIBYTE: case RECC_ERROR: return 0;
|
||||
case RECC_BLANK: case RECC_UNIBYTE: case RECC_ERROR: ret = 0;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -2042,7 +2039,7 @@ extern int immediate_quit;
|
||||
if (immediate_quit) QUIT; \
|
||||
} while (0)
|
||||
#else
|
||||
# define IMMEDIATE_QUIT_CHECK (0)
|
||||
# define IMMEDIATE_QUIT_CHECK ((void)0)
|
||||
#endif
|
||||
|
||||
#ifndef MATCH_MAY_ALLOCATE
|
||||
@ -2129,10 +2126,8 @@ regex_compile (pattern, size, syntax, bufp)
|
||||
reg_syntax_t syntax;
|
||||
struct re_pattern_buffer *bufp;
|
||||
{
|
||||
/* We fetch characters from PATTERN here. Even though PATTERN is
|
||||
`char *' (i.e., signed), we declare these variables as unsigned, so
|
||||
they can be reliably used as array indices. */
|
||||
register unsigned int c, c1;
|
||||
/* We fetch characters from PATTERN here. */
|
||||
register re_wchar_t c, c1;
|
||||
|
||||
/* A random temporary spot in PATTERN. */
|
||||
re_char *p1;
|
||||
@ -2359,6 +2354,7 @@ regex_compile (pattern, size, syntax, bufp)
|
||||
boolean simple = skip_one_char (laststart) == b;
|
||||
unsigned int startoffset = 0;
|
||||
re_opcode_t ofj =
|
||||
/* Check if the loop can match the empty string. */
|
||||
(simple || !analyse_first (laststart, b, NULL, 0)) ?
|
||||
on_failure_jump : on_failure_jump_loop;
|
||||
assert (skip_one_char (laststart) <= b);
|
||||
@ -2629,7 +2625,7 @@ regex_compile (pattern, size, syntax, bufp)
|
||||
if (SINGLE_BYTE_CHAR_P (c))
|
||||
/* ... into bitmap. */
|
||||
{
|
||||
unsigned this_char;
|
||||
re_wchar_t this_char;
|
||||
int range_start = c, range_end = c1;
|
||||
|
||||
/* If the start is after the end, the range is empty. */
|
||||
@ -3365,10 +3361,10 @@ insert_op2 (op, loc, arg1, arg2, end)
|
||||
|
||||
static boolean
|
||||
at_begline_loc_p (pattern, p, syntax)
|
||||
const unsigned char *pattern, *p;
|
||||
re_char *pattern, *p;
|
||||
reg_syntax_t syntax;
|
||||
{
|
||||
const unsigned char *prev = p - 2;
|
||||
re_char *prev = p - 2;
|
||||
boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\';
|
||||
|
||||
return
|
||||
@ -3389,12 +3385,12 @@ at_begline_loc_p (pattern, p, syntax)
|
||||
|
||||
static boolean
|
||||
at_endline_loc_p (p, pend, syntax)
|
||||
const unsigned char *p, *pend;
|
||||
re_char *p, *pend;
|
||||
reg_syntax_t syntax;
|
||||
{
|
||||
const unsigned char *next = p;
|
||||
re_char *next = p;
|
||||
boolean next_backslash = *next == '\\';
|
||||
const unsigned char *next_next = p + 1 < pend ? p + 1 : 0;
|
||||
re_char *next_next = p + 1 < pend ? p + 1 : 0;
|
||||
|
||||
return
|
||||
/* Before a subexpression? */
|
||||
@ -3433,36 +3429,16 @@ group_in_compile_stack (compile_stack, regnum)
|
||||
|
||||
Return 1 if p..pend might match the empty string.
|
||||
Return 0 if p..pend matches at least one char.
|
||||
Return -1 if p..pend matches at least one char, but fastmap was not
|
||||
updated accurately.
|
||||
Return -2 if an error occurred. */
|
||||
Return -1 if fastmap was not updated accurately. */
|
||||
|
||||
static int
|
||||
analyse_first (p, pend, fastmap, multibyte)
|
||||
unsigned char *p, *pend;
|
||||
re_char *p, *pend;
|
||||
char *fastmap;
|
||||
const int multibyte;
|
||||
{
|
||||
int j, k;
|
||||
boolean not;
|
||||
#ifdef MATCH_MAY_ALLOCATE
|
||||
fail_stack_type fail_stack;
|
||||
#endif
|
||||
#ifndef REGEX_MALLOC
|
||||
char *destination;
|
||||
#endif
|
||||
|
||||
#if defined REL_ALLOC && defined REGEX_MALLOC
|
||||
/* This holds the pointer to the failure stack, when
|
||||
it is allocated relocatably. */
|
||||
fail_stack_elt_t *failure_stack_ptr;
|
||||
#endif
|
||||
|
||||
/* Assume that each path through the pattern can be null until
|
||||
proven otherwise. We set this false at the bottom of switch
|
||||
statement, to which we get only if a particular path doesn't
|
||||
match the empty string. */
|
||||
boolean path_can_be_null = true;
|
||||
|
||||
/* If all elements for base leading-codes in fastmap is set, this
|
||||
flag is set true. */
|
||||
@ -3470,8 +3446,6 @@ analyse_first (p, pend, fastmap, multibyte)
|
||||
|
||||
assert (p);
|
||||
|
||||
INIT_FAIL_STACK ();
|
||||
|
||||
/* The loop below works as follows:
|
||||
- It has a working-list kept in the PATTERN_STACK and which basically
|
||||
starts by only containing a pointer to the first operation.
|
||||
@ -3487,7 +3461,7 @@ analyse_first (p, pend, fastmap, multibyte)
|
||||
so that `p' is monotonically increasing. More to the point, we
|
||||
never set `p' (or push) anything `<= p1'. */
|
||||
|
||||
while (1)
|
||||
while (p < pend)
|
||||
{
|
||||
/* `p1' is used as a marker of how far back a `on_failure_jump'
|
||||
can go without being ignored. It is normally equal to `p'
|
||||
@ -3497,29 +3471,12 @@ analyse_first (p, pend, fastmap, multibyte)
|
||||
3..9: <body>
|
||||
10: on_failure_jump 3
|
||||
as used for the *? operator. */
|
||||
unsigned char *p1 = p;
|
||||
|
||||
if (p >= pend)
|
||||
{
|
||||
if (path_can_be_null)
|
||||
return (RESET_FAIL_STACK (), 1);
|
||||
|
||||
/* We have reached the (effective) end of pattern. */
|
||||
if (PATTERN_STACK_EMPTY ())
|
||||
return (RESET_FAIL_STACK (), 0);
|
||||
|
||||
p = (unsigned char*) POP_PATTERN_OP ();
|
||||
path_can_be_null = true;
|
||||
continue;
|
||||
}
|
||||
|
||||
/* We should never be about to go beyond the end of the pattern. */
|
||||
assert (p < pend);
|
||||
re_char *p1 = p;
|
||||
|
||||
switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
|
||||
{
|
||||
case succeed:
|
||||
p = pend;
|
||||
return 1;
|
||||
continue;
|
||||
|
||||
case duplicate:
|
||||
@ -3551,7 +3508,7 @@ analyse_first (p, pend, fastmap, multibyte)
|
||||
/* We could put all the chars except for \n (and maybe \0)
|
||||
but we don't bother since it is generally not worth it. */
|
||||
if (!fastmap) break;
|
||||
return (RESET_FAIL_STACK (), -1);
|
||||
return -1;
|
||||
|
||||
|
||||
case charset_not:
|
||||
@ -3626,7 +3583,7 @@ analyse_first (p, pend, fastmap, multibyte)
|
||||
#else /* emacs */
|
||||
/* This match depends on text properties. These end with
|
||||
aborting optimizations. */
|
||||
return (RESET_FAIL_STACK (), -1);
|
||||
return -1;
|
||||
|
||||
case categoryspec:
|
||||
case notcategoryspec:
|
||||
@ -3693,8 +3650,14 @@ analyse_first (p, pend, fastmap, multibyte)
|
||||
EXTRACT_NUMBER_AND_INCR (j, p);
|
||||
if (p + j <= p1)
|
||||
; /* Backward jump to be ignored. */
|
||||
else if (!PUSH_PATTERN_OP (p + j, fail_stack))
|
||||
return (RESET_FAIL_STACK (), -2);
|
||||
else
|
||||
{ /* We have to look down both arms.
|
||||
We first go down the "straight" path so as to minimize
|
||||
stack usage when going through alternatives. */
|
||||
int r = analyse_first (p, pend, fastmap, multibyte);
|
||||
if (r) return r;
|
||||
p += j;
|
||||
}
|
||||
continue;
|
||||
|
||||
|
||||
@ -3734,15 +3697,13 @@ analyse_first (p, pend, fastmap, multibyte)
|
||||
|
||||
/* Getting here means we have found the possible starting
|
||||
characters for one path of the pattern -- and that the empty
|
||||
string does not match. We need not follow this path further.
|
||||
Instead, look at the next alternative (remembered on the
|
||||
stack), or quit if no more. The test at the top of the loop
|
||||
does these things. */
|
||||
path_can_be_null = false;
|
||||
p = pend;
|
||||
string does not match. We need not follow this path further. */
|
||||
return 0;
|
||||
} /* while p */
|
||||
|
||||
return (RESET_FAIL_STACK (), 0);
|
||||
/* We reached the end without matching anything. */
|
||||
return 1;
|
||||
|
||||
} /* analyse_first */
|
||||
|
||||
/* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in
|
||||
@ -3777,8 +3738,6 @@ re_compile_fastmap (bufp)
|
||||
analysis = analyse_first (bufp->buffer, bufp->buffer + bufp->used,
|
||||
fastmap, RE_MULTIBYTE_P (bufp));
|
||||
bufp->can_be_null = (analysis != 0);
|
||||
if (analysis < -1)
|
||||
return analysis;
|
||||
return 0;
|
||||
} /* re_compile_fastmap */
|
||||
|
||||
@ -3921,8 +3880,7 @@ re_search_2 (bufp, str1, size1, str2, size2, startpos, range, regs, stop)
|
||||
|
||||
/* Update the fastmap now if not correct already. */
|
||||
if (fastmap && !bufp->fastmap_accurate)
|
||||
if (re_compile_fastmap (bufp) == -2)
|
||||
return -2;
|
||||
re_compile_fastmap (bufp);
|
||||
|
||||
/* See whether the pattern is anchored. */
|
||||
anchored_start = (bufp->buffer[0] == begline);
|
||||
@ -3958,7 +3916,7 @@ re_search_2 (bufp, str1, size1, str2, size2, startpos, range, regs, stop)
|
||||
if (fastmap && startpos < total_size && !bufp->can_be_null)
|
||||
{
|
||||
register re_char *d;
|
||||
register unsigned int buf_ch;
|
||||
register re_wchar_t buf_ch;
|
||||
|
||||
d = POS_ADDR_VSTRING (startpos);
|
||||
|
||||
@ -4191,9 +4149,9 @@ static int bcmp_translate _RE_ARGS((re_char *s1, re_char *s2,
|
||||
|
||||
/* If the operation is a match against one or more chars,
|
||||
return a pointer to the next operation, else return NULL. */
|
||||
static unsigned char *
|
||||
static re_char *
|
||||
skip_one_char (p)
|
||||
unsigned char *p;
|
||||
re_char *p;
|
||||
{
|
||||
switch (SWITCH_ENUM_CAST (*p++))
|
||||
{
|
||||
@ -4303,7 +4261,7 @@ mutually_exclusive_p (bufp, p1, p2)
|
||||
case endline:
|
||||
case exactn:
|
||||
{
|
||||
register unsigned int c
|
||||
register re_wchar_t c
|
||||
= (re_opcode_t) *p2 == endline ? '\n'
|
||||
: RE_STRING_CHAR(p2 + 2, pend - p2 - 2);
|
||||
|
||||
@ -4525,8 +4483,8 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
|
||||
{
|
||||
/* General temporaries. */
|
||||
int mcnt;
|
||||
size_t reg;
|
||||
boolean not;
|
||||
unsigned char *p1;
|
||||
|
||||
/* Just past the end of the corresponding string. */
|
||||
re_char *end1, *end2;
|
||||
@ -4545,8 +4503,8 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
|
||||
re_char *dfail;
|
||||
|
||||
/* Where we are in the pattern, and the end of the pattern. */
|
||||
unsigned char *p = bufp->buffer;
|
||||
register unsigned char *pend = p + bufp->used;
|
||||
re_char *p = bufp->buffer;
|
||||
re_char *pend = p + bufp->used;
|
||||
|
||||
/* We use this to map every character in the string. */
|
||||
RE_TRANSLATE_TYPE translate = bufp->translate;
|
||||
@ -4655,8 +4613,8 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
|
||||
/* Initialize subexpression text positions to -1 to mark ones that no
|
||||
start_memory/stop_memory has been seen for. Also initialize the
|
||||
register information struct. */
|
||||
for (mcnt = 1; mcnt < num_regs; mcnt++)
|
||||
regstart[mcnt] = regend[mcnt] = NULL;
|
||||
for (reg = 1; reg < num_regs; reg++)
|
||||
regstart[reg] = regend[reg] = NULL;
|
||||
|
||||
/* We move `string1' into `string2' if the latter's empty -- but not if
|
||||
`string1' is null. */
|
||||
@ -4758,10 +4716,10 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
|
||||
|
||||
DEBUG_PRINT1 ("\nSAVING match as best so far.\n");
|
||||
|
||||
for (mcnt = 1; mcnt < num_regs; mcnt++)
|
||||
for (reg = 1; reg < num_regs; reg++)
|
||||
{
|
||||
best_regstart[mcnt] = regstart[mcnt];
|
||||
best_regend[mcnt] = regend[mcnt];
|
||||
best_regstart[reg] = regstart[reg];
|
||||
best_regend[reg] = regend[reg];
|
||||
}
|
||||
}
|
||||
goto fail;
|
||||
@ -4784,10 +4742,10 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
|
||||
dend = ((d >= string1 && d <= end1)
|
||||
? end_match_1 : end_match_2);
|
||||
|
||||
for (mcnt = 1; mcnt < num_regs; mcnt++)
|
||||
for (reg = 1; reg < num_regs; reg++)
|
||||
{
|
||||
regstart[mcnt] = best_regstart[mcnt];
|
||||
regend[mcnt] = best_regend[mcnt];
|
||||
regstart[reg] = best_regstart[reg];
|
||||
regend[reg] = best_regend[reg];
|
||||
}
|
||||
}
|
||||
} /* d != end_match_2 */
|
||||
@ -4847,16 +4805,16 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
|
||||
|
||||
/* Go through the first `min (num_regs, regs->num_regs)'
|
||||
registers, since that is all we initialized. */
|
||||
for (mcnt = 1; mcnt < MIN (num_regs, regs->num_regs); mcnt++)
|
||||
for (reg = 1; reg < MIN (num_regs, regs->num_regs); reg++)
|
||||
{
|
||||
if (REG_UNSET (regstart[mcnt]) || REG_UNSET (regend[mcnt]))
|
||||
regs->start[mcnt] = regs->end[mcnt] = -1;
|
||||
if (REG_UNSET (regstart[reg]) || REG_UNSET (regend[reg]))
|
||||
regs->start[reg] = regs->end[reg] = -1;
|
||||
else
|
||||
{
|
||||
regs->start[mcnt]
|
||||
= (regoff_t) POINTER_TO_OFFSET (regstart[mcnt]);
|
||||
regs->end[mcnt]
|
||||
= (regoff_t) POINTER_TO_OFFSET (regend[mcnt]);
|
||||
regs->start[reg]
|
||||
= (regoff_t) POINTER_TO_OFFSET (regstart[reg]);
|
||||
regs->end[reg]
|
||||
= (regoff_t) POINTER_TO_OFFSET (regend[reg]);
|
||||
}
|
||||
}
|
||||
|
||||
@ -4865,8 +4823,8 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
|
||||
we (re)allocated the registers, this is the case,
|
||||
because we always allocate enough to have at least one
|
||||
-1 at the end. */
|
||||
for (mcnt = num_regs; mcnt < regs->num_regs; mcnt++)
|
||||
regs->start[mcnt] = regs->end[mcnt] = -1;
|
||||
for (reg = num_regs; reg < regs->num_regs; reg++)
|
||||
regs->start[reg] = regs->end[reg] = -1;
|
||||
} /* regs && !bufp->no_sub */
|
||||
|
||||
DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n",
|
||||
@ -4964,7 +4922,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
|
||||
case anychar:
|
||||
{
|
||||
int buf_charlen;
|
||||
unsigned int buf_ch;
|
||||
re_wchar_t buf_ch;
|
||||
|
||||
DEBUG_PRINT1 ("EXECUTING anychar.\n");
|
||||
|
||||
@ -4993,7 +4951,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
|
||||
|
||||
/* Start of actual range_table, or end of bitmap if there is no
|
||||
range table. */
|
||||
unsigned char *range_table;
|
||||
re_char *range_table;
|
||||
|
||||
/* Nonzero if there is a range table. */
|
||||
int range_table_exists;
|
||||
@ -5317,8 +5275,10 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
|
||||
DEBUG_PRINT3 ("EXECUTING on_failure_jump_smart %d (to %p).\n",
|
||||
mcnt, p + mcnt);
|
||||
{
|
||||
unsigned char *p1 = p; /* Next operation. */
|
||||
re_char *p1 = p; /* Next operation. */
|
||||
/* Please don't add casts to try and shut up GCC. */
|
||||
unsigned char *p2 = p + mcnt; /* Destination of the jump. */
|
||||
unsigned char *p3 = p - 3; /* Location of the opcode. */
|
||||
|
||||
p -= 3; /* Reset so that we will re-execute the
|
||||
instruction once it's been changed. */
|
||||
@ -5334,14 +5294,14 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
|
||||
{
|
||||
/* Use a fast `on_failure_keep_string_jump' loop. */
|
||||
DEBUG_PRINT1 (" smart exclusive => fast loop.\n");
|
||||
*p = (unsigned char) on_failure_keep_string_jump;
|
||||
*p3 = (unsigned char) on_failure_keep_string_jump;
|
||||
STORE_NUMBER (p2 - 2, mcnt + 3);
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Default to a safe `on_failure_jump' loop. */
|
||||
DEBUG_PRINT1 (" smart default => slow loop.\n");
|
||||
*p = (unsigned char) on_failure_jump;
|
||||
*p3 = (unsigned char) on_failure_jump;
|
||||
}
|
||||
DEBUG_STATEMENT (debug -= 2);
|
||||
}
|
||||
@ -5361,17 +5321,18 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
|
||||
/* Have to succeed matching what follows at least n times.
|
||||
After that, handle like `on_failure_jump'. */
|
||||
case succeed_n:
|
||||
/* Signedness doesn't matter since we only compare MCNT to 0. */
|
||||
EXTRACT_NUMBER (mcnt, p + 2);
|
||||
DEBUG_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt);
|
||||
|
||||
/* Originally, mcnt is how many times we HAVE to succeed. */
|
||||
if (mcnt != 0)
|
||||
{
|
||||
/* Please don't add a cast to try and shut up GCC. */
|
||||
unsigned char *p2 = p + 2; /* Location of the counter. */
|
||||
mcnt--;
|
||||
p += 2;
|
||||
PUSH_FAILURE_COUNT (p);
|
||||
DEBUG_PRINT3 (" Setting %p to %d.\n", p, mcnt);
|
||||
STORE_NUMBER_AND_INCR (p, mcnt);
|
||||
p += 4;
|
||||
PUSH_NUMBER (p2, mcnt);
|
||||
}
|
||||
else
|
||||
/* The two bytes encoding mcnt == 0 are two no_op opcodes. */
|
||||
@ -5379,15 +5340,17 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
|
||||
break;
|
||||
|
||||
case jump_n:
|
||||
/* Signedness doesn't matter since we only compare MCNT to 0. */
|
||||
EXTRACT_NUMBER (mcnt, p + 2);
|
||||
DEBUG_PRINT2 ("EXECUTING jump_n %d.\n", mcnt);
|
||||
|
||||
/* Originally, this is how many times we CAN jump. */
|
||||
if (mcnt != 0)
|
||||
{
|
||||
/* Please don't add a cast to try and shut up GCC. */
|
||||
unsigned char *p2 = p + 2; /* Location of the counter. */
|
||||
mcnt--;
|
||||
PUSH_FAILURE_COUNT (p + 2);
|
||||
STORE_NUMBER (p + 2, mcnt);
|
||||
PUSH_NUMBER (p2, mcnt);
|
||||
goto unconditional_jump;
|
||||
}
|
||||
/* If don't have to jump any more, skip over the rest of command. */
|
||||
@ -5397,14 +5360,16 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
|
||||
|
||||
case set_number_at:
|
||||
{
|
||||
unsigned char *p2; /* Location of the counter. */
|
||||
DEBUG_PRINT1 ("EXECUTING set_number_at.\n");
|
||||
|
||||
EXTRACT_NUMBER_AND_INCR (mcnt, p);
|
||||
p1 = p + mcnt;
|
||||
/* Please don't add a cast to try and shut up GCC. */
|
||||
p2 = p + mcnt;
|
||||
/* Signedness doesn't matter since we only copy MCNT's bits . */
|
||||
EXTRACT_NUMBER_AND_INCR (mcnt, p);
|
||||
DEBUG_PRINT3 (" Setting %p to %d.\n", p1, mcnt);
|
||||
PUSH_FAILURE_COUNT (p1);
|
||||
STORE_NUMBER (p1, mcnt);
|
||||
DEBUG_PRINT3 (" Setting %p to %d.\n", p2, mcnt);
|
||||
PUSH_NUMBER (p2, mcnt);
|
||||
break;
|
||||
}
|
||||
|
||||
@ -5422,7 +5387,8 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
|
||||
{
|
||||
/* C1 is the character before D, S1 is the syntax of C1, C2
|
||||
is the character at D, and S2 is the syntax of C2. */
|
||||
int c1, c2, s1, s2;
|
||||
re_wchar_t c1, c2;
|
||||
int s1, s2;
|
||||
#ifdef emacs
|
||||
int offset = PTR_TO_OFFSET (d - 1);
|
||||
int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
|
||||
@ -5461,7 +5427,8 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
|
||||
{
|
||||
/* C1 is the character before D, S1 is the syntax of C1, C2
|
||||
is the character at D, and S2 is the syntax of C2. */
|
||||
int c1, c2, s1, s2;
|
||||
re_wchar_t c1, c2;
|
||||
int s1, s2;
|
||||
#ifdef emacs
|
||||
int offset = PTR_TO_OFFSET (d);
|
||||
int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
|
||||
@ -5504,7 +5471,8 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
|
||||
{
|
||||
/* C1 is the character before D, S1 is the syntax of C1, C2
|
||||
is the character at D, and S2 is the syntax of C2. */
|
||||
int c1, c2, s1, s2;
|
||||
re_wchar_t c1, c2;
|
||||
int s1, s2;
|
||||
#ifdef emacs
|
||||
int offset = PTR_TO_OFFSET (d) - 1;
|
||||
int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
|
||||
@ -5549,7 +5517,8 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
|
||||
}
|
||||
#endif
|
||||
{
|
||||
int c, len;
|
||||
int len;
|
||||
re_wchar_t c;
|
||||
|
||||
c = RE_STRING_CHAR_AND_LENGTH (d, dend - d, len);
|
||||
|
||||
@ -5585,7 +5554,9 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
|
||||
DEBUG_PRINT3 ("EXECUTING %scategoryspec %d.\n", not?"not":"", mcnt);
|
||||
PREFETCH ();
|
||||
{
|
||||
int c, len;
|
||||
int len;
|
||||
re_wchar_t c;
|
||||
|
||||
c = RE_STRING_CHAR_AND_LENGTH (d, dend - d, len);
|
||||
|
||||
if ((!CHAR_HAS_CATEGORY (c, mcnt)) ^ not)
|
||||
@ -5607,8 +5578,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
|
||||
IMMEDIATE_QUIT_CHECK;
|
||||
if (!FAIL_STACK_EMPTY ())
|
||||
{
|
||||
re_char *str;
|
||||
unsigned char *pat;
|
||||
re_char *str, *pat;
|
||||
/* A restart point is known. Restore to that state. */
|
||||
DEBUG_PRINT1 ("\nFAIL:\n");
|
||||
POP_FAILURE_POINT (str, pat);
|
||||
@ -5678,7 +5648,7 @@ bcmp_translate (s1, s2, len, translate, multibyte)
|
||||
while (p1 < p1_end && p2 < p2_end)
|
||||
{
|
||||
int p1_charlen, p2_charlen;
|
||||
int p1_ch, p2_ch;
|
||||
re_wchar_t p1_ch, p2_ch;
|
||||
|
||||
p1_ch = RE_STRING_CHAR_AND_LENGTH (p1, p1_end - p1, p1_charlen);
|
||||
p2_ch = RE_STRING_CHAR_AND_LENGTH (p2, p2_end - p2, p2_charlen);
|
||||
|
Loading…
x
Reference in New Issue
Block a user