1
0
mirror of https://git.savannah.gnu.org/git/emacs.git synced 2025-02-02 20:16:25 +00:00

More unsigned char' -> re_char' changes.

Also change several `int' into `re_wchar_t'.
(PATTERN_STACK_EMPTY, PUSH_PATTERN_OP, POP_PATTERN_OP): Remove.
(PUSH_FAILURE_POINTER): Don't cast any more.
(POP_FAILURE_REG_OR_COUNT): Remove the cast that strips `const'.
We want GCC to complain, since this piece of code makes
re_match non-reentrant, which *should* be fixed.
(GET_BUFFER_SPACE): Use size_t rather than unsigned long.
(EXTEND_BUFFER): Use RETALLOC.
(SET_LIST_BIT): Don't cast.
(re_wchar_t): New type.
(re_iswctype, re_wctype_to_bit): Make it crystal clear to GCC
that those two functions will always properly return.
(IMMEDIATE_QUIT_CHECK): Cast to void.
(analyse_first): Use recursion rather than an explicit stack.
(re_compile_fastmap): Can't fail anymore.
(re_search_2): Don't check re_compile_fastmap for failure.
(PUSH_NUMBER): Renamed from PUSH_FAILURE_COUNT.
Now also sets the new value (passed in a new argument).
(re_match_2_internal): Use it.
Also, use a new var `reg' of type size_t when looping through regs
rather than reuse the inappropriate `mcnt'.
This commit is contained in:
Stefan Monnier 2000-10-26 00:45:01 +00:00
parent d97151cb57
commit 0161849810

View File

@ -22,10 +22,9 @@
/* TODO:
- structure the opcode space into opcode+flag.
- merge with glibc's regex.[ch].
- replace succeed_n + jump_n with a combined operation so that the counter
can simply be decremented when popping the failure_point without having
to stack up failure_count entries.
*/
- replace (succeed_n + jump_n + set_number_at) with something that doesn't
need to modify the compiled regexp.
*/
/* AIX requires this to be the first thing in the file. */
#if defined _AIX && !defined REGEX_MALLOC
@ -553,7 +552,7 @@ typedef enum
is followed by a range table:
2 bytes of flags for character sets (low 8 bits, high 8 bits)
See RANGE_TABLE_WORK_BITS below.
2 bytes, the number of pairs that follow
2 bytes, the number of pairs that follow (upto 32767)
pairs, each 2 multibyte characters,
each multibyte character represented as 3 bytes. */
charset,
@ -700,7 +699,7 @@ static void extract_number _RE_ARGS ((int *dest, re_char *source));
static void
extract_number (dest, source)
int *dest;
unsigned char *source;
re_char *source;
{
int temp = SIGN_EXTEND_CHAR (*(source + 1));
*dest = *source & 0377;
@ -729,7 +728,7 @@ static void extract_number_and_incr _RE_ARGS ((int *destination,
static void
extract_number_and_incr (destination, source)
int *destination;
unsigned char **source;
re_char **source;
{
extract_number (destination, *source);
*source += 2;
@ -803,9 +802,9 @@ extract_number_and_incr (destination, source)
#define CHARSET_LOOKUP_RANGE_TABLE_RAW(not, c, range_table, count) \
do \
{ \
int range_start, range_end; \
unsigned char *p; \
unsigned char *range_table_end \
re_wchar_t range_start, range_end; \
re_char *p; \
re_char *range_table_end \
= CHARSET_RANGE_TABLE_END ((range_table), (count)); \
\
for (p = (range_table); p < range_table_end; p += 2 * 3) \
@ -829,8 +828,8 @@ extract_number_and_incr (destination, source)
{ \
/* Number of ranges in range table. */ \
int count; \
unsigned char *range_table = CHARSET_RANGE_TABLE (charset); \
\
re_char *range_table = CHARSET_RANGE_TABLE (charset); \
\
EXTRACT_NUMBER_AND_INCR (count, range_table); \
CHARSET_LOOKUP_RANGE_TABLE_RAW ((not), (c), range_table, count); \
} \
@ -899,12 +898,12 @@ print_fastmap (fastmap)
void
print_partial_compiled_pattern (start, end)
unsigned char *start;
unsigned char *end;
re_char *start;
re_char *end;
{
int mcnt, mcnt2;
unsigned char *p = start;
unsigned char *pend = end;
re_char *p = start;
re_char *pend = end;
if (start == NULL)
{
@ -1142,7 +1141,7 @@ void
print_compiled_pattern (bufp)
struct re_pattern_buffer *bufp;
{
unsigned char *buffer = bufp->buffer;
re_char *buffer = bufp->buffer;
print_partial_compiled_pattern (buffer, buffer + bufp->used);
printf ("%ld bytes used/%ld bytes allocated.\n",
@ -1326,7 +1325,7 @@ size_t re_max_failures = 4000;
union fail_stack_elt
{
const unsigned char *pointer;
re_char *pointer;
/* This should be the biggest `int' that's no bigger than a pointer. */
long integer;
};
@ -1341,7 +1340,6 @@ typedef struct
size_t frame; /* Offset of the cur constructed frame. */
} fail_stack_type;
#define PATTERN_STACK_EMPTY() (fail_stack.avail == 0)
#define FAIL_STACK_EMPTY() (fail_stack.frame == 0)
#define FAIL_STACK_FULL() (fail_stack.avail == fail_stack.size)
@ -1413,22 +1411,11 @@ typedef struct
1)))
/* Push pointer POINTER on FAIL_STACK.
Return 1 if was able to do so and 0 if ran out of memory allocating
space to do so. */
#define PUSH_PATTERN_OP(POINTER, FAIL_STACK) \
((FAIL_STACK_FULL () \
&& !GROW_FAIL_STACK (FAIL_STACK)) \
? 0 \
: ((FAIL_STACK).stack[(FAIL_STACK).avail++].pointer = POINTER, \
1))
#define POP_PATTERN_OP() POP_FAILURE_POINTER ()
/* Push a pointer value onto the failure stack.
Assumes the variable `fail_stack'. Probably should only
be called from within `PUSH_FAILURE_POINT'. */
#define PUSH_FAILURE_POINTER(item) \
fail_stack.stack[fail_stack.avail++].pointer = (unsigned char *) (item)
fail_stack.stack[fail_stack.avail++].pointer = (item)
/* This pushes an integer-valued item onto the failure stack.
Assumes the variable `fail_stack'. Probably should only
@ -1478,16 +1465,19 @@ do { \
PUSH_FAILURE_INT (num); \
} while (0)
#define PUSH_FAILURE_COUNT(ptr) \
/* Change the counter's value to VAL, but make sure that it will
be reset when backtracking. */
#define PUSH_NUMBER(ptr,val) \
do { \
char *destination; \
int c; \
ENSURE_FAIL_STACK(3); \
EXTRACT_NUMBER (c, ptr); \
DEBUG_PRINT3 (" Push counter %p = %d\n", ptr, c); \
DEBUG_PRINT4 (" Push number %p = %d -> %d\n", ptr, c, val); \
PUSH_FAILURE_INT (c); \
PUSH_FAILURE_POINTER (ptr); \
PUSH_FAILURE_INT (-1); \
STORE_NUMBER (ptr, val); \
} while (0)
/* Pop a saved register off the stack. */
@ -1497,7 +1487,9 @@ do { \
if (reg == -1) \
{ \
/* It's a counter. */ \
unsigned char *ptr = (unsigned char*) POP_FAILURE_POINTER (); \
/* Here, we discard `const', which makes re_match non-reentrant. \
Gcc gives a warning for it, which is good. */ \
unsigned char *ptr = POP_FAILURE_POINTER (); \
reg = POP_FAILURE_INT (); \
STORE_NUMBER (ptr, reg); \
DEBUG_PRINT3 (" Pop counter %p = %d\n", ptr, reg); \
@ -1603,14 +1595,14 @@ do { \
while (fail_stack.frame < fail_stack.avail) \
POP_FAILURE_REG_OR_COUNT (); \
\
pat = (unsigned char *) POP_FAILURE_POINTER (); \
pat = POP_FAILURE_POINTER (); \
DEBUG_PRINT2 (" Popping pattern %p: ", pat); \
DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend); \
\
/* If the saved string location is NULL, it came from an \
on_failure_keep_string_jump opcode, and we want to throw away the \
saved NULL, thus retaining our current position in the string. */ \
str = (re_char *) POP_FAILURE_POINTER (); \
str = POP_FAILURE_POINTER (); \
DEBUG_PRINT2 (" Popping string %p: `", str); \
DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2); \
DEBUG_PRINT1 ("'\n"); \
@ -1641,20 +1633,18 @@ static void insert_op1 _RE_ARGS ((re_opcode_t op, unsigned char *loc,
int arg, unsigned char *end));
static void insert_op2 _RE_ARGS ((re_opcode_t op, unsigned char *loc,
int arg1, int arg2, unsigned char *end));
static boolean at_begline_loc_p _RE_ARGS ((const unsigned char *pattern,
const unsigned char *p,
static boolean at_begline_loc_p _RE_ARGS ((re_char *pattern,
re_char *p,
reg_syntax_t syntax));
static boolean at_endline_loc_p _RE_ARGS ((const unsigned char *p,
const unsigned char *pend,
static boolean at_endline_loc_p _RE_ARGS ((re_char *p,
re_char *pend,
reg_syntax_t syntax));
static unsigned char *skip_one_char _RE_ARGS ((unsigned char *p));
static int analyse_first _RE_ARGS ((unsigned char *p, unsigned char *pend,
static re_char *skip_one_char _RE_ARGS ((re_char *p));
static int analyse_first _RE_ARGS ((re_char *p, re_char *pend,
char *fastmap, const int multibyte));
/* Fetch the next character in the uncompiled pattern---translating it
if necessary. Also cast from a signed character in the constant
string passed to us by the user to an unsigned char that we can use
as an array index (in, e.g., `translate'). */
if necessary. */
#define PATFETCH(c) \
do { \
PATFETCH_RAW (c); \
@ -1689,7 +1679,7 @@ static int analyse_first _RE_ARGS ((unsigned char *p, unsigned char *pend,
/* Make sure we have at least N more bytes of space in buffer. */
#define GET_BUFFER_SPACE(n) \
while ((unsigned long) (b - bufp->buffer + (n)) > bufp->allocated) \
while ((size_t) (b - bufp->buffer + (n)) > bufp->allocated) \
EXTEND_BUFFER ()
/* Make sure we have one more byte of buffer space and then add C to it. */
@ -1778,13 +1768,13 @@ static int analyse_first _RE_ARGS ((unsigned char *p, unsigned char *pend,
#endif
#define EXTEND_BUFFER() \
do { \
unsigned char *old_buffer = bufp->buffer; \
re_char *old_buffer = bufp->buffer; \
if (bufp->allocated == MAX_BUF_SIZE) \
return REG_ESIZE; \
bufp->allocated <<= 1; \
if (bufp->allocated > MAX_BUF_SIZE) \
bufp->allocated = MAX_BUF_SIZE; \
bufp->buffer = (unsigned char *) realloc (bufp->buffer, bufp->allocated);\
RETALLOC (bufp->buffer, bufp->allocated, unsigned char); \
if (bufp->buffer == NULL) \
return REG_ESPACE; \
/* If the buffer moved, move all the pointers into it. */ \
@ -1907,9 +1897,7 @@ struct range_table_work_area
/* Set the bit for character C in a list. */
#define SET_LIST_BIT(c) \
(b[((unsigned char) (c)) / BYTEWIDTH] \
|= 1 << (((unsigned char) c) % BYTEWIDTH))
#define SET_LIST_BIT(c) (b[((c)) / BYTEWIDTH] |= 1 << ((c) % BYTEWIDTH))
/* Get the next unsigned number in the uncompiled pattern. */
@ -1940,6 +1928,7 @@ struct range_table_work_area
# define CHAR_CLASS_MAX_LENGTH 256
# endif
typedef wctype_t re_wctype_t;
typedef wchar_t re_wchar_t;
# define re_wctype wctype
# define re_iswctype iswctype
# define re_wctype_to_bit(cc) 0
@ -1947,7 +1936,7 @@ typedef wctype_t re_wctype_t;
# define CHAR_CLASS_MAX_LENGTH 9 /* Namely, `multibyte'. */
# define btowc(c) c
/* Character classes' indices. */
/* Character classes. */
typedef enum { RECC_ERROR = 0,
RECC_ALNUM, RECC_ALPHA, RECC_WORD,
RECC_GRAPH, RECC_PRINT,
@ -1959,10 +1948,12 @@ typedef enum { RECC_ERROR = 0,
RECC_ASCII, RECC_UNIBYTE
} re_wctype_t;
typedef int re_wchar_t;
/* Map a string to the char class it names (if any). */
static re_wctype_t
re_wctype (string)
unsigned char *string;
re_char *string;
{
if (STREQ (string, "alnum")) return RECC_ALNUM;
else if (STREQ (string, "alpha")) return RECC_ALPHA;
@ -1990,27 +1981,30 @@ re_iswctype (ch, cc)
int ch;
re_wctype_t cc;
{
boolean ret = false;
switch (cc)
{
case RECC_ALNUM: return ISALNUM (ch);
case RECC_ALPHA: return ISALPHA (ch);
case RECC_BLANK: return ISBLANK (ch);
case RECC_CNTRL: return ISCNTRL (ch);
case RECC_DIGIT: return ISDIGIT (ch);
case RECC_GRAPH: return ISGRAPH (ch);
case RECC_LOWER: return ISLOWER (ch);
case RECC_PRINT: return ISPRINT (ch);
case RECC_PUNCT: return ISPUNCT (ch);
case RECC_SPACE: return ISSPACE (ch);
case RECC_UPPER: return ISUPPER (ch);
case RECC_XDIGIT: return ISXDIGIT (ch);
case RECC_ASCII: return IS_REAL_ASCII (ch);
case RECC_NONASCII: return !IS_REAL_ASCII (ch);
case RECC_UNIBYTE: return ISUNIBYTE (ch);
case RECC_MULTIBYTE: return !ISUNIBYTE (ch);
case RECC_WORD: return ISWORD (ch);
case RECC_ERROR: return false;
case RECC_ALNUM: ret = ISALNUM (ch);
case RECC_ALPHA: ret = ISALPHA (ch);
case RECC_BLANK: ret = ISBLANK (ch);
case RECC_CNTRL: ret = ISCNTRL (ch);
case RECC_DIGIT: ret = ISDIGIT (ch);
case RECC_GRAPH: ret = ISGRAPH (ch);
case RECC_LOWER: ret = ISLOWER (ch);
case RECC_PRINT: ret = ISPRINT (ch);
case RECC_PUNCT: ret = ISPUNCT (ch);
case RECC_SPACE: ret = ISSPACE (ch);
case RECC_UPPER: ret = ISUPPER (ch);
case RECC_XDIGIT: ret = ISXDIGIT (ch);
case RECC_ASCII: ret = IS_REAL_ASCII (ch);
case RECC_NONASCII: ret = !IS_REAL_ASCII (ch);
case RECC_UNIBYTE: ret = ISUNIBYTE (ch);
case RECC_MULTIBYTE: ret = !ISUNIBYTE (ch);
case RECC_WORD: ret = ISWORD (ch);
case RECC_ERROR: ret = false;
}
return ret;
}
/* Return a bit-pattern to use in the range-table bits to match multibyte
@ -2019,18 +2013,21 @@ static int
re_wctype_to_bit (cc)
re_wctype_t cc;
{
int ret = 0;
switch (cc)
{
case RECC_NONASCII: case RECC_PRINT: case RECC_GRAPH:
case RECC_MULTIBYTE: return BIT_MULTIBYTE;
case RECC_ALPHA: case RECC_ALNUM: case RECC_WORD: return BIT_WORD;
case RECC_LOWER: return BIT_LOWER;
case RECC_UPPER: return BIT_UPPER;
case RECC_PUNCT: return BIT_PUNCT;
case RECC_SPACE: return BIT_SPACE;
case RECC_MULTIBYTE: ret = BIT_MULTIBYTE;
case RECC_ALPHA: case RECC_ALNUM: case RECC_WORD: ret = BIT_WORD;
case RECC_LOWER: ret = BIT_LOWER;
case RECC_UPPER: ret = BIT_UPPER;
case RECC_PUNCT: ret = BIT_PUNCT;
case RECC_SPACE: ret = BIT_SPACE;
case RECC_ASCII: case RECC_DIGIT: case RECC_XDIGIT: case RECC_CNTRL:
case RECC_BLANK: case RECC_UNIBYTE: case RECC_ERROR: return 0;
case RECC_BLANK: case RECC_UNIBYTE: case RECC_ERROR: ret = 0;
}
return ret;
}
#endif
@ -2042,7 +2039,7 @@ extern int immediate_quit;
if (immediate_quit) QUIT; \
} while (0)
#else
# define IMMEDIATE_QUIT_CHECK (0)
# define IMMEDIATE_QUIT_CHECK ((void)0)
#endif
#ifndef MATCH_MAY_ALLOCATE
@ -2129,10 +2126,8 @@ regex_compile (pattern, size, syntax, bufp)
reg_syntax_t syntax;
struct re_pattern_buffer *bufp;
{
/* We fetch characters from PATTERN here. Even though PATTERN is
`char *' (i.e., signed), we declare these variables as unsigned, so
they can be reliably used as array indices. */
register unsigned int c, c1;
/* We fetch characters from PATTERN here. */
register re_wchar_t c, c1;
/* A random temporary spot in PATTERN. */
re_char *p1;
@ -2359,6 +2354,7 @@ regex_compile (pattern, size, syntax, bufp)
boolean simple = skip_one_char (laststart) == b;
unsigned int startoffset = 0;
re_opcode_t ofj =
/* Check if the loop can match the empty string. */
(simple || !analyse_first (laststart, b, NULL, 0)) ?
on_failure_jump : on_failure_jump_loop;
assert (skip_one_char (laststart) <= b);
@ -2629,7 +2625,7 @@ regex_compile (pattern, size, syntax, bufp)
if (SINGLE_BYTE_CHAR_P (c))
/* ... into bitmap. */
{
unsigned this_char;
re_wchar_t this_char;
int range_start = c, range_end = c1;
/* If the start is after the end, the range is empty. */
@ -3365,10 +3361,10 @@ insert_op2 (op, loc, arg1, arg2, end)
static boolean
at_begline_loc_p (pattern, p, syntax)
const unsigned char *pattern, *p;
re_char *pattern, *p;
reg_syntax_t syntax;
{
const unsigned char *prev = p - 2;
re_char *prev = p - 2;
boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\';
return
@ -3389,12 +3385,12 @@ at_begline_loc_p (pattern, p, syntax)
static boolean
at_endline_loc_p (p, pend, syntax)
const unsigned char *p, *pend;
re_char *p, *pend;
reg_syntax_t syntax;
{
const unsigned char *next = p;
re_char *next = p;
boolean next_backslash = *next == '\\';
const unsigned char *next_next = p + 1 < pend ? p + 1 : 0;
re_char *next_next = p + 1 < pend ? p + 1 : 0;
return
/* Before a subexpression? */
@ -3433,36 +3429,16 @@ group_in_compile_stack (compile_stack, regnum)
Return 1 if p..pend might match the empty string.
Return 0 if p..pend matches at least one char.
Return -1 if p..pend matches at least one char, but fastmap was not
updated accurately.
Return -2 if an error occurred. */
Return -1 if fastmap was not updated accurately. */
static int
analyse_first (p, pend, fastmap, multibyte)
unsigned char *p, *pend;
re_char *p, *pend;
char *fastmap;
const int multibyte;
{
int j, k;
boolean not;
#ifdef MATCH_MAY_ALLOCATE
fail_stack_type fail_stack;
#endif
#ifndef REGEX_MALLOC
char *destination;
#endif
#if defined REL_ALLOC && defined REGEX_MALLOC
/* This holds the pointer to the failure stack, when
it is allocated relocatably. */
fail_stack_elt_t *failure_stack_ptr;
#endif
/* Assume that each path through the pattern can be null until
proven otherwise. We set this false at the bottom of switch
statement, to which we get only if a particular path doesn't
match the empty string. */
boolean path_can_be_null = true;
/* If all elements for base leading-codes in fastmap is set, this
flag is set true. */
@ -3470,8 +3446,6 @@ analyse_first (p, pend, fastmap, multibyte)
assert (p);
INIT_FAIL_STACK ();
/* The loop below works as follows:
- It has a working-list kept in the PATTERN_STACK and which basically
starts by only containing a pointer to the first operation.
@ -3487,7 +3461,7 @@ analyse_first (p, pend, fastmap, multibyte)
so that `p' is monotonically increasing. More to the point, we
never set `p' (or push) anything `<= p1'. */
while (1)
while (p < pend)
{
/* `p1' is used as a marker of how far back a `on_failure_jump'
can go without being ignored. It is normally equal to `p'
@ -3497,29 +3471,12 @@ analyse_first (p, pend, fastmap, multibyte)
3..9: <body>
10: on_failure_jump 3
as used for the *? operator. */
unsigned char *p1 = p;
if (p >= pend)
{
if (path_can_be_null)
return (RESET_FAIL_STACK (), 1);
/* We have reached the (effective) end of pattern. */
if (PATTERN_STACK_EMPTY ())
return (RESET_FAIL_STACK (), 0);
p = (unsigned char*) POP_PATTERN_OP ();
path_can_be_null = true;
continue;
}
/* We should never be about to go beyond the end of the pattern. */
assert (p < pend);
re_char *p1 = p;
switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
{
case succeed:
p = pend;
return 1;
continue;
case duplicate:
@ -3551,7 +3508,7 @@ analyse_first (p, pend, fastmap, multibyte)
/* We could put all the chars except for \n (and maybe \0)
but we don't bother since it is generally not worth it. */
if (!fastmap) break;
return (RESET_FAIL_STACK (), -1);
return -1;
case charset_not:
@ -3626,7 +3583,7 @@ analyse_first (p, pend, fastmap, multibyte)
#else /* emacs */
/* This match depends on text properties. These end with
aborting optimizations. */
return (RESET_FAIL_STACK (), -1);
return -1;
case categoryspec:
case notcategoryspec:
@ -3693,8 +3650,14 @@ analyse_first (p, pend, fastmap, multibyte)
EXTRACT_NUMBER_AND_INCR (j, p);
if (p + j <= p1)
; /* Backward jump to be ignored. */
else if (!PUSH_PATTERN_OP (p + j, fail_stack))
return (RESET_FAIL_STACK (), -2);
else
{ /* We have to look down both arms.
We first go down the "straight" path so as to minimize
stack usage when going through alternatives. */
int r = analyse_first (p, pend, fastmap, multibyte);
if (r) return r;
p += j;
}
continue;
@ -3734,15 +3697,13 @@ analyse_first (p, pend, fastmap, multibyte)
/* Getting here means we have found the possible starting
characters for one path of the pattern -- and that the empty
string does not match. We need not follow this path further.
Instead, look at the next alternative (remembered on the
stack), or quit if no more. The test at the top of the loop
does these things. */
path_can_be_null = false;
p = pend;
string does not match. We need not follow this path further. */
return 0;
} /* while p */
return (RESET_FAIL_STACK (), 0);
/* We reached the end without matching anything. */
return 1;
} /* analyse_first */
/* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in
@ -3777,8 +3738,6 @@ re_compile_fastmap (bufp)
analysis = analyse_first (bufp->buffer, bufp->buffer + bufp->used,
fastmap, RE_MULTIBYTE_P (bufp));
bufp->can_be_null = (analysis != 0);
if (analysis < -1)
return analysis;
return 0;
} /* re_compile_fastmap */
@ -3921,8 +3880,7 @@ re_search_2 (bufp, str1, size1, str2, size2, startpos, range, regs, stop)
/* Update the fastmap now if not correct already. */
if (fastmap && !bufp->fastmap_accurate)
if (re_compile_fastmap (bufp) == -2)
return -2;
re_compile_fastmap (bufp);
/* See whether the pattern is anchored. */
anchored_start = (bufp->buffer[0] == begline);
@ -3958,7 +3916,7 @@ re_search_2 (bufp, str1, size1, str2, size2, startpos, range, regs, stop)
if (fastmap && startpos < total_size && !bufp->can_be_null)
{
register re_char *d;
register unsigned int buf_ch;
register re_wchar_t buf_ch;
d = POS_ADDR_VSTRING (startpos);
@ -4191,9 +4149,9 @@ static int bcmp_translate _RE_ARGS((re_char *s1, re_char *s2,
/* If the operation is a match against one or more chars,
return a pointer to the next operation, else return NULL. */
static unsigned char *
static re_char *
skip_one_char (p)
unsigned char *p;
re_char *p;
{
switch (SWITCH_ENUM_CAST (*p++))
{
@ -4303,7 +4261,7 @@ mutually_exclusive_p (bufp, p1, p2)
case endline:
case exactn:
{
register unsigned int c
register re_wchar_t c
= (re_opcode_t) *p2 == endline ? '\n'
: RE_STRING_CHAR(p2 + 2, pend - p2 - 2);
@ -4525,8 +4483,8 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
{
/* General temporaries. */
int mcnt;
size_t reg;
boolean not;
unsigned char *p1;
/* Just past the end of the corresponding string. */
re_char *end1, *end2;
@ -4545,8 +4503,8 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
re_char *dfail;
/* Where we are in the pattern, and the end of the pattern. */
unsigned char *p = bufp->buffer;
register unsigned char *pend = p + bufp->used;
re_char *p = bufp->buffer;
re_char *pend = p + bufp->used;
/* We use this to map every character in the string. */
RE_TRANSLATE_TYPE translate = bufp->translate;
@ -4655,8 +4613,8 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
/* Initialize subexpression text positions to -1 to mark ones that no
start_memory/stop_memory has been seen for. Also initialize the
register information struct. */
for (mcnt = 1; mcnt < num_regs; mcnt++)
regstart[mcnt] = regend[mcnt] = NULL;
for (reg = 1; reg < num_regs; reg++)
regstart[reg] = regend[reg] = NULL;
/* We move `string1' into `string2' if the latter's empty -- but not if
`string1' is null. */
@ -4758,10 +4716,10 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
DEBUG_PRINT1 ("\nSAVING match as best so far.\n");
for (mcnt = 1; mcnt < num_regs; mcnt++)
for (reg = 1; reg < num_regs; reg++)
{
best_regstart[mcnt] = regstart[mcnt];
best_regend[mcnt] = regend[mcnt];
best_regstart[reg] = regstart[reg];
best_regend[reg] = regend[reg];
}
}
goto fail;
@ -4784,10 +4742,10 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
dend = ((d >= string1 && d <= end1)
? end_match_1 : end_match_2);
for (mcnt = 1; mcnt < num_regs; mcnt++)
for (reg = 1; reg < num_regs; reg++)
{
regstart[mcnt] = best_regstart[mcnt];
regend[mcnt] = best_regend[mcnt];
regstart[reg] = best_regstart[reg];
regend[reg] = best_regend[reg];
}
}
} /* d != end_match_2 */
@ -4847,16 +4805,16 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
/* Go through the first `min (num_regs, regs->num_regs)'
registers, since that is all we initialized. */
for (mcnt = 1; mcnt < MIN (num_regs, regs->num_regs); mcnt++)
for (reg = 1; reg < MIN (num_regs, regs->num_regs); reg++)
{
if (REG_UNSET (regstart[mcnt]) || REG_UNSET (regend[mcnt]))
regs->start[mcnt] = regs->end[mcnt] = -1;
if (REG_UNSET (regstart[reg]) || REG_UNSET (regend[reg]))
regs->start[reg] = regs->end[reg] = -1;
else
{
regs->start[mcnt]
= (regoff_t) POINTER_TO_OFFSET (regstart[mcnt]);
regs->end[mcnt]
= (regoff_t) POINTER_TO_OFFSET (regend[mcnt]);
regs->start[reg]
= (regoff_t) POINTER_TO_OFFSET (regstart[reg]);
regs->end[reg]
= (regoff_t) POINTER_TO_OFFSET (regend[reg]);
}
}
@ -4865,8 +4823,8 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
we (re)allocated the registers, this is the case,
because we always allocate enough to have at least one
-1 at the end. */
for (mcnt = num_regs; mcnt < regs->num_regs; mcnt++)
regs->start[mcnt] = regs->end[mcnt] = -1;
for (reg = num_regs; reg < regs->num_regs; reg++)
regs->start[reg] = regs->end[reg] = -1;
} /* regs && !bufp->no_sub */
DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n",
@ -4964,7 +4922,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
case anychar:
{
int buf_charlen;
unsigned int buf_ch;
re_wchar_t buf_ch;
DEBUG_PRINT1 ("EXECUTING anychar.\n");
@ -4993,7 +4951,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
/* Start of actual range_table, or end of bitmap if there is no
range table. */
unsigned char *range_table;
re_char *range_table;
/* Nonzero if there is a range table. */
int range_table_exists;
@ -5317,8 +5275,10 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
DEBUG_PRINT3 ("EXECUTING on_failure_jump_smart %d (to %p).\n",
mcnt, p + mcnt);
{
unsigned char *p1 = p; /* Next operation. */
re_char *p1 = p; /* Next operation. */
/* Please don't add casts to try and shut up GCC. */
unsigned char *p2 = p + mcnt; /* Destination of the jump. */
unsigned char *p3 = p - 3; /* Location of the opcode. */
p -= 3; /* Reset so that we will re-execute the
instruction once it's been changed. */
@ -5334,14 +5294,14 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
{
/* Use a fast `on_failure_keep_string_jump' loop. */
DEBUG_PRINT1 (" smart exclusive => fast loop.\n");
*p = (unsigned char) on_failure_keep_string_jump;
*p3 = (unsigned char) on_failure_keep_string_jump;
STORE_NUMBER (p2 - 2, mcnt + 3);
}
else
{
/* Default to a safe `on_failure_jump' loop. */
DEBUG_PRINT1 (" smart default => slow loop.\n");
*p = (unsigned char) on_failure_jump;
*p3 = (unsigned char) on_failure_jump;
}
DEBUG_STATEMENT (debug -= 2);
}
@ -5361,17 +5321,18 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
/* Have to succeed matching what follows at least n times.
After that, handle like `on_failure_jump'. */
case succeed_n:
/* Signedness doesn't matter since we only compare MCNT to 0. */
EXTRACT_NUMBER (mcnt, p + 2);
DEBUG_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt);
/* Originally, mcnt is how many times we HAVE to succeed. */
if (mcnt != 0)
{
/* Please don't add a cast to try and shut up GCC. */
unsigned char *p2 = p + 2; /* Location of the counter. */
mcnt--;
p += 2;
PUSH_FAILURE_COUNT (p);
DEBUG_PRINT3 (" Setting %p to %d.\n", p, mcnt);
STORE_NUMBER_AND_INCR (p, mcnt);
p += 4;
PUSH_NUMBER (p2, mcnt);
}
else
/* The two bytes encoding mcnt == 0 are two no_op opcodes. */
@ -5379,15 +5340,17 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
break;
case jump_n:
/* Signedness doesn't matter since we only compare MCNT to 0. */
EXTRACT_NUMBER (mcnt, p + 2);
DEBUG_PRINT2 ("EXECUTING jump_n %d.\n", mcnt);
/* Originally, this is how many times we CAN jump. */
if (mcnt != 0)
{
/* Please don't add a cast to try and shut up GCC. */
unsigned char *p2 = p + 2; /* Location of the counter. */
mcnt--;
PUSH_FAILURE_COUNT (p + 2);
STORE_NUMBER (p + 2, mcnt);
PUSH_NUMBER (p2, mcnt);
goto unconditional_jump;
}
/* If don't have to jump any more, skip over the rest of command. */
@ -5397,14 +5360,16 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
case set_number_at:
{
unsigned char *p2; /* Location of the counter. */
DEBUG_PRINT1 ("EXECUTING set_number_at.\n");
EXTRACT_NUMBER_AND_INCR (mcnt, p);
p1 = p + mcnt;
/* Please don't add a cast to try and shut up GCC. */
p2 = p + mcnt;
/* Signedness doesn't matter since we only copy MCNT's bits . */
EXTRACT_NUMBER_AND_INCR (mcnt, p);
DEBUG_PRINT3 (" Setting %p to %d.\n", p1, mcnt);
PUSH_FAILURE_COUNT (p1);
STORE_NUMBER (p1, mcnt);
DEBUG_PRINT3 (" Setting %p to %d.\n", p2, mcnt);
PUSH_NUMBER (p2, mcnt);
break;
}
@ -5422,7 +5387,8 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
{
/* C1 is the character before D, S1 is the syntax of C1, C2
is the character at D, and S2 is the syntax of C2. */
int c1, c2, s1, s2;
re_wchar_t c1, c2;
int s1, s2;
#ifdef emacs
int offset = PTR_TO_OFFSET (d - 1);
int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
@ -5461,7 +5427,8 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
{
/* C1 is the character before D, S1 is the syntax of C1, C2
is the character at D, and S2 is the syntax of C2. */
int c1, c2, s1, s2;
re_wchar_t c1, c2;
int s1, s2;
#ifdef emacs
int offset = PTR_TO_OFFSET (d);
int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
@ -5504,7 +5471,8 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
{
/* C1 is the character before D, S1 is the syntax of C1, C2
is the character at D, and S2 is the syntax of C2. */
int c1, c2, s1, s2;
re_wchar_t c1, c2;
int s1, s2;
#ifdef emacs
int offset = PTR_TO_OFFSET (d) - 1;
int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
@ -5549,7 +5517,8 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
}
#endif
{
int c, len;
int len;
re_wchar_t c;
c = RE_STRING_CHAR_AND_LENGTH (d, dend - d, len);
@ -5585,7 +5554,9 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
DEBUG_PRINT3 ("EXECUTING %scategoryspec %d.\n", not?"not":"", mcnt);
PREFETCH ();
{
int c, len;
int len;
re_wchar_t c;
c = RE_STRING_CHAR_AND_LENGTH (d, dend - d, len);
if ((!CHAR_HAS_CATEGORY (c, mcnt)) ^ not)
@ -5607,8 +5578,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
IMMEDIATE_QUIT_CHECK;
if (!FAIL_STACK_EMPTY ())
{
re_char *str;
unsigned char *pat;
re_char *str, *pat;
/* A restart point is known. Restore to that state. */
DEBUG_PRINT1 ("\nFAIL:\n");
POP_FAILURE_POINT (str, pat);
@ -5678,7 +5648,7 @@ bcmp_translate (s1, s2, len, translate, multibyte)
while (p1 < p1_end && p2 < p2_end)
{
int p1_charlen, p2_charlen;
int p1_ch, p2_ch;
re_wchar_t p1_ch, p2_ch;
p1_ch = RE_STRING_CHAR_AND_LENGTH (p1, p1_end - p1, p1_charlen);
p2_ch = RE_STRING_CHAR_AND_LENGTH (p2, p2_end - p2, p2_charlen);