mirror of
https://git.savannah.gnu.org/git/emacs.git
synced 2025-01-05 11:45:45 +00:00
*** empty log message ***
This commit is contained in:
parent
9549c46d0b
commit
9114e2792f
166
src/regex.c
166
src/regex.c
@ -47,9 +47,15 @@
|
|||||||
`BSTRING', as far as I know, and neither of them use this code. */
|
`BSTRING', as far as I know, and neither of them use this code. */
|
||||||
#if USG || STDC_HEADERS
|
#if USG || STDC_HEADERS
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
#ifndef bcmp
|
||||||
#define bcmp(s1, s2, n) memcmp ((s1), (s2), (n))
|
#define bcmp(s1, s2, n) memcmp ((s1), (s2), (n))
|
||||||
|
#endif
|
||||||
|
#ifndef bcopy
|
||||||
#define bcopy(s, d, n) memcpy ((d), (s), (n))
|
#define bcopy(s, d, n) memcpy ((d), (s), (n))
|
||||||
|
#endif
|
||||||
|
#ifndef bzero
|
||||||
#define bzero(s, n) memset ((s), 0, (n))
|
#define bzero(s, n) memset ((s), 0, (n))
|
||||||
|
#endif
|
||||||
#else
|
#else
|
||||||
#include <strings.h>
|
#include <strings.h>
|
||||||
#endif
|
#endif
|
||||||
@ -135,12 +141,8 @@ init_syntax_once ()
|
|||||||
(Per Bothner suggested the basic approach.) */
|
(Per Bothner suggested the basic approach.) */
|
||||||
#undef SIGN_EXTEND_CHAR
|
#undef SIGN_EXTEND_CHAR
|
||||||
#if __STDC__
|
#if __STDC__
|
||||||
#ifndef VMS
|
|
||||||
#define SIGN_EXTEND_CHAR(c) ((signed char) (c))
|
#define SIGN_EXTEND_CHAR(c) ((signed char) (c))
|
||||||
#else /* On VMS, VAXC doesn't recognize `signed' before `char' */
|
#else /* not __STDC__ */
|
||||||
#define SIGN_EXTEND_CHAR(c) ((char) (c))
|
|
||||||
#endif /* VMS */
|
|
||||||
#else
|
|
||||||
/* As in Harbison and Steele. */
|
/* As in Harbison and Steele. */
|
||||||
#define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128)
|
#define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128)
|
||||||
#endif
|
#endif
|
||||||
@ -447,6 +449,7 @@ static int debug = 0;
|
|||||||
#define DEBUG_PRINT1(x) if (debug) printf (x)
|
#define DEBUG_PRINT1(x) if (debug) printf (x)
|
||||||
#define DEBUG_PRINT2(x1, x2) if (debug) printf (x1, x2)
|
#define DEBUG_PRINT2(x1, x2) if (debug) printf (x1, x2)
|
||||||
#define DEBUG_PRINT3(x1, x2, x3) if (debug) printf (x1, x2, x3)
|
#define DEBUG_PRINT3(x1, x2, x3) if (debug) printf (x1, x2, x3)
|
||||||
|
#define DEBUG_PRINT4(x1, x2, x3, x4) if (debug) printf (x1, x2, x3, x4)
|
||||||
#define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) \
|
#define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) \
|
||||||
if (debug) print_partial_compiled_pattern (s, e)
|
if (debug) print_partial_compiled_pattern (s, e)
|
||||||
#define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) \
|
#define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) \
|
||||||
@ -760,6 +763,7 @@ print_double_string (where, string1, size1, string2, size2)
|
|||||||
#define DEBUG_PRINT1(x)
|
#define DEBUG_PRINT1(x)
|
||||||
#define DEBUG_PRINT2(x1, x2)
|
#define DEBUG_PRINT2(x1, x2)
|
||||||
#define DEBUG_PRINT3(x1, x2, x3)
|
#define DEBUG_PRINT3(x1, x2, x3)
|
||||||
|
#define DEBUG_PRINT4(x1, x2, x3, x4)
|
||||||
#define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)
|
#define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)
|
||||||
#define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)
|
#define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)
|
||||||
|
|
||||||
@ -1025,9 +1029,9 @@ typedef struct
|
|||||||
`buffer' is the compiled pattern;
|
`buffer' is the compiled pattern;
|
||||||
`syntax' is set to SYNTAX;
|
`syntax' is set to SYNTAX;
|
||||||
`used' is set to the length of the compiled pattern;
|
`used' is set to the length of the compiled pattern;
|
||||||
`fastmap_accurate' is set to zero;
|
`fastmap_accurate' is zero;
|
||||||
`re_nsub' is set to the number of groups in PATTERN;
|
`re_nsub' is the number of subexpressions in PATTERN;
|
||||||
`not_bol' and `not_eol' are set to zero.
|
`not_bol' and `not_eol' are zero;
|
||||||
|
|
||||||
The `fastmap' and `newline_anchor' fields are neither
|
The `fastmap' and `newline_anchor' fields are neither
|
||||||
examined nor set. */
|
examined nor set. */
|
||||||
@ -1676,10 +1680,10 @@ regex_compile (pattern, size, syntax, bufp)
|
|||||||
| v | v
|
| v | v
|
||||||
a | b | c
|
a | b | c
|
||||||
|
|
||||||
If we are at `b,' then fixup_alt_jump right now points to a
|
If we are at `b', then fixup_alt_jump right now points to a
|
||||||
three-byte space after `a.' We'll put in the jump, set
|
three-byte space after `a'. We'll put in the jump, set
|
||||||
fixup_alt_jump to right after `b,' and leave behind three
|
fixup_alt_jump to right after `b', and leave behind three
|
||||||
bytes which we'll fill in when we get to after `c.' */
|
bytes which we'll fill in when we get to after `c'. */
|
||||||
|
|
||||||
if (fixup_alt_jump)
|
if (fixup_alt_jump)
|
||||||
STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
|
STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
|
||||||
@ -2320,6 +2324,7 @@ typedef struct
|
|||||||
int this_reg; \
|
int this_reg; \
|
||||||
\
|
\
|
||||||
DEBUG_STATEMENT (failure_id++); \
|
DEBUG_STATEMENT (failure_id++); \
|
||||||
|
DEBUG_STATEMENT (nfailure_points_pushed++); \
|
||||||
DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id); \
|
DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id); \
|
||||||
DEBUG_PRINT2 (" Before push, next avail: %d\n", (fail_stack).avail);\
|
DEBUG_PRINT2 (" Before push, next avail: %d\n", (fail_stack).avail);\
|
||||||
DEBUG_PRINT2 (" size: %d\n", (fail_stack).size);\
|
DEBUG_PRINT2 (" size: %d\n", (fail_stack).size);\
|
||||||
@ -2473,6 +2478,8 @@ typedef struct
|
|||||||
regstart[this_reg] = (const char *) POP_FAILURE_ITEM (); \
|
regstart[this_reg] = (const char *) POP_FAILURE_ITEM (); \
|
||||||
DEBUG_PRINT2 (" start: 0x%x\n", regstart[this_reg]); \
|
DEBUG_PRINT2 (" start: 0x%x\n", regstart[this_reg]); \
|
||||||
} \
|
} \
|
||||||
|
\
|
||||||
|
DEBUG_STATEMENT (nfailure_points_popped++); \
|
||||||
} /* POP_FAILURE_POINT */
|
} /* POP_FAILURE_POINT */
|
||||||
|
|
||||||
/* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in
|
/* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in
|
||||||
@ -2860,15 +2867,9 @@ re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop)
|
|||||||
else if (endpos > total_size)
|
else if (endpos > total_size)
|
||||||
range = total_size - startpos;
|
range = total_size - startpos;
|
||||||
|
|
||||||
/* Update the fastmap now if not correct already. */
|
|
||||||
if (fastmap && !bufp->fastmap_accurate)
|
|
||||||
if (re_compile_fastmap (bufp) == -2)
|
|
||||||
return -2;
|
|
||||||
|
|
||||||
/* If the search isn't to be a backwards one, don't waste time in a
|
/* If the search isn't to be a backwards one, don't waste time in a
|
||||||
long search for a pattern that says it is anchored. */
|
search for a pattern that must be anchored. */
|
||||||
if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == begbuf
|
if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == begbuf && range > 0)
|
||||||
&& range > 0)
|
|
||||||
{
|
{
|
||||||
if (startpos > 0)
|
if (startpos > 0)
|
||||||
return -1;
|
return -1;
|
||||||
@ -2876,6 +2877,12 @@ re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop)
|
|||||||
range = 1;
|
range = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Update the fastmap now if not correct already. */
|
||||||
|
if (fastmap && !bufp->fastmap_accurate)
|
||||||
|
if (re_compile_fastmap (bufp) == -2)
|
||||||
|
return -2;
|
||||||
|
|
||||||
|
/* Loop through the string, looking for a place to start matching. */
|
||||||
for (;;)
|
for (;;)
|
||||||
{
|
{
|
||||||
/* If a fastmap is supplied, skip quickly over characters that
|
/* If a fastmap is supplied, skip quickly over characters that
|
||||||
@ -2913,7 +2920,7 @@ re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop)
|
|||||||
? string2[startpos - size1]
|
? string2[startpos - size1]
|
||||||
: string1[startpos]);
|
: string1[startpos]);
|
||||||
|
|
||||||
if (!fastmap[TRANSLATE (c)])
|
if (!fastmap[(unsigned char) TRANSLATE (c)])
|
||||||
goto advance;
|
goto advance;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -2987,12 +2994,9 @@ typedef union
|
|||||||
#define EVER_MATCHED_SOMETHING(R) ((R).bits.ever_matched_something)
|
#define EVER_MATCHED_SOMETHING(R) ((R).bits.ever_matched_something)
|
||||||
|
|
||||||
|
|
||||||
/* Call this when have matched something; it sets `matched' flags for the
|
/* Call this when we have matched a real character; it sets `matched' flags
|
||||||
registers corresponding to the group of which we currently are inside.
|
for the subexpressions which we are currently inside. Also records
|
||||||
Also records whether this group ever matched something. We only care
|
that those subexprs have matched. */
|
||||||
about this information at `stop_memory', and then only about the
|
|
||||||
previous time through the loop (if the group is starred or whatever).
|
|
||||||
So it is ok to clear all the nonactive registers here. */
|
|
||||||
#define SET_REGS_MATCHED() \
|
#define SET_REGS_MATCHED() \
|
||||||
do \
|
do \
|
||||||
{ \
|
{ \
|
||||||
@ -3037,24 +3041,24 @@ typedef union
|
|||||||
|
|
||||||
/* Test if at very beginning or at very end of the virtual concatenation
|
/* Test if at very beginning or at very end of the virtual concatenation
|
||||||
of `string1' and `string2'. If only one string, it's `string2'. */
|
of `string1' and `string2'. If only one string, it's `string2'. */
|
||||||
#define AT_STRINGS_BEG() (d == (size1 ? string1 : string2) || !size2)
|
#define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2)
|
||||||
#define AT_STRINGS_END() (d == end2)
|
#define AT_STRINGS_END(d) ((d) == end2)
|
||||||
|
|
||||||
|
|
||||||
/* Test if D points to a character which is word-constituent. We have
|
/* Test if D points to a character which is word-constituent. We have
|
||||||
two special cases to check for: if past the end of string1, look at
|
two special cases to check for: if past the end of string1, look at
|
||||||
the first character in string2; and if before the beginning of
|
the first character in string2; and if before the beginning of
|
||||||
string2, look at the last character in string1.
|
string2, look at the last character in string1. */
|
||||||
|
#define WORDCHAR_P(d) \
|
||||||
Assumes `string1' exists, so use in conjunction with AT_STRINGS_BEG (). */
|
|
||||||
#define LETTER_P(d) \
|
|
||||||
(SYNTAX ((d) == end1 ? *string2 \
|
(SYNTAX ((d) == end1 ? *string2 \
|
||||||
: (d) == string2 - 1 ? *(end1 - 1) : *(d)) == Sword)
|
: (d) == string2 - 1 ? *(end1 - 1) : *(d)) \
|
||||||
|
== Sword)
|
||||||
|
|
||||||
/* Test if the character before D and the one at D differ with respect
|
/* Test if the character before D and the one at D differ with respect
|
||||||
to being word-constituent. */
|
to being word-constituent. */
|
||||||
#define AT_WORD_BOUNDARY(d) \
|
#define AT_WORD_BOUNDARY(d) \
|
||||||
(AT_STRINGS_BEG () || AT_STRINGS_END () || LETTER_P (d - 1) != LETTER_P (d))
|
(AT_STRINGS_BEG (d) || AT_STRINGS_END (d) \
|
||||||
|
|| WORDCHAR_P (d - 1) != WORDCHAR_P (d))
|
||||||
|
|
||||||
|
|
||||||
/* Free everything we malloc. */
|
/* Free everything we malloc. */
|
||||||
@ -3161,6 +3165,7 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
|
|||||||
fail_stack_type fail_stack;
|
fail_stack_type fail_stack;
|
||||||
#ifdef DEBUG
|
#ifdef DEBUG
|
||||||
static unsigned failure_id = 0;
|
static unsigned failure_id = 0;
|
||||||
|
unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* We fill all the registers internally, independent of what we
|
/* We fill all the registers internally, independent of what we
|
||||||
@ -3254,8 +3259,7 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
|
|||||||
else
|
else
|
||||||
{
|
{
|
||||||
/* We must initialize all our variables to NULL, so that
|
/* We must initialize all our variables to NULL, so that
|
||||||
`FREE_VARIABLES' doesn't try to free them. Too bad this isn't
|
`FREE_VARIABLES' doesn't try to free them. */
|
||||||
Lisp, so we could have a list of variables. As it is, */
|
|
||||||
regstart = regend = old_regstart = old_regend = best_regstart
|
regstart = regend = old_regstart = old_regend = best_regstart
|
||||||
= best_regend = reg_dummy = NULL;
|
= best_regend = reg_dummy = NULL;
|
||||||
reg_info = reg_info_dummy = (register_info_type *) NULL;
|
reg_info = reg_info_dummy = (register_info_type *) NULL;
|
||||||
@ -3339,8 +3343,10 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
|
|||||||
|
|
||||||
if (p == pend)
|
if (p == pend)
|
||||||
{ /* End of pattern means we might have succeeded. */
|
{ /* End of pattern means we might have succeeded. */
|
||||||
DEBUG_PRINT1 ("End of pattern: ");
|
DEBUG_PRINT1 ("end of pattern ... ");
|
||||||
/* If not end of string, try backtracking. Otherwise done. */
|
|
||||||
|
/* If we haven't matched the entire string, and we want the
|
||||||
|
longest match, try backtracking. */
|
||||||
if (d != end_match_2)
|
if (d != end_match_2)
|
||||||
{
|
{
|
||||||
DEBUG_PRINT1 ("backtracking.\n");
|
DEBUG_PRINT1 ("backtracking.\n");
|
||||||
@ -3378,6 +3384,8 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
|
|||||||
For example, the pattern `x.*y.*z' against the
|
For example, the pattern `x.*y.*z' against the
|
||||||
strings `x-' and `y-z-', if the two strings are
|
strings `x-' and `y-z-', if the two strings are
|
||||||
not consecutive in memory. */
|
not consecutive in memory. */
|
||||||
|
DEBUG_PRINT1 ("Restoring best registers.\n");
|
||||||
|
|
||||||
d = match_end;
|
d = match_end;
|
||||||
dend = ((d >= string1 && d <= end1)
|
dend = ((d >= string1 && d <= end1)
|
||||||
? end_match_1 : end_match_2);
|
? end_match_1 : end_match_2);
|
||||||
@ -3390,7 +3398,7 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
|
|||||||
}
|
}
|
||||||
} /* d != end_match_2 */
|
} /* d != end_match_2 */
|
||||||
|
|
||||||
DEBUG_PRINT1 ("\nAccepting match.\n");
|
DEBUG_PRINT1 ("Accepting match.\n");
|
||||||
|
|
||||||
/* If caller wants register contents data back, do it. */
|
/* If caller wants register contents data back, do it. */
|
||||||
if (regs && !bufp->no_sub)
|
if (regs && !bufp->no_sub)
|
||||||
@ -3456,7 +3464,10 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
|
|||||||
} /* regs && !bufp->no_sub */
|
} /* regs && !bufp->no_sub */
|
||||||
|
|
||||||
FREE_VARIABLES ();
|
FREE_VARIABLES ();
|
||||||
DEBUG_PRINT2 ("%d registers pushed.\n", num_regs_pushed);
|
DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n",
|
||||||
|
nfailure_points_pushed, nfailure_points_popped,
|
||||||
|
nfailure_points_pushed - nfailure_points_popped);
|
||||||
|
DEBUG_PRINT2 ("%u registers pushed.\n", num_regs_pushed);
|
||||||
|
|
||||||
mcnt = d - pos - (MATCHING_IN_FIRST_STRING
|
mcnt = d - pos - (MATCHING_IN_FIRST_STRING
|
||||||
? string1
|
? string1
|
||||||
@ -3658,7 +3669,7 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
|
|||||||
|
|
||||||
/* If just failed to match something this time around with a
|
/* If just failed to match something this time around with a
|
||||||
group that's operated on by a repetition operator, try to
|
group that's operated on by a repetition operator, try to
|
||||||
force exit from the ``loop,'' and restore the register
|
force exit from the ``loop'', and restore the register
|
||||||
information for this group that we had before trying this
|
information for this group that we had before trying this
|
||||||
last match. */
|
last match. */
|
||||||
if ((!MATCHED_SOMETHING (reg_info[*p])
|
if ((!MATCHED_SOMETHING (reg_info[*p])
|
||||||
@ -3802,7 +3813,7 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
|
|||||||
case begline:
|
case begline:
|
||||||
DEBUG_PRINT1 ("EXECUTING begline.\n");
|
DEBUG_PRINT1 ("EXECUTING begline.\n");
|
||||||
|
|
||||||
if (AT_STRINGS_BEG ())
|
if (AT_STRINGS_BEG (d))
|
||||||
{
|
{
|
||||||
if (!bufp->not_bol) break;
|
if (!bufp->not_bol) break;
|
||||||
}
|
}
|
||||||
@ -3818,7 +3829,7 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
|
|||||||
case endline:
|
case endline:
|
||||||
DEBUG_PRINT1 ("EXECUTING endline.\n");
|
DEBUG_PRINT1 ("EXECUTING endline.\n");
|
||||||
|
|
||||||
if (AT_STRINGS_END ())
|
if (AT_STRINGS_END (d))
|
||||||
{
|
{
|
||||||
if (!bufp->not_eol) break;
|
if (!bufp->not_eol) break;
|
||||||
}
|
}
|
||||||
@ -3835,7 +3846,7 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
|
|||||||
/* Match at the very beginning of the data. */
|
/* Match at the very beginning of the data. */
|
||||||
case begbuf:
|
case begbuf:
|
||||||
DEBUG_PRINT1 ("EXECUTING begbuf.\n");
|
DEBUG_PRINT1 ("EXECUTING begbuf.\n");
|
||||||
if (AT_STRINGS_BEG ())
|
if (AT_STRINGS_BEG (d))
|
||||||
break;
|
break;
|
||||||
goto fail;
|
goto fail;
|
||||||
|
|
||||||
@ -3843,7 +3854,7 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
|
|||||||
/* Match at the very end of the data. */
|
/* Match at the very end of the data. */
|
||||||
case endbuf:
|
case endbuf:
|
||||||
DEBUG_PRINT1 ("EXECUTING endbuf.\n");
|
DEBUG_PRINT1 ("EXECUTING endbuf.\n");
|
||||||
if (AT_STRINGS_END ())
|
if (AT_STRINGS_END (d))
|
||||||
break;
|
break;
|
||||||
goto fail;
|
goto fail;
|
||||||
|
|
||||||
@ -3897,7 +3908,7 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
|
|||||||
the original * applied to a group), save the information
|
the original * applied to a group), save the information
|
||||||
for that group and all inner ones, so that if we fail back
|
for that group and all inner ones, so that if we fail back
|
||||||
to this point, the group's information will be correct.
|
to this point, the group's information will be correct.
|
||||||
For example, in \(a*\)*\1, we only need the preceding group,
|
For example, in \(a*\)*\1, we need the preceding group,
|
||||||
and in \(\(a*\)b*\)\2, we need the inner group. */
|
and in \(\(a*\)b*\)\2, we need the inner group. */
|
||||||
|
|
||||||
/* We can't use `p' to check ahead because we push
|
/* We can't use `p' to check ahead because we push
|
||||||
@ -3927,8 +3938,8 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
|
|||||||
break;
|
break;
|
||||||
|
|
||||||
|
|
||||||
/* A smart repeat ends with a maybe_pop_jump.
|
/* A smart repeat ends with `maybe_pop_jump'.
|
||||||
We change it either to a pop_failure_jump or a jump. */
|
We change it to either `pop_failure_jump' or `jump'. */
|
||||||
case maybe_pop_jump:
|
case maybe_pop_jump:
|
||||||
EXTRACT_NUMBER_AND_INCR (mcnt, p);
|
EXTRACT_NUMBER_AND_INCR (mcnt, p);
|
||||||
DEBUG_PRINT2 ("EXECUTING maybe_pop_jump %d.\n", mcnt);
|
DEBUG_PRINT2 ("EXECUTING maybe_pop_jump %d.\n", mcnt);
|
||||||
@ -3956,10 +3967,21 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
|
|||||||
|
|
||||||
/* If we're at the end of the pattern, we can change. */
|
/* If we're at the end of the pattern, we can change. */
|
||||||
if (p2 == pend)
|
if (p2 == pend)
|
||||||
{
|
{ /* But if we're also at the end of the string, we might
|
||||||
|
as well skip changing anything. For example, in `a+'
|
||||||
|
against `a', we'll have already matched the `a', and
|
||||||
|
I don't see the point of changing the opcode,
|
||||||
|
popping the failure point, finding out it fails, and
|
||||||
|
then going into our endgame. */
|
||||||
|
if (d == dend)
|
||||||
|
{
|
||||||
|
p = pend;
|
||||||
|
DEBUG_PRINT1 (" End of pattern & string => done.\n");
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
p[-3] = (unsigned char) pop_failure_jump;
|
p[-3] = (unsigned char) pop_failure_jump;
|
||||||
DEBUG_PRINT1
|
DEBUG_PRINT1 (" End of pattern => pop_failure_jump.\n");
|
||||||
(" End of pattern: change to `pop_failure_jump'.\n");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
else if ((re_opcode_t) *p2 == exactn
|
else if ((re_opcode_t) *p2 == exactn
|
||||||
@ -3973,7 +3995,12 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
|
|||||||
to the `maybe_finalize_jump' of this case. Examine what
|
to the `maybe_finalize_jump' of this case. Examine what
|
||||||
follows. */
|
follows. */
|
||||||
if ((re_opcode_t) p1[3] == exactn && p1[5] != c)
|
if ((re_opcode_t) p1[3] == exactn && p1[5] != c)
|
||||||
p[-3] = (unsigned char) pop_failure_jump;
|
{
|
||||||
|
p[-3] = (unsigned char) pop_failure_jump;
|
||||||
|
DEBUG_PRINT3 (" %c != %c => pop_failure_jump.\n",
|
||||||
|
c, p1[5]);
|
||||||
|
}
|
||||||
|
|
||||||
else if ((re_opcode_t) p1[3] == charset
|
else if ((re_opcode_t) p1[3] == charset
|
||||||
|| (re_opcode_t) p1[3] == charset_not)
|
|| (re_opcode_t) p1[3] == charset_not)
|
||||||
{
|
{
|
||||||
@ -3988,9 +4015,7 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
|
|||||||
if (!not)
|
if (!not)
|
||||||
{
|
{
|
||||||
p[-3] = (unsigned char) pop_failure_jump;
|
p[-3] = (unsigned char) pop_failure_jump;
|
||||||
DEBUG_PRINT1
|
DEBUG_PRINT1 (" No match => pop_failure_jump.\n");
|
||||||
(" No match: change to `pop_failure_jump'.\n");
|
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -3999,6 +4024,7 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
|
|||||||
if ((re_opcode_t) p[-1] != pop_failure_jump)
|
if ((re_opcode_t) p[-1] != pop_failure_jump)
|
||||||
{
|
{
|
||||||
p[-1] = (unsigned char) jump;
|
p[-1] = (unsigned char) jump;
|
||||||
|
DEBUG_PRINT1 (" Match => jump.\n");
|
||||||
goto unconditional_jump;
|
goto unconditional_jump;
|
||||||
}
|
}
|
||||||
/* Note fall through. */
|
/* Note fall through. */
|
||||||
@ -4060,7 +4086,7 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
|
|||||||
|
|
||||||
|
|
||||||
/* At the end of an alternative, we need to push a dummy failure
|
/* At the end of an alternative, we need to push a dummy failure
|
||||||
point in case we are followed by a pop_failure_jump', because
|
point in case we are followed by a `pop_failure_jump', because
|
||||||
we don't want the failure point for the alternative to be
|
we don't want the failure point for the alternative to be
|
||||||
popped. For example, matching `(a|ab)*' against `aab'
|
popped. For example, matching `(a|ab)*' against `aab'
|
||||||
requires that we match the `ab' alternative. */
|
requires that we match the `ab' alternative. */
|
||||||
@ -4137,14 +4163,14 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
|
|||||||
|
|
||||||
case wordbeg:
|
case wordbeg:
|
||||||
DEBUG_PRINT1 ("EXECUTING wordbeg.\n");
|
DEBUG_PRINT1 ("EXECUTING wordbeg.\n");
|
||||||
if (LETTER_P (d) && (AT_STRINGS_BEG () || !LETTER_P (d - 1)))
|
if (WORDCHAR_P (d) && (AT_STRINGS_BEG (d) || !WORDCHAR_P (d - 1)))
|
||||||
break;
|
break;
|
||||||
goto fail;
|
goto fail;
|
||||||
|
|
||||||
case wordend:
|
case wordend:
|
||||||
DEBUG_PRINT1 ("EXECUTING wordend.\n");
|
DEBUG_PRINT1 ("EXECUTING wordend.\n");
|
||||||
if (!AT_STRINGS_BEG () && LETTER_P (d - 1)
|
if (!AT_STRINGS_BEG (d) && WORDCHAR_P (d - 1)
|
||||||
&& (!LETTER_P (d) || AT_STRINGS_END ()))
|
&& (!WORDCHAR_P (d) || AT_STRINGS_END (d)))
|
||||||
break;
|
break;
|
||||||
goto fail;
|
goto fail;
|
||||||
|
|
||||||
@ -4181,11 +4207,12 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
|
|||||||
goto matchsyntax;
|
goto matchsyntax;
|
||||||
|
|
||||||
case wordchar:
|
case wordchar:
|
||||||
DEBUG_PRINT1 ("EXECUTING wordchar.\n");
|
DEBUG_PRINT1 ("EXECUTING Emacs wordchar.\n");
|
||||||
mcnt = (int) Sword;
|
mcnt = (int) Sword;
|
||||||
matchsyntax:
|
matchsyntax:
|
||||||
PREFETCH ();
|
PREFETCH ();
|
||||||
if (SYNTAX (*d++) != (enum syntaxcode) mcnt) goto fail;
|
if (SYNTAX (*d++) != (enum syntaxcode) mcnt)
|
||||||
|
goto fail;
|
||||||
SET_REGS_MATCHED ();
|
SET_REGS_MATCHED ();
|
||||||
break;
|
break;
|
||||||
|
|
||||||
@ -4195,11 +4222,12 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
|
|||||||
goto matchnotsyntax;
|
goto matchnotsyntax;
|
||||||
|
|
||||||
case notwordchar:
|
case notwordchar:
|
||||||
DEBUG_PRINT1 ("EXECUTING notwordchar.\n");
|
DEBUG_PRINT1 ("EXECUTING Emacs notwordchar.\n");
|
||||||
mcnt = (int) Sword;
|
mcnt = (int) Sword;
|
||||||
matchnotsyntax: /* We goto here from notsyntaxspec. */
|
matchnotsyntax:
|
||||||
PREFETCH ();
|
PREFETCH ();
|
||||||
if (SYNTAX (*d++) == (enum syntaxcode) mcnt) goto fail;
|
if (SYNTAX (*d++) == (enum syntaxcode) mcnt)
|
||||||
|
goto fail;
|
||||||
SET_REGS_MATCHED ();
|
SET_REGS_MATCHED ();
|
||||||
break;
|
break;
|
||||||
|
|
||||||
@ -4207,17 +4235,19 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
|
|||||||
case wordchar:
|
case wordchar:
|
||||||
DEBUG_PRINT1 ("EXECUTING non-Emacs wordchar.\n");
|
DEBUG_PRINT1 ("EXECUTING non-Emacs wordchar.\n");
|
||||||
PREFETCH ();
|
PREFETCH ();
|
||||||
if (!LETTER_P (d))
|
if (!WORDCHAR_P (d))
|
||||||
goto fail;
|
goto fail;
|
||||||
SET_REGS_MATCHED ();
|
SET_REGS_MATCHED ();
|
||||||
|
d++;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case notwordchar:
|
case notwordchar:
|
||||||
DEBUG_PRINT1 ("EXECUTING non-Emacs notwordchar.\n");
|
DEBUG_PRINT1 ("EXECUTING non-Emacs notwordchar.\n");
|
||||||
PREFETCH ();
|
PREFETCH ();
|
||||||
if (LETTER_P (d))
|
if (WORDCHAR_P (d))
|
||||||
goto fail;
|
goto fail;
|
||||||
SET_REGS_MATCHED ();
|
SET_REGS_MATCHED ();
|
||||||
|
d++;
|
||||||
break;
|
break;
|
||||||
#endif /* not emacs */
|
#endif /* not emacs */
|
||||||
|
|
||||||
@ -4812,7 +4842,7 @@ regexec (preg, string, nmatch, pmatch, eflags)
|
|||||||
|
|
||||||
|
|
||||||
/* Returns a message corresponding to an error code, ERRCODE, returned
|
/* Returns a message corresponding to an error code, ERRCODE, returned
|
||||||
from either regcomp or regexec. */
|
from either regcomp or regexec. We don't use PREG here. */
|
||||||
|
|
||||||
size_t
|
size_t
|
||||||
regerror (errcode, preg, errbuf, errbuf_size)
|
regerror (errcode, preg, errbuf, errbuf_size)
|
||||||
|
28
src/regex.h
28
src/regex.h
@ -20,12 +20,15 @@
|
|||||||
#ifndef __REGEXP_LIBRARY_H__
|
#ifndef __REGEXP_LIBRARY_H__
|
||||||
#define __REGEXP_LIBRARY_H__
|
#define __REGEXP_LIBRARY_H__
|
||||||
|
|
||||||
|
/* POSIX says that <sys/types.h> must be included (by the caller) before
|
||||||
|
<regex.h>. */
|
||||||
|
|
||||||
#ifdef VMS
|
#ifdef VMS
|
||||||
/* POSIX says that size_t should be in stddef.h. */
|
/* VMS doesn't have `size_t' in <sys/types.h>, even though POSIX says it
|
||||||
|
should be there. */
|
||||||
#include <stddef.h>
|
#include <stddef.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* POSIX says that <sys/types.h> must be included before <regex.h>. */
|
|
||||||
|
|
||||||
/* The following bits are used to determine the regexp syntax we
|
/* The following bits are used to determine the regexp syntax we
|
||||||
recognize. The set/not-set meanings are chosen so that Emacs syntax
|
recognize. The set/not-set meanings are chosen so that Emacs syntax
|
||||||
@ -162,6 +165,9 @@ extern reg_syntax_t re_syntax_options;
|
|||||||
#define RE_SYNTAX_POSIX_EGREP \
|
#define RE_SYNTAX_POSIX_EGREP \
|
||||||
(RE_SYNTAX_EGREP | RE_INTERVALS | RE_NO_BK_BRACES)
|
(RE_SYNTAX_EGREP | RE_INTERVALS | RE_NO_BK_BRACES)
|
||||||
|
|
||||||
|
/* P1003.2/D11.2, section 4.20.7.1, lines 5078ff. */
|
||||||
|
#define RE_SYNTAX_ED RE_SYNTAX_POSIX_BASIC
|
||||||
|
|
||||||
#define RE_SYNTAX_SED RE_SYNTAX_POSIX_BASIC
|
#define RE_SYNTAX_SED RE_SYNTAX_POSIX_BASIC
|
||||||
|
|
||||||
/* Syntax bits common to both basic and extended POSIX regex syntax. */
|
/* Syntax bits common to both basic and extended POSIX regex syntax. */
|
||||||
@ -316,12 +322,12 @@ struct re_pattern_buffer
|
|||||||
#define REGS_FIXED 2
|
#define REGS_FIXED 2
|
||||||
unsigned regs_allocated : 2;
|
unsigned regs_allocated : 2;
|
||||||
|
|
||||||
/* Set to zero when regex_compile compiles a pattern; set to one
|
/* Set to zero when `regex_compile' compiles a pattern; set to one
|
||||||
by re_compile_fastmap when it updates the fastmap, if any. */
|
by `re_compile_fastmap' if it updates the fastmap. */
|
||||||
unsigned fastmap_accurate : 1;
|
unsigned fastmap_accurate : 1;
|
||||||
|
|
||||||
/* If set, regexec reports only success or failure and does not
|
/* If set, `re_match_2' does not return information about
|
||||||
return anything in pmatch. */
|
subexpressions. */
|
||||||
unsigned no_sub : 1;
|
unsigned no_sub : 1;
|
||||||
|
|
||||||
/* If set, a beginning-of-line anchor doesn't match at the
|
/* If set, a beginning-of-line anchor doesn't match at the
|
||||||
@ -383,17 +389,17 @@ typedef struct
|
|||||||
unfortunately clutters up the declarations a bit, but I think it's
|
unfortunately clutters up the declarations a bit, but I think it's
|
||||||
worth it.
|
worth it.
|
||||||
|
|
||||||
We also have to undo `const' if we are not ANSI and if it hasn't
|
We may also have to undo `const' if we are not ANSI -- but if it has
|
||||||
previously being taken care of. */
|
already been defined, as by Autoconf's AC_CONST, don't do anything. */
|
||||||
|
|
||||||
#if __STDC__
|
#if __STDC__
|
||||||
#define _RE_ARGS(args) args
|
#define _RE_ARGS(args) args
|
||||||
#else
|
#else /* not __STDC__ */
|
||||||
#define _RE_ARGS(args) ()
|
#define _RE_ARGS(args) ()
|
||||||
#ifndef const
|
#if !const && !HAVE_CONST
|
||||||
#define const
|
#define const
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif /* not __STDC__ */
|
||||||
|
|
||||||
/* Sets the current default syntax to SYNTAX, and return the old syntax.
|
/* Sets the current default syntax to SYNTAX, and return the old syntax.
|
||||||
You can also simply assign to the `re_syntax_options' variable. */
|
You can also simply assign to the `re_syntax_options' variable. */
|
||||||
|
Loading…
Reference in New Issue
Block a user