1
0
mirror of https://git.savannah.gnu.org/git/emacs.git synced 2024-12-04 08:47:11 +00:00

Maintain interval ->position fields correctly in update_interval

Also fix some anomalies in the handling of byte positions in regex-emacs.c.
This fixes bug #34525.

* src/intervals.c (SET_PARENT_POSITION): New macro.
(update_interval): When moving to an interval's parent, set that parent's
->position field, to maintain the consistency of the tree.

* src/intervals.h (struct interval): Amend the comment describing when
->position is valid.

* src/pdumper.c: Update the hash associated with struct interval.

* src/regex-emacs.c (re_match_2_internal): Only invoke POINTER_TO_OFFSET on a
known character boundary.  Only perform arithmetic on character positions, not
on byte positions.  Correct the argument to an invocation of
UPDATE_SYNTAX_TABLE_FORWARD by adding 1 to it (in case wordend:).

* src/syntax.c (update_syntax_table): Remove the now redundant code that set
the ->position field of all parents of the interval found by update_interval.
This commit is contained in:
Alan Mackenzie 2019-03-01 17:35:12 +00:00
parent 287f2a6764
commit 31182c1d17
5 changed files with 40 additions and 34 deletions

View File

@ -713,11 +713,21 @@ previous_interval (register INTERVAL interval)
return NULL;
}
/* Find the interval containing POS given some non-NULL INTERVAL
in the same tree. Note that we need to update interval->position
if we go down the tree.
To speed up the process, we assume that the ->position of
I and all its parents is already uptodate. */
/* Set the ->position field of I's parent, based on I->position.

   I must have a parent, since the macro stores through
   INTERVAL_PARENT (I), and I->position must already be valid.

   If I is a left child, the parent's position is I's position plus
   the part of I's total length that is not in I's left subtree.
   Otherwise, the parent's position is the start of I's subtree
   (I->position minus I's left subtree length) minus the parent's own
   length.

   The expansion is a single do ... while (0) statement and the
   argument is parenthesized, so any pointer-valued expression may be
   passed.  Note that I is evaluated more than once, so it must not
   have side effects.  */
#define SET_PARENT_POSITION(i)                                          \
  do                                                                    \
    {                                                                   \
      if (AM_LEFT_CHILD (i))                                            \
        INTERVAL_PARENT (i)->position =                                 \
          (i)->position + TOTAL_LENGTH (i) - LEFT_TOTAL_LENGTH (i);     \
      else                                                              \
        INTERVAL_PARENT (i)->position =                                 \
          (i)->position - LEFT_TOTAL_LENGTH (i)                         \
          - LENGTH (INTERVAL_PARENT (i));                               \
    }                                                                   \
  while (0)
/* Find the interval containing POS, given some non-NULL INTERVAL in
the same tree. Note that we update interval->position in each
interval we traverse, assuming it is already correctly set for the
argument I. We don't assume that any other interval already has a
correctly set ->position. */
INTERVAL
update_interval (register INTERVAL i, ptrdiff_t pos)
{
@ -738,7 +748,10 @@ update_interval (register INTERVAL i, ptrdiff_t pos)
else if (NULL_PARENT (i))
error ("Point before start of properties");
else
i = INTERVAL_PARENT (i);
{
SET_PARENT_POSITION (i);
i = INTERVAL_PARENT (i);
}
continue;
}
else if (pos >= INTERVAL_LAST_POS (i))
@ -753,7 +766,10 @@ update_interval (register INTERVAL i, ptrdiff_t pos)
else if (NULL_PARENT (i))
error ("Point %"pD"d after end of properties", pos);
else
i = INTERVAL_PARENT (i);
{
SET_PARENT_POSITION (i);
i = INTERVAL_PARENT (i);
}
continue;
}
else

View File

@ -31,11 +31,15 @@ struct interval
/* The first group of entries deal with the tree structure. */
ptrdiff_t total_length; /* Length of myself and both children. */
ptrdiff_t position; /* Cache of interval's character position. */
/* This field is usually updated
simultaneously with an interval
traversal, there is no guarantee
that it is valid for a random
interval. */
/* This field is valid in the final
target interval returned by
find_interval, next_interval,
previous_interval and
update_interval. It cannot be
depended upon in any intermediate
intervals traversed by these
functions, or any other
interval. */
struct interval *left; /* Intervals which precede me. */
struct interval *right; /* Intervals which succeed me. */

View File

@ -2064,7 +2064,7 @@ dump_interval_tree (struct dump_context *ctx,
INTERVAL tree,
dump_off parent_offset)
{
#if CHECK_STRUCTS && !defined (HASH_interval_9110163DA0)
#if CHECK_STRUCTS && !defined (HASH_interval_1B38941C37)
# error "interval changed. See CHECK_STRUCTS comment."
#endif
// TODO: output tree breadth-first?

View File

@ -4732,8 +4732,8 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
int c1, c2;
int s1, s2;
int dummy;
ptrdiff_t offset = PTR_TO_OFFSET (d - 1);
ptrdiff_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
ptrdiff_t offset = PTR_TO_OFFSET (d);
ptrdiff_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset) - 1;
UPDATE_SYNTAX_TABLE (charpos);
GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
s1 = SYNTAX (c1);
@ -4811,8 +4811,8 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
int c1, c2;
int s1, s2;
int dummy;
ptrdiff_t offset = PTR_TO_OFFSET (d) - 1;
ptrdiff_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
ptrdiff_t offset = PTR_TO_OFFSET (d);
ptrdiff_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset) - 1;
UPDATE_SYNTAX_TABLE (charpos);
GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
s1 = SYNTAX (c1);
@ -4826,7 +4826,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
{
PREFETCH_NOLIMIT ();
GET_CHAR_AFTER (c2, d, dummy);
UPDATE_SYNTAX_TABLE_FORWARD (charpos);
UPDATE_SYNTAX_TABLE_FORWARD (charpos + 1);
s2 = SYNTAX (c2);
/* ... and S2 is Sword, and WORD_BOUNDARY_P (C1, C2)
@ -4890,8 +4890,8 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
is the character at D, and S2 is the syntax of C2. */
int c1, c2;
int s1, s2;
ptrdiff_t offset = PTR_TO_OFFSET (d) - 1;
ptrdiff_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
ptrdiff_t offset = PTR_TO_OFFSET (d);
ptrdiff_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset) - 1;
UPDATE_SYNTAX_TABLE (charpos);
GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
s1 = SYNTAX (c1);

View File

@ -340,20 +340,6 @@ update_syntax_table (ptrdiff_t charpos, EMACS_INT count, bool init,
invalidate = false;
if (!i)
return;
/* interval_of updates only ->position of the return value, so
update the parents manually to speed up update_interval. */
while (!NULL_PARENT (i))
{
if (AM_RIGHT_CHILD (i))
INTERVAL_PARENT (i)->position = i->position
- LEFT_TOTAL_LENGTH (i) + TOTAL_LENGTH (i) /* right end */
- TOTAL_LENGTH (INTERVAL_PARENT (i))
+ LEFT_TOTAL_LENGTH (INTERVAL_PARENT (i));
else
INTERVAL_PARENT (i)->position = i->position - LEFT_TOTAL_LENGTH (i)
+ TOTAL_LENGTH (i);
i = INTERVAL_PARENT (i);
}
i = gl_state.forward_i;
gl_state.b_property = i->position - gl_state.offset;
gl_state.e_property = INTERVAL_LAST_POS (i) - gl_state.offset;