(regex_compile): Substitute whitespace_regexp for spaces, if it is non-NULL.

(whitespace_regexp): New variable. (re_set_whitespace_regexp): New function.
2024-12-01 08:17:38 +00:00 · 2004-11-19 19:36:09 +00:00 · 2004-11-19 19:36:09 +00:00 · f9b0fd9964
commit f9b0fd9964
parent e1c1c5a7f4
1 changed files with 71 additions and 2 deletions
--- a/src/regex.c
+++ b/src/regex.c
@ -1250,7 +1250,7 @@ reg_syntax_t re_syntax_options;
 reg_syntax_t
 re_set_syntax (syntax)
-    reg_syntax_t syntax;
+     reg_syntax_t syntax;
 {
  reg_syntax_t ret = re_syntax_options;
@ -1258,6 +1258,17 @@ re_set_syntax (syntax)
  return ret;
 }
 WEAK_ALIAS (__re_set_syntax, re_set_syntax)
 /* Regexp to use to replace spaces, or NULL meaning don't.  */
 static re_char *whitespace_regexp;
 /* Set the regexp that regex_compile substitutes for a run of spaces
    in a pattern.  REGEXP may be NULL, meaning spaces are compiled as
    ordinary characters.  Only the pointer is stored, not a copy, and
    regex_compile takes its length with strlen, so REGEXP must be
    NUL-terminated.  */
 void
 re_set_whitespace_regexp (regexp)
     re_char *regexp;
 {
  whitespace_regexp = regexp;
 }
 /* This table gives an error message for each of the error codes listed
   in regex.h.  Obviously the order here has to be same as there.
@ -2436,6 +2447,15 @@ regex_compile (pattern, size, syntax, bufp)
  /* If the object matched can contain multibyte characters.  */
  const boolean multibyte = RE_MULTIBYTE_P (bufp);
  /* Nonzero if we have pushed down into a subpattern.  */
  int in_subpattern = 0;
  /* These hold the values of p, pattern, and pend from the main
     pattern when we have pushed into a subpattern.  */
  re_char *main_p;
  re_char *main_pattern;
  re_char *main_pend;
 #ifdef DEBUG
  debug++;
  DEBUG_PRINT1 ("\nCompiling pattern: ");
@ -2498,12 +2518,61 @@ regex_compile (pattern, size, syntax, bufp)
  begalt = b = bufp->buffer;
  /* Loop through the uncompiled pattern until we're at the end.  */
-  while (p != pend)
+  while (1)
    {
      if (p == pend)
 	{
 	  /* If this is the end of an included regexp,
 	     pop back to the main regexp and try again.  */
 	  if (in_subpattern)
 	    {
 	      in_subpattern = 0;
 	      pattern = main_pattern;
 	      p = main_p;
 	      pend = main_pend;
 	      continue;
 	    }
 	  /* If this is the end of the main regexp, we are done.  */
 	  break;
 	}
      PATFETCH (c);
      switch (c)
 	{
 	case ' ':
 	  {
 	    re_char *p1 = p;
 	    /* If there's no special whitespace regexp, treat
 	       spaces normally.  */
 	    if (!whitespace_regexp)
 	      goto normal_char;
 	    /* Peek past following spaces.  */
 	    while (p1 != pend)
 	      {
 		if (*p1 != ' ')
 		  break;
 		p1++;
 	      }
 	    /* If the spaces are followed by a repetition op,
 	       treat them normally.  */
 	    if (p1 == pend
 		|| (*p1 == '*' || *p1 == '+' || *p1 == '?'
 		    || (*p1 == '\\' && p1 + 1 != pend && p1[1] == '{')))
 	      goto normal_char;
 	    /* Replace the spaces with the whitespace regexp.  */
 	    in_subpattern = 1;
 	    main_p = p1;
 	    main_pend = pend;
 	    main_pattern = pattern;
 	    p = pattern = whitespace_regexp;
 	    pend = p + strlen (p);
 	    break;
 	  }    
 	case '^':
 	  {
 	    if (   /* If at start of pattern, it's an operator.	 */