Merge local changes.

svn path=/head/; revision=131557
2025-01-13 14:40:22 +00:00 · 2004-07-04 10:02:03 +00:00 · 2004-07-04 10:02:03 +00:00 · e5978bf334 · 2020-12-20 02:59:44 +00:00
commit e5978bf334
parent d1e9179e8c
9 changed files with 2673 additions and 880 deletions
--- a/gnu/usr.bin/grep/dfa.c
+++ b/gnu/usr.bin/grep/dfa.c
--- a/gnu/usr.bin/grep/dfa.h
+++ b/gnu/usr.bin/grep/dfa.h
@ -24,18 +24,24 @@
   In addition to clobbering modularity, we eat up valuable
   name space. */

-# undef PARAMS
-#if __STDC__
+#ifdef __STDC__
 # ifndef _PTR_T
 # define _PTR_T
  typedef void * ptr_t;
 # endif
-# define PARAMS(x) x
 #else
 # ifndef _PTR_T
 # define _PTR_T
  typedef char * ptr_t;
 # endif
+#endif
+
+#ifdef PARAMS
+# undef PARAMS
+#endif
+#if PROTOTYPES
+# define PARAMS(x) x
+#else
 # define PARAMS(x) ()
 #endif

@ -138,6 +144,21 @@ typedef enum

  RPAREN,			/* RPAREN never appears in the parse tree. */

+  CRANGE,			/* CRANGE never appears in the parse tree.
+				   It stands for a character range that can
+				   match a string of one or more characters.
+				   For example, [a-z] can match "ch" in
+				   a Spanish locale.  */
+
+#ifdef MBS_SUPPORT
+  ANYCHAR,                     /* ANYCHAR is a terminal symbol that matches
+                                  any multibyte (or single-byte) character.
+			          It is used only if MB_CUR_MAX > 1.  */
+
+  MBCSET,			/* MBCSET is similar to CSET, but for
+				   multibyte characters.  */
+#endif /* MBS_SUPPORT */
+
  CSET				/* CSET and (and any value greater) is a
 				   terminal symbol that matches any of a
 				   class of characters. */
@ -225,6 +246,12 @@ typedef struct
  char backref;			/* True if this state matches a \<digit>. */
  unsigned char constraint;	/* Constraint for this state to accept. */
  int first_end;		/* Token value of the first END in elems. */
+#ifdef MBS_SUPPORT
+  position_set mbps;           /* Positions which can match multibyte
+                                  characters.  e.g. period.
+				  This member is used only if
+				  MB_CUR_MAX > 1.  */
+#endif
 } dfa_state;

 /* Element of a list of strings, at least one of which is known to
@ -236,6 +263,26 @@ struct dfamust
  struct dfamust *next;
 };

+#ifdef MBS_SUPPORT
+/* A bracket expression, e.g. [a-c], [[:alpha:]], etc., broken down
+   into its kinds of members.
+   NOTE(review): each n* member presumably counts the entries of the
+   array declared just before it; confirm against the users in dfa.c.  */
+struct mb_char_classes
+{
+  int invert;			/* NOTE(review): presumably nonzero for a
+				   negated list such as [^a-c]; confirm.  */
+  wchar_t *chars;		/* Normal characters.  */
+  int nchars;
+  wctype_t *ch_classes;		/* Character classes.  */
+  int nch_classes;
+  wchar_t *range_sts;		/* Range characters (start of the range).  */
+  wchar_t *range_ends;		/* Range characters (end of the range).  */
+  int nranges;
+  char **equivs;		/* Equivalence classes.  */
+  int nequivs;
+  char **coll_elems;		/* Collating elements.  */
+  int ncoll_elems;
+};
+#endif
+
 /* A compiled regular expression. */
 struct dfa
 {
@ -254,6 +301,32 @@ struct dfa
  int nleaves;			/* Number of leaves on the parse tree. */
  int nregexps;			/* Count of parallel regexps being built
 				   with dfaparse(). */
+#ifdef MBS_SUPPORT
+  /* These members are used only in multibyte environments (MB_CUR_MAX > 1).  */
+  int nmultibyte_prop;
+  int *multibyte_prop;
+  /* The value of multibyte_prop[i] is defined by following rule.
+       if tokens[i] < NOTCHAR
+         bit 1 : tokens[i] is a singlebyte character, or the last-byte of
+	         a multibyte character.
+	 bit 0 : tokens[i] is a singlebyte character, or the 1st-byte of
+	         a multibyte character.
+       if tokens[i] = MBCSET
+         ("the index of the mbcsets entry corresponding to this operator" << 2) + 3
+
+     e.g.
+     tokens
+        = 'single_byte_a', 'multi_byte_A', 'single_byte_b'
+        = 'sb_a', 'mb_A(1st byte)', 'mb_A(2nd byte)', 'mb_A(3rd byte)', 'sb_b'
+     multibyte_prop
+        = 3     , 1               ,  0              ,  2              , 3
+  */
+
+  /* Array of the bracket expressions in the DFA.  */
+  struct mb_char_classes *mbcsets;
+  int nmbcsets;
+  int mbcsets_alloc;
+#endif

  /* Stuff owned by the state builder. */
  dfa_state *states;		/* States of the dfa. */
@ -292,13 +365,6 @@ struct dfa
 				   on a state that potentially could do so. */
  int *success;			/* Table of acceptance conditions used in
 				   dfaexec and computed in build_state. */
-  int *newlines;		/* Transitions on newlines.  The entry for a
-				   newline in any transition table is always
-				   -1 so we can count lines without wasting
-				   too many cycles.  The transition for a
-				   newline is stored separately and handled
-				   as a special case.  Newline is also used
-				   as a sentinel at the end of the buffer. */
  struct dfamust *musts;	/* List of strings, at least one of which
 				   is known to appear in any r.e. matching
 				   the dfa. */
@ -325,26 +391,21 @@ struct dfa
 /* dfasyntax() takes three arguments; the first sets the syntax bits described
   earlier in this file, the second sets the case-folding flag, and the
   third specifies the line terminator. */
-extern void dfasyntax PARAMS ((reg_syntax_t, int, int));
+extern void dfasyntax PARAMS ((reg_syntax_t, int, unsigned char));

 /* Compile the given string of the given length into the given struct dfa.
   Final argument is a flag specifying whether to build a searching or an
   exact matcher. */
-extern void dfacomp PARAMS ((char *, size_t, struct dfa *, int));
+extern void dfacomp PARAMS ((char const *, size_t, struct dfa *, int));

 /* Execute the given struct dfa on the buffer of characters.  The
-   first char * points to the beginning, and the second points to the
-   first character after the end of the buffer, which must be a writable
-   place so a sentinel end-of-buffer marker can be stored there.  The
-   second-to-last argument is a flag telling whether to allow newlines to
-   be part of a string matching the regexp.  The next-to-last argument,
-   if non-NULL, points to a place to increment every time we see a
-   newline.  The final argument, if non-NULL, points to a flag that will
+   last byte of the buffer must equal the end-of-line byte.
+   The final argument points to a flag that will
   be set if further examination by a backtracking matcher is needed in
   order to verify backreferencing; otherwise the flag will be cleared.
-   Returns NULL if no match is found, or a pointer to the first
+   Returns (size_t) -1 if no match is found, or the offset of the first
   character after the first & shortest matching string in the buffer. */
-extern char *dfaexec PARAMS ((struct dfa *, char *, char *, int, int *, int *));
+extern size_t dfaexec PARAMS ((struct dfa *, char const *, size_t, int *));

 /* Free the storage held by the components of a struct dfa. */
 extern void dfafree PARAMS ((struct dfa *));
@ -355,7 +416,7 @@ extern void dfafree PARAMS ((struct dfa *));
 extern void dfainit PARAMS ((struct dfa *));

 /* Incrementally parse a string of given length into a struct dfa. */
-extern void dfaparse PARAMS ((char *, size_t, struct dfa *));
+extern void dfaparse PARAMS ((char const *, size_t, struct dfa *));

 /* Analyze a parsed regexp; second argument tells whether to build a searching
   or an exact matcher. */
@ -369,6 +430,5 @@ extern void dfastate PARAMS ((int, struct dfa *, int []));

 /* dfaerror() is called by the regexp routines whenever an error occurs.  It
   takes a single argument, a NUL-terminated string describing the error.
-   The default dfaerror() prints the error message to stderr and exits.
-   The user can provide a different dfafree() if so desired. */
+   The user must supply a dfaerror.  */
 extern void dfaerror PARAMS ((const char *));
--- a/gnu/usr.bin/grep/getpagesize.h
+++ b/gnu/usr.bin/grep/getpagesize.h
@ -4,6 +4,11 @@

 #ifndef HAVE_GETPAGESIZE

+#if !defined getpagesize && defined __BEOS__
+# include <OS.h>
+# define getpagesize() B_PAGE_SIZE
+#endif
+
 #ifdef HAVE_UNISTD_H
 # include <unistd.h>
 #endif
--- a/gnu/usr.bin/grep/grep.1
+++ b/gnu/usr.bin/grep/grep.1
@ -13,7 +13,7 @@
 .de Id
 .ds Dt \\$4
 ..
-.Id $Id: grep.1,v 1.11 2000/02/26 03:18:40 alainm Exp $
+.Id $Id: grep.1,v 1.23 2002/01/22 13:20:04 bero Exp $
 .TH GREP 1 \*(Dt "GNU Project"
 .SH NAME
 grep, egrep, fgrep, zgrep, zegrep, zfgrep,
@ -72,6 +72,9 @@ is the same as
 Print
 .I NUM
 lines of trailing context after matching lines.
+Places a line containing
+.B \-\^\-
+between contiguous groups of matches.
 .TP
 .BR \-a ", " \-\^\-text
 Process a binary file as if it were text; this is equivalent to the
@ -82,11 +85,17 @@ option.
 Print
 .I NUM
 lines of leading context before matching lines.
+Places a line containing
+.B \-\^\-
+between contiguous groups of matches.
 .TP
-\fB\-C\fP [\fINUM\fP], \fB\-\fP\fINUM\fP, \fB\-\^\-context\fP[\fB=\fP\fINUM\fP]
+.BI \-C " NUM" "\fR,\fP \-\^\-context=" NUM
 Print
 .I NUM
-lines (default 2) of output context.
+lines of output context.
+Places a line containing
+.B \-\^\-
+between contiguous groups of matches.
 .TP
 .BR \-b ", " \-\^\-byte-offset
 Print the byte offset within the input file before
@ -127,6 +136,11 @@ might output binary garbage,
 which can have nasty side effects if the output is a terminal and if the
 terminal driver interprets some of it as commands.
 .TP
+.BI \-\^\-colour[=\fIWHEN\fR] ", " \-\^\-color[=\fIWHEN\fR]
+Surround the matching string with the marker found in the
+.B GREP_COLOR
+environment variable.  WHEN may be `never', `always', or `auto'.
+.TP
 .BR \-c ", " \-\^\-count
 Suppress normal output; instead print a count of
 matching lines for each input file.
@ -134,6 +148,20 @@ With the
 .BR \-v ", " \-\^\-invert-match
 option (see below), count non-matching lines.
 .TP
+.BI \-D " ACTION" "\fR,\fP \-\^\-devices=" ACTION
+If an input file is a device, FIFO or socket, use
+.I ACTION
+to process it.  By default,
+.I ACTION
+is
+.BR read ,
+which means that devices are read just as if they were ordinary files.
+If
+.I ACTION
+is
+.BR skip ,
+devices are silently skipped.
+.TP
 .BI \-d " ACTION" "\fR,\fP \-\^\-directories=" ACTION
 If an input file is a directory, use
 .I ACTION
@ -173,6 +201,10 @@ Interpret
 .I PATTERN
 as a list of fixed strings, separated by newlines,
 any of which is to be matched.
+.TP
+.BR \-P ", " \-\^\-perl-regexp
+Interpret
+.I PATTERN
+as a Perl regular expression.
 .TP
 .BI \-f " FILE" "\fR,\fP \-\^\-file=" FILE
 Obtain patterns from
@ -218,6 +250,39 @@ the name of each input file from which output
 would normally have been printed.  The scanning will
 stop on the first match.
 .TP
+.BI \-m " NUM" "\fR,\fP \-\^\-max-count=" NUM
+Stop reading a file after
+.I NUM
+matching lines.  If the input is standard input from a regular file,
+and
+.I NUM
+matching lines are output,
+.B grep
+ensures that the standard input is positioned to just after the last
+matching line before exiting, regardless of the presence of trailing
+context lines.  This enables a calling process to resume a search.
+When
+.B grep
+stops after
+.I NUM
+matching lines, it outputs any trailing context lines.  When the
+.B \-c
+or
+.B \-\^\-count
+option is also used,
+.B grep
+does not output a count greater than
+.IR NUM .
+When the
+.B \-v
+or
+.B \-\^\-invert-match
+option is also used,
+.B grep
+stops after outputting
+.I NUM
+non-matching lines.
+.TP
 .B \-\^\-mmap
 If possible, use the
 .BR mmap (2)
@ -237,21 +302,43 @@ is operating, or if an I/O error occurs.
 Prefix each line of output with the line number
 within its input file.
 .TP
+.BR \-o ", " \-\^\-only-matching
+Show only the part of a matching line that matches
+.IR PATTERN .
+.TP
+.BI \-\^\-label= LABEL
+Displays input actually coming from standard input as input coming from file
+.IR LABEL .
+This is especially useful for tools like zgrep, e.g.
+.B "gzip \-cd foo.gz | grep \-\^\-label=foo something"
+.TP
+.BR \-\^\-line-buffered
+Use line buffering.  This can cause a performance penalty.
+.TP
 .BR \-q ", " \-\^\-quiet ", " \-\^\-silent
-Quiet; suppress normal output.  The scanning will stop
-on the first match.
+Quiet; do not write anything to standard output.
+Exit immediately with zero status if any match is found,
+even if an error was detected.
 Also see the
 .B \-s
 or
 .B \-\^\-no-messages
-option below.
+option.
 .TP
-.BR \-r ", " \-\^\-recursive
+.BR \-R ", " \-r ", " \-\^\-recursive
 Read all files under each directory, recursively;
 this is equivalent to the
 .B "\-d recurse"
 option.
 .TP
+.BI \-\^\-include= PATTERN
+Recurse in directories, searching only files matching
+.IR PATTERN .
+.TP
+.BI \-\^\-exclude= PATTERN
+Recurse in directories, skipping files matching
+.IR PATTERN .
+.TP
 .BR \-s ", " \-\^\-no-messages
 Suppress error messages about nonexistent or unreadable files.
 Portability note: unlike \s-1GNU\s0
@ -378,11 +465,13 @@ a single character.  Most characters, including all letters and digits,
 are regular expressions that match themselves.  Any metacharacter with
 special meaning may be quoted by preceding it with a backslash.
 .PP
-A list of characters enclosed by
+A
+.I "bracket expression"
+is a list of characters enclosed by
 .B [
 and
-.B ]
-matches any single
+.BR ] .
+It matches any single
 character in that list; if the first character of the list
 is the caret
 .B ^
@ -391,10 +480,32 @@ then it matches any character
 in the list.
 For example, the regular expression
 .B [0123456789]
-matches any single digit.  A range of characters
-may be specified by giving the first and last characters, separated
-by a hyphen.
-Finally, certain named classes of characters are predefined.
+matches any single digit.
+.PP
+Within a bracket expression, a
+.I "range expression"
+consists of two characters separated by a hyphen.
+It matches any single character that sorts between the two characters,
+inclusive, using the locale's collating sequence and character set.
+For example, in the default C locale,
+.B [a\-d]
+is equivalent to
+.BR [abcd] .
+Many locales sort characters in dictionary order, and in these locales
+.B [a\-d]
+is typically not equivalent to
+.BR [abcd] ;
+it might be equivalent to
+.BR [aBbCcDd] ,
+for example.
+To obtain the traditional interpretation of bracket expressions,
+you can use the C locale by setting the
+.B LC_ALL
+environment variable to the value
+.BR C .
+.PP
+Finally, certain named classes of characters are predefined within
+bracket expressions, as follows.
 Their names are self explanatory, and they are
 .BR [:alnum:] ,
 .BR [:alpha:] ,
@ -411,8 +522,8 @@ and
 For example,
 .B [[:alnum:]]
 means
-.BR [0-9A-Za-z] ,
-except the latter form depends upon the \s-1POSIX\s0 locale and the
+.BR [0\-9A\-Za\-z] ,
+except the latter form depends upon the C locale and the
 \s-1ASCII\s0 character encoding, whereas the former is independent
 of locale and character set.
 (Note that the brackets in these class names are part of the symbolic
@ -559,6 +670,29 @@ instead of reporting a syntax error in the regular expression.
 \s-1POSIX.2\s0 allows this behavior as an extension, but portable scripts
 should avoid it.
 .SH "ENVIRONMENT VARIABLES"
+Grep's behavior is affected by the following environment variables.
+.PP
+A locale
+.BI LC_ foo
+is specified by examining the three environment variables
+.BR LC_ALL ,
+.BR LC_\fIfoo\fP ,
+.BR LANG ,
+in that order.
+The first of these variables that is set specifies the locale.
+For example, if
+.B LC_ALL
+is not set, but
+.B LC_MESSAGES
+is set to
+.BR pt_BR ,
+then Brazilian Portuguese is used for the
+.B LC_MESSAGES
+locale.
+The C locale is used if none of these environment variables are set,
+or if the locale catalog is not installed, or if
+.B grep
+was not compiled with national language support (\s-1NLS\s0).
 .TP
 .B GREP_OPTIONS
 This variable specifies default options to be placed in front of any
@ -576,28 +710,29 @@ Option specifications are separated by whitespace.
 A backslash escapes the next character,
 so it can be used to specify an option containing whitespace or a backslash.
 .TP
-\fBLC_ALL\fP, \fBLC_MESSAGES\fP, \fBLANG\fP
+.B GREP_COLOR
+Specifies the marker for highlighting.
+.TP
+\fBLC_ALL\fP, \fBLC_COLLATE\fP, \fBLANG\fP
 These variables specify the
-.B LC_MESSAGES
-locale, which determines the language that
-.B grep
-uses for messages.
-The locale is determined by the first of these variables that is set.
-American English is used if none of these environment variables are set,
-or if the message catalog is not installed, or if
-.B grep
-was not compiled with national language support (\s-1NLS\s0).
+.B LC_COLLATE
+locale, which determines the collating sequence used to interpret
+range expressions like
+.BR [a\-z] .
 .TP
 \fBLC_ALL\fP, \fBLC_CTYPE\fP, \fBLANG\fP
 These variables specify the
 .B LC_CTYPE
 locale, which determines the type of characters, e.g., which
 characters are whitespace.
-The locale is determined by the first of these variables that is set.
-The \s-1POSIX\s0 locale is used if none of these environment variables
-are set, or if the locale catalog is not installed, or if
+.TP
+\fBLC_ALL\fP, \fBLC_MESSAGES\fP, \fBLANG\fP
+These variables specify the
+.B LC_MESSAGES
+locale, which determines the language that
 .B grep
-was not compiled with national language support (\s-1NLS\s0).
+uses for messages.
+The default C locale uses American English messages.
 .TP
 .B POSIXLY_CORRECT
 If set,
@ -612,13 +747,15 @@ Also, \s-1POSIX.2\s0 requires that unrecognized options be diagnosed as
 \*(lqillegal\*(rq, but since they are not really against the law the default
 is to diagnose them as \*(lqinvalid\*(rq.
 .SH DIAGNOSTICS
-Normally, exit status is 0 if matches were found,
-and 1 if no matches were found.  (The
-.B \-v
-option inverts the sense of the exit status.)
-Exit status is 2 if there were syntax errors
-in the pattern, inaccessible input files, or
-other system errors.
+.PP
+Normally, exit status is 0 if selected lines are found and 1 otherwise.
+But the exit status is 2 if an error occurred, unless the
+.B \-q
+or
+.B \-\^\-quiet
+or
+.B \-\^\-silent
+option is used and a selected line is found.
 .SH BUGS
 Email bug reports to
 .BR bug-gnu-utils@gnu.org .
@ -626,7 +763,7 @@ Be sure to include the word \*(lqgrep\*(rq somewhere in the
 \*(lqSubject:\*(rq field.
 .PP
 Large repetition counts in the
-.BI { m , n }
+.BI { n , m }
 construct may cause grep to use lots of memory.
 In addition,
 certain other obscure regular expressions require exponential time
--- a/gnu/usr.bin/grep/grep.c
+++ b/gnu/usr.bin/grep/grep.c
--- a/gnu/usr.bin/grep/grep.h
+++ b/gnu/usr.bin/grep/grep.h
@ -1,5 +1,5 @@
 /* grep.h - interface to grep driver for searching subroutines.
-   Copyright (C) 1992, 1998 Free Software Foundation, Inc.
+   Copyright (C) 1992, 1998, 2001 Free Software Foundation, Inc.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
@ -22,20 +22,16 @@
 # define __attribute__(x)
 #endif

-extern void fatal PARAMS ((const char *, int)) __attribute__((noreturn));
-extern char *xmalloc PARAMS ((size_t size));
-extern char *xrealloc PARAMS ((char *ptr, size_t size));
-
 /* Grep.c expects the matchers vector to be terminated
-   by an entry with a NULL name, and to contain at least
+   by an entry with a NULL compile, and to contain at least
   an entry named "default". */

 extern struct matcher
 {
-  char *name;
-  void (*compile) PARAMS ((char *, size_t));
-  char *(*execute) PARAMS ((char *, size_t, char **));
-} matchers[];
+  char name[8];
+  void (*compile) PARAMS ((char const *, size_t));
+  size_t (*execute) PARAMS ((char const *, size_t, size_t *, int));
+} const matchers[];

 /* Exported from fgrepmat.c, egrepmat.c, grepmat.c.  */
 extern char const *matcher;
--- a/gnu/usr.bin/grep/kwset.c
+++ b/gnu/usr.bin/grep/kwset.c
@ -83,22 +83,13 @@ struct kwset
  struct trie *next[NCHAR];	/* Table of children of the root. */
  char *target;			/* Target string if there's only one. */
  int mind2;			/* Used in Boyer-Moore search for one string. */
-  char *trans;			/* Character translation table. */
+  char const *trans;		/* Character translation table. */
 };

-/* prototypes */
-static void enqueue PARAMS((struct tree *, struct trie **));
-static void treefails PARAMS((register struct tree *, struct trie *, struct trie *));
-static void treedelta PARAMS((register struct tree *,register unsigned int, unsigned char *));
-static int  hasevery PARAMS((register struct tree *, register struct tree *));
-static void treenext PARAMS((struct tree *, struct trie **));
-static char * bmexec PARAMS((kwset_t, char *, size_t));
-static char * cwexec PARAMS((kwset_t, char *, size_t, struct kwsmatch *));
-
 /* Allocate and initialize a keyword set object, returning an opaque
   pointer to it.  Return NULL if memory is not available. */
 kwset_t
-kwsalloc (char *trans)
+kwsalloc (char const *trans)
 {
  struct kwset *kwset;

@ -133,7 +124,7 @@ kwsalloc (char *trans)
 /* Add the given string to the contents of the keyword set.  Return NULL
   for success, an error message otherwise. */
 char *
-kwsincr (kwset_t kws, char *text, size_t len)
+kwsincr (kwset_t kws, char const *text, size_t len)
 {
  struct kwset *kwset;
  register struct trie *trie;
@ -303,7 +294,8 @@ enqueue (struct tree *tree, struct trie **last)
   from the given tree, given the failure function for their parent as
   well as a last resort failure node. */
 static void
-treefails (register struct tree *tree, struct trie *fail, struct trie *recourse)
+treefails (register struct tree const *tree, struct trie const *fail,
+	   struct trie *recourse)
 {
  register struct tree *link;

@ -337,7 +329,7 @@ treefails (register struct tree *tree, struct trie *fail, struct trie *recourse)
 /* Set delta entries for the links of the given tree such that
   the preexisting delta value is larger than the current depth. */
 static void
-treedelta (register struct tree *tree,
+treedelta (register struct tree const *tree,
 	   register unsigned int depth,
 	   unsigned char delta[])
 {
@ -351,7 +343,7 @@ treedelta (register struct tree *tree,

 /* Return true if A has every label in B. */
 static int
-hasevery (register struct tree *a, register struct tree *b)
+hasevery (register struct tree const *a, register struct tree const *b)
 {
  if (!b)
    return 1;
@ -370,7 +362,7 @@ hasevery (register struct tree *a, register struct tree *b)
 /* Compute a vector, indexed by character code, of the trie nodes
   referenced from the given tree. */
 static void
-treenext (struct tree *tree, struct trie *next[])
+treenext (struct tree const *tree, struct trie *next[])
 {
  if (!tree)
    return;
@ -387,7 +379,7 @@ kwsprep (kwset_t kws)
  register struct kwset *kwset;
  register int i;
  register struct trie *curr, *fail;
-  register char *trans;
+  register char const *trans;
  unsigned char delta[NCHAR];
  struct trie *last, *next[NCHAR];

@ -499,23 +491,26 @@ kwsprep (kwset_t kws)
 #define U(C) ((unsigned char) (C))

 /* Fast boyer-moore search. */
-static char *
-bmexec (kwset_t kws, char *text, size_t size)
+static size_t
+bmexec (kwset_t kws, char const *text, size_t size)
 {
-  struct kwset *kwset;
-  register unsigned char *d1;
-  register char *ep, *sp, *tp;
+  struct kwset const *kwset;
+  register unsigned char const *d1;
+  register char const *ep, *sp, *tp;
  register int d, gc, i, len, md2;

-  kwset = (struct kwset *) kws;
+  kwset = (struct kwset const *) kws;
  len = kwset->mind;

  if (len == 0)
-    return text;
-  if (len > size)
    return 0;
+  if (len > size)
+    return -1;
  if (len == 1)
-    return memchr(text, kwset->target[0], size);
+    {
+      tp = memchr (text, kwset->target[0], size);
+      return tp ? tp - text : -1;
+    }

  d1 = kwset->delta;
  sp = kwset->target + len;
@ -554,7 +549,7 @@ bmexec (kwset_t kws, char *text, size_t size)
 	    for (i = 3; i <= len && U(tp[-i]) == U(sp[-i]); ++i)
 	      ;
 	    if (i > len)
-	      return tp - len;
+	      return tp - len - text;
 	  }
 	tp += md2;
      }
@ -573,26 +568,29 @@ bmexec (kwset_t kws, char *text, size_t size)
 	  for (i = 3; i <= len && U(tp[-i]) == U(sp[-i]); ++i)
 	    ;
 	  if (i > len)
-	    return tp - len;
+	    return tp - len - text;
 	}
      d = md2;
    }

-  return 0;
+  return -1;
 }

 /* Hairy multiple string search. */
-static char *
-cwexec (kwset_t kws, char *text, size_t len, struct kwsmatch *kwsmatch)
+static size_t
+cwexec (kwset_t kws, char const *text, size_t len, struct kwsmatch *kwsmatch)
 {
-  struct kwset *kwset;
-  struct trie **next, *trie, *accept;
-  char *beg, *lim, *mch, *lmch;
-  register unsigned char c, *delta;
+  struct kwset const *kwset;
+  struct trie * const *next;
+  struct trie const *trie;
+  struct trie const *accept;
+  char const *beg, *lim, *mch, *lmch;
+  register unsigned char c;
+  register unsigned char const *delta;
  register int d;
-  register char *end, *qlim;
-  register struct tree *tree;
-  register char *trans;
+  register char const *end, *qlim;
+  register struct tree const *tree;
+  register char const *trans;

 #ifdef lint
  accept = NULL;
@ -601,7 +599,7 @@ cwexec (kwset_t kws, char *text, size_t len, struct kwsmatch *kwsmatch)
  /* Initialize register copies and look for easy ways out. */
  kwset = (struct kwset *) kws;
  if (len < kwset->mind)
-    return 0;
+    return -1;
  next = kwset->next;
  delta = kwset->delta;
  trans = kwset->trans;
@ -670,7 +668,7 @@ cwexec (kwset_t kws, char *text, size_t len, struct kwsmatch *kwsmatch)
      if (mch)
 	goto match;
    }
-  return 0;
+  return -1;

 match:
  /* Given a known match, find the longest possible match anchored
@ -730,10 +728,10 @@ cwexec (kwset_t kws, char *text, size_t len, struct kwsmatch *kwsmatch)
  if (kwsmatch)
    {
      kwsmatch->index = accept->accepting / 2;
-      kwsmatch->beg[0] = mch;
+      kwsmatch->offset[0] = mch - text;
      kwsmatch->size[0] = accept->depth;
    }
-  return mch;
+  return mch - text;
 }

 /* Search through the given text for a match of any member of the
@ -743,20 +741,18 @@ cwexec (kwset_t kws, char *text, size_t len, struct kwsmatch *kwsmatch)
   matching substring.  Similarly, if FOUNDIDX is non-NULL, store
   in the referenced location the index number of the particular
   keyword matched. */
-char *
-kwsexec (kwset_t kws, char *text, size_t size, struct kwsmatch *kwsmatch)
+size_t
+kwsexec (kwset_t kws, char const *text, size_t size,
+	 struct kwsmatch *kwsmatch)
 {
-  struct kwset *kwset;
-  char *ret;
-
-  kwset = (struct kwset *) kws;
+  struct kwset const *kwset = (struct kwset *) kws;
  if (kwset->words == 1 && kwset->trans == 0)
    {
-      ret = bmexec(kws, text, size);
-      if (kwsmatch != 0 && ret != 0)
+      size_t ret = bmexec (kws, text, size);
+      if (kwsmatch != 0 && ret != (size_t) -1)
 	{
 	  kwsmatch->index = 0;
-	  kwsmatch->beg[0] = ret;
+	  kwsmatch->offset[0] = ret;
 	  kwsmatch->size[0] = kwset->mind;
 	}
      return ret;
--- a/gnu/usr.bin/grep/kwset.h
+++ b/gnu/usr.bin/grep/kwset.h
@ -25,7 +25,7 @@
 struct kwsmatch
 {
  int index;			/* Index number of matching keyword. */
-  char *beg[1];			/* Begin pointer for each submatch. */
+  size_t offset[1];		/* Offset of each submatch. */
  size_t size[1];		/* Length of each submatch. */
 };

@ -35,12 +35,12 @@ typedef ptr_t kwset_t;
   if enough memory cannot be obtained.  The argument if non-NULL
   specifies a table of character translations to be applied to all
   pattern and search text. */
-extern kwset_t kwsalloc PARAMS((char *));
+extern kwset_t kwsalloc PARAMS((char const *));

 /* Incrementally extend the keyword set to include the given string.
   Return NULL for success, or an error message.  Remember an index
   number for each keyword included in the set. */
-extern char *kwsincr PARAMS((kwset_t, char *, size_t));
+extern char *kwsincr PARAMS((kwset_t, char const *, size_t));

 /* When the keyword set has been completely built, prepare it for
   use.  Return NULL for success, or an error message. */
@ -52,7 +52,7 @@ extern char *kwsprep PARAMS((kwset_t));
   the matching substring in the integer it points to.  Similarly,
   if foundindex is non-NULL, store the index of the particular
   keyword found therein. */
-extern char *kwsexec PARAMS((kwset_t, char *, size_t, struct kwsmatch *));
+extern size_t kwsexec PARAMS((kwset_t, char const *, size_t, struct kwsmatch *));

 /* Deallocate the given keyword set and all its associated storage. */
 extern void kwsfree PARAMS((kwset_t));
--- a/gnu/usr.bin/grep/search.c
+++ b/gnu/usr.bin/grep/search.c
@ -24,54 +24,71 @@
 # include <config.h>
 #endif
 #include <sys/types.h>
+#if defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H && defined HAVE_MBRTOWC
+/* We can handle multibyte string.  */
+# define MBS_SUPPORT
+# include <wchar.h>
+# include <wctype.h>
+#endif
+
 #include "system.h"
 #include "grep.h"
 #include "regex.h"
 #include "dfa.h"
 #include "kwset.h"
+#include "error.h"
+#include "xalloc.h"
+#ifdef HAVE_LIBPCRE
+# include <pcre.h>
+#endif

 #define NCHAR (UCHAR_MAX + 1)

-static void Gcompile PARAMS((char *, size_t));
-static void Ecompile PARAMS((char *, size_t));
-static char *EGexecute PARAMS((char *, size_t, char **));
-static void Fcompile PARAMS((char *, size_t));
-static char *Fexecute PARAMS((char *, size_t, char **));
-static void kwsinit PARAMS((void));
-
-/* Here is the matchers vector for the main program. */
-struct matcher matchers[] = {
-  { "default", Gcompile, EGexecute },
-  { "grep", Gcompile, EGexecute },
-  { "egrep", Ecompile, EGexecute },
-  { "awk", Ecompile, EGexecute },
-  { "fgrep", Fcompile, Fexecute },
-  { 0, 0, 0 },
-};
-
 /* For -w, we also consider _ to be word constituent.  */
 #define WCHAR(C) (ISALNUM(C) || (C) == '_')

 /* DFA compiled regexp. */
 static struct dfa dfa;

-/* Regex compiled regexp. */
-static struct re_pattern_buffer regexbuf;
+/* The Regex compiled patterns.  */
+static struct patterns
+{
+  /* Regex compiled regexp. */
+  struct re_pattern_buffer regexbuf;
+  struct re_registers regs; /* This is here on account of a BRAIN-DEAD
+			       Q@#%!# library interface in regex.c.  */
+} patterns0;
+
+struct patterns *patterns;
+size_t pcount;

 /* KWset compiled pattern.  For Ecompile and Gcompile, we compile
   a list of strings, at least one of which is known to occur in
   any string matching the regexp. */
 static kwset_t kwset;

-/* Last compiled fixed string known to exactly match the regexp.
-   If kwsexec() returns < lastexact, then we don't need to
+/* Number of compiled fixed strings known to exactly match the regexp.
+   If kwsexec returns < kwset_exact_matches, then we don't need to
   call the regexp matcher at all. */
-static int lastexact;
+static int kwset_exact_matches;
+
+#if defined(MBS_SUPPORT)
+static char* check_multibyte_string PARAMS ((char const *buf, size_t size));
+#endif
+static void kwsinit PARAMS ((void));
+static void kwsmusts PARAMS ((void));
+static void Gcompile PARAMS ((char const *, size_t));
+static void Ecompile PARAMS ((char const *, size_t));
+static size_t EGexecute PARAMS ((char const *, size_t, size_t *, int ));
+static void Fcompile PARAMS ((char const *, size_t));
+static size_t Fexecute PARAMS ((char const *, size_t, size_t *, int));
+static void Pcompile PARAMS ((char const *, size_t ));
+static size_t Pexecute PARAMS ((char const *, size_t, size_t *, int));

 void
 dfaerror (char const *mesg)
 {
-  fatal(mesg, 0);
+  /* Error hook called by the dfa routines with a NUL-terminated
+     description of the problem.  MESG is passed as an argument rather
+     than as the format string, so that a '%' in the message is printed
+     literally instead of being interpreted as a conversion.  */
+  error (2, 0, "%s", mesg);
 }

 static void
@ -82,10 +99,10 @@ kwsinit (void)

  if (match_icase)
    for (i = 0; i < NCHAR; ++i)
-      trans[i] = TOLOWER(i);
+      trans[i] = TOLOWER (i);

-  if (!(kwset = kwsalloc(match_icase ? trans : (char *) 0)))
-    fatal("memory exhausted", 0);
+  if (!(kwset = kwsalloc (match_icase ? trans : (char *) 0)))
+    error (2, 0, _("memory exhausted"));
 }

 /* If the DFA turns out to have some set of fixed strings one of
@ -95,12 +112,12 @@ kwsinit (void)
 static void
 kwsmusts (void)
 {
-  struct dfamust *dm;
-  char *err;
+  struct dfamust const *dm;
+  char const *err;

  if (dfa.musts)
    {
-      kwsinit();
+      kwsinit ();
      /* First, we compile in the substrings known to be exact
 	 matches.  The kwset matcher will return the index
 	 of the matching string that it chooses. */
@ -108,9 +125,9 @@ kwsmusts (void)
 	{
 	  if (!dm->exact)
 	    continue;
-	  ++lastexact;
-	  if ((err = kwsincr(kwset, dm->must, strlen(dm->must))) != 0)
-	    fatal(err, 0);
+	  ++kwset_exact_matches;
+	  if ((err = kwsincr (kwset, dm->must, strlen (dm->must))) != 0)
+	    error (2, 0, err);
 	}
      /* Now, we compile the substrings that will require
 	 the use of the regexp matcher.  */
@ -118,24 +135,90 @@ kwsmusts (void)
 	{
 	  if (dm->exact)
 	    continue;
-	  if ((err = kwsincr(kwset, dm->must, strlen(dm->must))) != 0)
-	    fatal(err, 0);
+	  if ((err = kwsincr (kwset, dm->must, strlen (dm->must))) != 0)
+	    error (2, 0, err);
 	}
-      if ((err = kwsprep(kwset)) != 0)
-	fatal(err, 0);
+      if ((err = kwsprep (kwset)) != 0)
+	error (2, 0, err);
    }
 }

+#ifdef MBS_SUPPORT
+/* Allocate a SIZE-byte array parallel to BUF.  Each entry that starts a
+   character (a singlebyte character or the first byte of a multibyte
+   character) holds that character's length in bytes; every other entry
+   is left 0.  Caller must free the array.  */
+static char*
+check_multibyte_string(char const *buf, size_t size)
+{
+  char *mb_properties = malloc(size); /* NOTE(review): NULL is not checked.  */
+  mbstate_t cur_state;
+  int i; /* NOTE(review): signed index compared against size_t SIZE.  */
+  memset(&cur_state, 0, sizeof(mbstate_t));
+  memset(mb_properties, 0, sizeof(char)*size);
+  for (i = 0; i < size ;)
+    {
+      size_t mbclen;
+      mbclen = mbrlen(buf + i, size - i, &cur_state);
+
+      if (mbclen == (size_t) -1 || mbclen == (size_t) -2 || mbclen == 0)
+	{
+	  /* An invalid sequence (-1), a truncated multibyte character (-2)
+	     or a NUL character (0).  Treat it as a singlebyte character.  */
+	  mbclen = 1;
+	}
+      mb_properties[i] = mbclen;
+      i += mbclen; /* Bytes skipped here keep the 0 set by memset.  */
+    }
+
+  return mb_properties;
+}
+#endif
+
 static void
-Gcompile (char *pattern, size_t size)
+Gcompile (char const *pattern, size_t size)
 {
  const char *err;
+  char const *sep;
+  size_t total = size;
+  char const *motif = pattern;

-  re_set_syntax(RE_SYNTAX_GREP | RE_HAT_LISTS_NOT_NEWLINE);
-  dfasyntax(RE_SYNTAX_GREP | RE_HAT_LISTS_NOT_NEWLINE, match_icase, eolbyte);
+  re_set_syntax (RE_SYNTAX_GREP | RE_HAT_LISTS_NOT_NEWLINE);
+  dfasyntax (RE_SYNTAX_GREP | RE_HAT_LISTS_NOT_NEWLINE, match_icase, eolbyte);

-  if ((err = re_compile_pattern(pattern, size, &regexbuf)) != 0)
-    fatal(err, 0);
+  /* The GNU regex compiler must be given each pattern separately, to detect
+     errors like "[\nallo\n]\n".  The patterns here are "[", "allo" and "]",
+     for which GNU regex should raise a syntax error.  The same goes for
+     backrefs, which should be local to each pattern.  */
+  do
+    {
+      size_t len;
+      sep = memchr (motif, '\n', total);
+      if (sep)
+	{
+	  len = sep - motif;
+	  sep++;
+	  total -= (len + 1);
+	}
+      else
+	{
+	  len = total;
+	  total = 0;
+	}
+
+      patterns = realloc (patterns, (pcount + 1) * sizeof (*patterns));
+      if (patterns == NULL)
+	error (2, errno, _("memory exhausted"));
+
+      patterns[pcount] = patterns0;
+
+      if ((err = re_compile_pattern (motif, len,
+				    &(patterns[pcount].regexbuf))) != 0)
+	error (2, 0, err);
+      pcount++;
+
+      motif = sep;
+    } while (sep && total != 0);

  /* In the match_words and match_lines cases, we use a different pattern
     for the DFA matcher that will quickly throw out cases that won't work.
@ -144,49 +227,42 @@ Gcompile (char *pattern, size_t size)
  if (match_words || match_lines)
    {
      /* In the whole-word case, we use the pattern:
-	 (^|[^A-Za-z_])(userpattern)([^A-Za-z_]|$).
+	 \(^\|[^[:alnum:]_]\)\(userpattern\)\([^[:alnum:]_]\|$\).
 	 In the whole-line case, we use the pattern:
-	 ^(userpattern)$.
-	 BUG: Using [A-Za-z_] is locale-dependent!
-	 So will use [:alnum:] */
+	 ^\(userpattern\)$.  */

-      char *n = malloc(size + 50);
-      int i = 0;
-
-      strcpy(n, "");
-
-      if (match_lines)
-	strcpy(n, "^\\(");
-      if (match_words)
-	strcpy(n, "\\(^\\|[^[:alnum:]_]\\)\\(");
-
-      i = strlen(n);
-      memcpy(n + i, pattern, size);
+      static char const line_beg[] = "^\\(";
+      static char const line_end[] = "\\)$";
+      static char const word_beg[] = "\\(^\\|[^[:alnum:]_]\\)\\(";
+      static char const word_end[] = "\\)\\([^[:alnum:]_]\\|$\\)";
+      char *n = malloc (sizeof word_beg - 1 + size + sizeof word_end);
+      size_t i;
+      strcpy (n, match_lines ? line_beg : word_beg);
+      i = strlen (n);
+      memcpy (n + i, pattern, size);
      i += size;
-
-      if (match_words)
-	strcpy(n + i, "\\)\\([^[:alnum:]_]\\|$\\)");
-      if (match_lines)
-	strcpy(n + i, "\\)$");
-
-      i += strlen(n + i);
-      dfacomp(n, i, &dfa, 1);
+      strcpy (n + i, match_lines ? line_end : word_end);
+      i += strlen (n + i);
+      pattern = n;
+      size = i;
    }
-  else
-    dfacomp(pattern, size, &dfa, 1);

-  kwsmusts();
+  dfacomp (pattern, size, &dfa, 1);
+  kwsmusts ();
 }

 static void
-Ecompile (char *pattern, size_t size)
+Ecompile (char const *pattern, size_t size)
 {
  const char *err;
+  const char *sep;
+  size_t total = size;
+  char const *motif = pattern;

-  if (strcmp(matcher, "awk") == 0)
+  if (strcmp (matcher, "awk") == 0)
    {
-      re_set_syntax(RE_SYNTAX_AWK);
-      dfasyntax(RE_SYNTAX_AWK, match_icase, eolbyte);
+      re_set_syntax (RE_SYNTAX_AWK);
+      dfasyntax (RE_SYNTAX_AWK, match_icase, eolbyte);
    }
  else
    {
@ -194,8 +270,38 @@ Ecompile (char *pattern, size_t size)
      dfasyntax (RE_SYNTAX_POSIX_EGREP, match_icase, eolbyte);
    }

-  if ((err = re_compile_pattern(pattern, size, &regexbuf)) != 0)
-    fatal(err, 0);
+  /* The GNU regex compiler must be given each pattern separately, to detect
+     errors like "[\nallo\n]\n".  The patterns here are "[", "allo" and "]",
+     for which GNU regex should raise a syntax error.  The same goes for
+     backrefs, which should be local to each pattern.  */
+  do
+    {
+      size_t len;
+      sep = memchr (motif, '\n', total);
+      if (sep)
+	{
+	  len = sep - motif;
+	  sep++;
+	  total -= (len + 1);
+	}
+      else
+	{
+	  len = total;
+	  total = 0;
+	}
+
+      patterns = realloc (patterns, (pcount + 1) * sizeof (*patterns));
+      if (patterns == NULL)
+	error (2, errno, _("memory exhausted"));
+      patterns[pcount] = patterns0;
+
+      if ((err = re_compile_pattern (motif, len,
+				    &(patterns[pcount].regexbuf))) != 0)
+	error (2, 0, err);
+      pcount++;
+
+      motif = sep;
+    } while (sep && total != 0);

  /* In the match_words and match_lines cases, we use a different pattern
     for the DFA matcher that will quickly throw out cases that won't work.
@ -204,186 +310,236 @@ Ecompile (char *pattern, size_t size)
  if (match_words || match_lines)
    {
      /* In the whole-word case, we use the pattern:
-	 (^|[^A-Za-z_])(userpattern)([^A-Za-z_]|$).
+	 (^|[^[:alnum:]_])(userpattern)([^[:alnum:]_]|$).
 	 In the whole-line case, we use the pattern:
-	 ^(userpattern)$.
-	 BUG: Using [A-Za-z_] is locale-dependent!
-	 so will use the char class */
-
-      char *n = malloc(size + 50);
-      int i = 0;
-
-      strcpy(n, "");
-
-      if (match_lines)
-	strcpy(n, "^(");
-      if (match_words)
-	strcpy(n, "(^|[^[:alnum:]_])(");
+	 ^(userpattern)$.  */

+      static char const line_beg[] = "^(";
+      static char const line_end[] = ")$";
+      static char const word_beg[] = "(^|[^[:alnum:]_])(";
+      static char const word_end[] = ")([^[:alnum:]_]|$)";
+      char *n = malloc (sizeof word_beg - 1 + size + sizeof word_end);
+      size_t i;
+      strcpy (n, match_lines ? line_beg : word_beg);
      i = strlen(n);
-      memcpy(n + i, pattern, size);
+      memcpy (n + i, pattern, size);
      i += size;
-
-      if (match_words)
-	strcpy(n + i, ")([^[:alnum:]_]|$)");
-      if (match_lines)
-	strcpy(n + i, ")$");
-
-      i += strlen(n + i);
-      dfacomp(n, i, &dfa, 1);
+      strcpy (n + i, match_lines ? line_end : word_end);
+      i += strlen (n + i);
+      pattern = n;
+      size = i;
    }
-  else
-    dfacomp(pattern, size, &dfa, 1);

-  kwsmusts();
+  dfacomp (pattern, size, &dfa, 1);
+  kwsmusts ();
 }

-static char *
-EGexecute (char *buf, size_t size, char **endp)
+static size_t
+EGexecute (char const *buf, size_t size, size_t *match_size, int exact)
 {
-  register char *buflim, *beg, *end, save;
+  register char const *buflim, *beg, *end;
  char eol = eolbyte;
  int backref, start, len;
  struct kwsmatch kwsm;
-  static struct re_registers regs; /* This is static on account of a BRAIN-DEAD
-				    Q@#%!# library interface in regex.c.  */
+  size_t i;
+#ifdef MBS_SUPPORT
+  char *mb_properties = NULL;
+#endif /* MBS_SUPPORT */
+
+#ifdef MBS_SUPPORT
+  if (MB_CUR_MAX > 1 && kwset)
+    mb_properties = check_multibyte_string(buf, size);
+#endif /* MBS_SUPPORT */

  buflim = buf + size;

-  for (beg = end = buf; end < buflim; beg = end + 1)
+  for (beg = end = buf; end < buflim; beg = end)
    {
-      if (kwset)
+      if (!exact)
 	{
-	  /* Find a possible match using the KWset matcher. */
-	  beg = kwsexec(kwset, beg, buflim - beg, &kwsm);
-	  if (!beg)
-	    goto failure;
-	  /* Narrow down to the line containing the candidate, and
-	     run it through DFA. */
-	  end = memchr(beg, eol, buflim - beg);
-	  if (!end)
-	    end = buflim;
-	  while (beg > buf && beg[-1] != eol)
-	    --beg;
-	  save = *end;
-	  if (kwsm.index < lastexact)
-	    goto success;
-	  if (!dfaexec(&dfa, beg, end, 0, (int *) 0, &backref))
+	  if (kwset)
 	    {
-	      *end = save;
-	      continue;
+	      /* Find a possible match using the KWset matcher. */
+	      size_t offset = kwsexec (kwset, beg, buflim - beg, &kwsm);
+	      if (offset == (size_t) -1)
+		{
+#ifdef MBS_SUPPORT
+		  if (MB_CUR_MAX > 1)
+		    free(mb_properties);
+#endif
+		  return (size_t)-1;
+		}
+	      beg += offset;
+	      /* Narrow down to the line containing the candidate, and
+		 run it through DFA. */
+	      end = memchr(beg, eol, buflim - beg);
+	      end++;
+#ifdef MBS_SUPPORT
+	      if (MB_CUR_MAX > 1 && mb_properties[beg - buf] == 0)
+		continue;
+#endif
+	      while (beg > buf && beg[-1] != eol)
+		--beg;
+	      if (kwsm.index < kwset_exact_matches)
+		goto success;
+	      if (dfaexec (&dfa, beg, end - beg, &backref) == (size_t) -1)
+		continue;
+	    }
+	  else
+	    {
+	      /* No good fixed strings; start with DFA. */
+	      size_t offset = dfaexec (&dfa, beg, buflim - beg, &backref);
+	      if (offset == (size_t) -1)
+		break;
+	      /* Narrow down to the line we've found. */
+	      beg += offset;
+	      end = memchr (beg, eol, buflim - beg);
+	      end++;
+	      while (beg > buf && beg[-1] != eol)
+		--beg;
 	    }
-	  *end = save;
-	  /* Successful, no backreferences encountered. */
-	  if (!backref)
-	    goto success;
-	}
-      else
-	{
-	  /* No good fixed strings; start with DFA. */
-	  save = *buflim;
-	  beg = dfaexec(&dfa, beg, buflim, 0, (int *) 0, &backref);
-	  *buflim = save;
-	  if (!beg)
-	    goto failure;
-	  /* Narrow down to the line we've found. */
-	  end = memchr(beg, eol, buflim - beg);
-	  if (!end)
-	    end = buflim;
-	  while (beg > buf && beg[-1] != eol)
-	    --beg;
 	  /* Successful, no backreferences encountered! */
 	  if (!backref)
 	    goto success;
 	}
+      else
+	end = beg + size;
+
      /* If we've made it to this point, this means DFA has seen
 	 a probable match, and we need to run it through Regex. */
-      regexbuf.not_eol = 0;
-      if ((start = re_search(&regexbuf, beg, end - beg, 0, end - beg, &regs)) >= 0)
+      for (i = 0; i < pcount; i++)
 	{
-	  len = regs.end[0] - start;
-	  if ((!match_lines && !match_words)
-	      || (match_lines && len == end - beg))
-	    goto success;
-	  /* If -w, check if the match aligns with word boundaries.
-	     We do this iteratively because:
-	     (a) the line may contain more than one occurence of the pattern, and
-	     (b) Several alternatives in the pattern might be valid at a given
-	     point, and we may need to consider a shorter one to find a word
-	     boundary. */
-	  if (match_words)
-	    while (start >= 0)
-	      {
-		if ((start == 0 || !WCHAR ((unsigned char) beg[start - 1]))
-		    && (len == end - beg
-			|| !WCHAR ((unsigned char) beg[start + len])))
-		  goto success;
-		if (len > 0)
+	  patterns[i].regexbuf.not_eol = 0;
+	  if (0 <= (start = re_search (&(patterns[i].regexbuf), beg,
+				       end - beg - 1, 0,
+				       end - beg - 1, &(patterns[i].regs))))
+	    {
+	      len = patterns[i].regs.end[0] - start;
+	      if (exact)
+		{
+		  *match_size = len;
+		  return start;
+		}
+	      if ((!match_lines && !match_words)
+		  || (match_lines && len == end - beg - 1))
+		goto success;
+	      /* If -w, check if the match aligns with word boundaries.
+		 We do this iteratively because:
+		 (a) the line may contain more than one occurrence of the
+		 pattern, and
+		 (b) Several alternatives in the pattern might be valid at a
+		 given point, and we may need to consider a shorter one to
+		 find a word boundary.  */
+	      if (match_words)
+		while (start >= 0)
 		  {
-		    /* Try a shorter length anchored at the same place. */
-		    --len;
-		    regexbuf.not_eol = 1;
-		    len = re_match(&regexbuf, beg, start + len, start, &regs);
+		    if ((start == 0 || !WCHAR ((unsigned char) beg[start - 1]))
+			&& (len == end - beg - 1
+			    || !WCHAR ((unsigned char) beg[start + len])))
+		      goto success;
+		    if (len > 0)
+		      {
+			/* Try a shorter length anchored at the same place. */
+			--len;
+			patterns[i].regexbuf.not_eol = 1;
+			len = re_match (&(patterns[i].regexbuf), beg,
+					start + len, start,
+					&(patterns[i].regs));
+		      }
+		    if (len <= 0)
+		      {
+			/* Try looking further on. */
+			if (start == end - beg - 1)
+			  break;
+			++start;
+			patterns[i].regexbuf.not_eol = 0;
+			start = re_search (&(patterns[i].regexbuf), beg,
+					   end - beg - 1,
+					   start, end - beg - 1 - start,
+					   &(patterns[i].regs));
+			len = patterns[i].regs.end[0] - start;
+		      }
 		  }
-		if (len <= 0)
-		  {
-		    /* Try looking further on. */
-		    if (start == end - beg)
-		      break;
-		    ++start;
-		    regexbuf.not_eol = 0;
-		    start = re_search(&regexbuf, beg, end - beg,
-				      start, end - beg - start, &regs);
-		    len = regs.end[0] - start;
-		  }
-	      }
-	}
-    }
-
- failure:
-  return 0;
+	    }
+	} /* for Regex patterns.  */
+    } /* for (beg = end ..) */
+#ifdef MBS_SUPPORT
+  if (MB_CUR_MAX > 1 && mb_properties)
+    free (mb_properties);
+#endif /* MBS_SUPPORT */
+  return (size_t) -1;

 success:
-  *endp = end < buflim ? end + 1 : end;
-  return beg;
+#ifdef MBS_SUPPORT
+  if (MB_CUR_MAX > 1 && mb_properties)
+    free (mb_properties);
+#endif /* MBS_SUPPORT */
+  *match_size = end - beg;
+  return beg - buf;
 }

 static void
-Fcompile (char *pattern, size_t size)
+Fcompile (char const *pattern, size_t size)
 {
-  char *beg, *lim, *err;
+  char const *beg, *lim, *err;

-  kwsinit();
+  kwsinit ();
  beg = pattern;
  do
    {
      for (lim = beg; lim < pattern + size && *lim != '\n'; ++lim)
 	;
-      if ((err = kwsincr(kwset, beg, lim - beg)) != 0)
-	fatal(err, 0);
+      if ((err = kwsincr (kwset, beg, lim - beg)) != 0)
+	error (2, 0, err);
      if (lim < pattern + size)
 	++lim;
      beg = lim;
    }
  while (beg < pattern + size);

-  if ((err = kwsprep(kwset)) != 0)
-    fatal(err, 0);
+  if ((err = kwsprep (kwset)) != 0)
+    error (2, 0, err);
 }

-static char *
-Fexecute (char *buf, size_t size, char **endp)
+static size_t
+Fexecute (char const *buf, size_t size, size_t *match_size, int exact)
 {
-  register char *beg, *try, *end;
+  register char const *beg, *try, *end;
  register size_t len;
  char eol = eolbyte;
  struct kwsmatch kwsmatch;
+#ifdef MBS_SUPPORT
+  char *mb_properties;
+  if (MB_CUR_MAX > 1)
+    mb_properties = check_multibyte_string (buf, size);
+#endif /* MBS_SUPPORT */

  for (beg = buf; beg <= buf + size; ++beg)
    {
-      if (!(beg = kwsexec(kwset, beg, buf + size - beg, &kwsmatch)))
-	return 0;
+      size_t offset = kwsexec (kwset, beg, buf + size - beg, &kwsmatch);
+      if (offset == (size_t) -1)
+	{
+#ifdef MBS_SUPPORT
+	  if (MB_CUR_MAX > 1)
+	    free(mb_properties);
+#endif /* MBS_SUPPORT */
+	  return offset;
+	}
+#ifdef MBS_SUPPORT
+      if (MB_CUR_MAX > 1 && mb_properties[offset+beg-buf] == 0)
+	continue; /* It is a part of multibyte character.  */
+#endif /* MBS_SUPPORT */
+      beg += offset;
      len = kwsmatch.size[0];
+      if (exact)
+	{
+	  *match_size = len;
+#ifdef MBS_SUPPORT
+	  if (MB_CUR_MAX > 1)
+	    free (mb_properties);
+#endif /* MBS_SUPPORT */
+	  return beg - buf;
+	}
      if (match_lines)
 	{
 	  if (beg > buf && beg[-1] != eol)
@ -393,13 +549,22 @@ Fexecute (char *buf, size_t size, char **endp)
 	  goto success;
 	}
      else if (match_words)
-	for (try = beg; len && try;)
+	for (try = beg; len; )
 	  {
 	    if (try > buf && WCHAR((unsigned char) try[-1]))
 	      break;
 	    if (try + len < buf + size && WCHAR((unsigned char) try[len]))
 	      {
-		try = kwsexec(kwset, beg, --len, &kwsmatch);
+		offset = kwsexec (kwset, beg, --len, &kwsmatch);
+		if (offset == (size_t) -1)
+		  {
+#ifdef MBS_SUPPORT
+		    if (MB_CUR_MAX > 1)
+		      free (mb_properties);
+#endif /* MBS_SUPPORT */
+		    return offset;
+		  }
+		try = beg + offset;
 		len = kwsmatch.size[0];
 	      }
 	    else
@ -409,15 +574,153 @@ Fexecute (char *buf, size_t size, char **endp)
 	goto success;
    }

-  return 0;
+#ifdef MBS_SUPPORT
+  if (MB_CUR_MAX > 1)
+    free (mb_properties);
+#endif /* MBS_SUPPORT */
+  return -1;

 success:
-  if ((end = memchr(beg + len, eol, (buf + size) - (beg + len))) != 0)
-    ++end;
-  else
-    end = buf + size;
-  *endp = end;
-  while (beg > buf && beg[-1] != '\n')
+  end = memchr (beg + len, eol, (buf + size) - (beg + len));
+  end++;
+  while (buf < beg && beg[-1] != eol)
    --beg;
-  return beg;
+  *match_size = end - beg;
+#ifdef MBS_SUPPORT
+  if (MB_CUR_MAX > 1)
+    free (mb_properties);
+#endif /* MBS_SUPPORT */
+  return beg - buf;
 }
+
+#if HAVE_LIBPCRE
+/* Compiled internal form of a Perl regular expression.  */
+static pcre *cre;
+
+/* Additional information about the pattern.  */
+static pcre_extra *extra;
+#endif
+
+static void
+Pcompile (char const *pattern, size_t size)
+{
+#if !HAVE_LIBPCRE
+  error (2, 0, _("The -P option is not supported"));
+#else /* Wrap PATTERN (SIZE bytes), escape its NULs, compile and study it.  */
+  int e;
+  char const *ep;
+  char *re = xmalloc (4 * size + 7); /* 4 per byte ("\000"), 3 + 3 wrap, NUL.  */
+  int flags = PCRE_MULTILINE | (match_icase ? PCRE_CASELESS : 0);
+  char const *patlim = pattern + size;
+  char *n = re;
+  char const *p;
+  char const *pnul;
+
+  /* FIXME: Remove this restriction.  */
+  if (eolbyte != '\n')
+    error (2, 0, _("The -P and -z options cannot be combined"));
+
+  *n = '\0';
+  if (match_lines)
+    strcpy (n, "^(");
+  if (match_words)
+    strcpy (n, "\\b("); /* Overwrites "^(" when both flags are set.  */
+  n += strlen (n);
+
+  /* The PCRE interface doesn't allow NUL bytes in the pattern, so
+     replace each NUL byte in the pattern with the four characters
+     "\000", removing a preceding backslash if there are an odd
+     number of backslashes before the NUL.
+
+     FIXME: This method does not work with some multibyte character
+     encodings, notably Shift-JIS, where a multibyte character can end
+     in a backslash byte.  */
+  for (p = pattern; (pnul = memchr (p, '\0', patlim - p)); p = pnul + 1)
+    {
+      memcpy (n, p, pnul - p);
+      n += pnul - p;
+      for (p = pnul; pattern < p && p[-1] == '\\'; p--)
+	continue;
+      n -= (pnul - p) & 1; /* Odd backslash count: drop the last one.  */
+      strcpy (n, "\\000");
+      n += 4;
+    }
+
+  memcpy (n, p, patlim - p);
+  n += patlim - p;
+  *n = '\0';
+  if (match_words)
+    strcpy (n, ")\\b");
+  if (match_lines)
+    strcpy (n, ")$"); /* Overwrites ")\\b" when both flags are set.  */
+
+  cre = pcre_compile (re, flags, &ep, &e, pcre_maketables ());
+  if (!cre)
+    error (2, 0, ep); /* NOTE(review): EP is used as the format argument.  */
+
+  extra = pcre_study (cre, 0, &ep);
+  if (ep)
+    error (2, 0, ep); /* NOTE(review): EP is used as the format argument.  */
+
+  free (re);
+#endif
+}
+
+static size_t
+Pexecute (char const *buf, size_t size, size_t *match_size, int exact)
+{
+#if !HAVE_LIBPCRE
+  abort ();
+  return -1;
+#else /* Return the offset in BUF of the match or its line, -1 if none.  */
+  /* This array must have at least two elements; everything after that
+     is just for performance improvement in pcre_exec.  */
+  int sub[300];
+
+  int e = pcre_exec (cre, extra, buf, size, 0, 0,
+		     sub, sizeof sub / sizeof *sub);
+
+  if (e <= 0)
+    {
+      switch (e)
+	{
+	case PCRE_ERROR_NOMATCH:
+	  return -1;
+
+	case PCRE_ERROR_NOMEMORY:
+	  error (2, 0, _("Memory exhausted")); /* No break: presumably exits.  */
+
+	default:
+	  abort ();
+	}
+    }
+  else
+    {
+      /* Narrow down to the line we've found, unless EXACT is set.  */
+      char const *beg = buf + sub[0];
+      char const *end = buf + sub[1];
+      char const *buflim = buf + size;
+      char eol = eolbyte;
+      if (!exact)
+	{
+	  end = memchr (end, eol, buflim - end); /* NOTE(review): NULL unchecked.  */
+	  end++; /* Include the EOL byte in the reported span.  */
+	  while (buf < beg && beg[-1] != eol)
+	    --beg;
+	}
+
+      *match_size = end - beg;
+      return beg - buf;
+    }
+#endif
+}
+
+struct matcher const matchers[] = { /* Entries: name, compile, execute.  */
+  { "default", Gcompile, EGexecute },
+  { "grep", Gcompile, EGexecute },
+  { "egrep", Ecompile, EGexecute },
+  { "awk", Ecompile, EGexecute },
+  { "fgrep", Fcompile, Fexecute },
+  { "perl", Pcompile, Pexecute },
+  { "", 0, 0 }, /* Presumably the end-of-table marker; confirm with users.  */
+};