mirror of
https://git.FreeBSD.org/ports.git
synced 2024-11-04 22:33:27 +00:00
b4caaca0b1
this is the LPC interface to GNU regexp by Robert Leslie <rob@ccs.neu.edu> and is used by the upcoming dgd-lpmoo port
534 lines
14 KiB
Plaintext
534 lines
14 KiB
Plaintext
*** src.rgx/config.c Thu Jan 2 23:34:31 1997
|
|
--- config.c Thu Jan 2 23:51:21 1997
|
|
***************
|
|
*** 19,24 ****
|
|
--- 19,25 ----
|
|
# include "compile.h"
|
|
# include "csupport.h"
|
|
# include "table.h"
|
|
+ # include "rgx.h"
|
|
|
|
typedef struct {
|
|
char *name; /* name of the option */
|
|
***************
|
|
*** 810,815 ****
|
|
--- 811,819 ----
|
|
|
|
/* initialize interpreter */
|
|
i_init(conf[CREATE].u.str);
|
|
+
|
|
+ /* initialize regular expressions */
|
|
+ rgx_init();
|
|
|
|
/* initialize compiler */
|
|
c_init(conf[AUTO_OBJECT].u.str,
|
|
*** src.rgx/kfun/extra.c Tue Sep 27 09:28:26 1994
|
|
--- kfun/extra.c Thu Feb 2 22:25:18 1995
|
|
***************
|
|
*** 560,562 ****
|
|
--- 560,640 ----
|
|
error("Not yet implemented");
|
|
}
|
|
# endif
|
|
+
|
|
+
|
|
+ # ifdef FUNCDEF
|
|
+ FUNCDEF("regexp_compile", kf_regexp_compile, p_regexp_compile)
|
|
+ # else
|
|
+ char p_regexp_compile[] = { C_TYPECHECKED | C_STATIC | C_VARARGS,
|
|
+ T_STRING | (1 << REFSHIFT), 2, T_STRING, T_INT };
|
|
+
|
|
+ /*
|
|
+  * NAME:        kfun->regexp_compile()
|
|
+  * DESCRIPTION: swap the pattern string on the stack for its compiled form
|
|
+  */
|
|
+ int kf_regexp_compile(nargs)
|
|
+ int nargs;
|
|
+ {
|
|
+     int fold;
|
|
+     array *rx;
|
|
+
|
|
+     if (nargs < 1)
|
|
+         return -1;
|
|
+
|
|
+     /* optional 2nd argument is on top of the stack: nonzero = ignore case */
|
|
+     fold = (nargs == 2 && (sp++)->u.number != 0);
|
|
+     rx = rgx_new(sp->u.string, !fold);
|
|
+
|
|
+     /* the argument slot is reused for the result */
|
|
+     str_del(sp->u.string);
|
|
+     sp->type = T_ARRAY;
|
|
+     arr_ref(sp->u.array = rx);
|
|
+     return 0;
|
|
+ }
|
|
+ # endif
|
|
+
|
|
+
|
|
+ # ifdef FUNCDEF
|
|
+ FUNCDEF("regexp_match", kf_regexp_match, p_regexp_match)
|
|
+ # else
|
|
+ char p_regexp_match[] = { C_TYPECHECKED | C_STATIC | C_VARARGS,
|
|
+ T_INT | (1 << REFSHIFT), 3,
|
|
+ T_STRING | (1 << REFSHIFT), T_STRING, T_INT };
|
|
+
|
|
+ /*
|
|
+  * NAME:        kfun->regexp_match()
|
|
+  * DESCRIPTION: match a subject string against a compiled pattern; the two
|
|
+  *              arguments are replaced by the register array, or by 0
|
|
+  */
|
|
+ int kf_regexp_match(nargs)
|
|
+ int nargs;
|
|
+ {
|
|
+     int backwards;
|
|
+     array *rx, *regs;
|
|
+
|
|
+     if (nargs < 2)
|
|
+         return -1;
|
|
+
|
|
+     /* stack, top first: [reverse,] subject, compiled pattern */
|
|
+     backwards = 0;
|
|
+     if (nargs == 3)
|
|
+         backwards = (sp++)->u.number;
|
|
+     rx = sp[1].u.array;
|
|
+     if (rx->size != 3)
|
|
+         return 1;
|
|
+
|
|
+     regs = rgx_match(d_get_elts(rx), sp->u.string, backwards);
|
|
+
|
|
+     /* drop both arguments; the pattern's slot receives the result */
|
|
+     str_del((sp++)->u.string);
|
|
+     arr_del(sp->u.array);
|
|
+     if (regs != (array *) 0) {
|
|
+         arr_ref(sp->u.array = regs);
|
|
+         return 0;
|
|
+     }
|
|
+
|
|
+     sp->type = T_INT;
|
|
+     sp->u.number = 0;
|
|
+     return 0;
|
|
+ }
|
|
+ # endif
|
|
*** src.rgx/kfun/kfun.h Sun May 8 08:15:01 1994
|
|
--- kfun/kfun.h Thu Feb 2 22:25:18 1995
|
|
***************
|
|
*** 5,7 ****
|
|
--- 5,8 ----
|
|
# include "xfloat.h"
|
|
# include "interpret.h"
|
|
# include "data.h"
|
|
+ # include "rgx.h"
|
|
*** src.rgx/rgx.c Thu Jan 2 21:41:55 1997
|
|
--- rgx.c Thu Jan 2 21:17:46 1997
|
|
***************
|
|
*** 0 ****
|
|
--- 1,213 ----
|
|
+ # include "dgd.h"
|
|
+ # include "str.h"
|
|
+ # include "array.h"
|
|
+ # include "interpret.h"
|
|
+ # include <gnuregex.h>
|
|
+ # include "rgx.h"
|
|
+ # include <memory.h>
|
|
+
|
|
+ static char trans_table[256];
|
|
+
|
|
+ /*
|
|
+  * NAME:        regexp->init()
|
|
+  * DESCRIPTION: fill trans_table with the identity mapping, except that the
|
|
+  *              letters a-z map to A-Z
|
|
+  */
|
|
+ void rgx_init()
|
|
+ {
|
|
+     register int c;
|
|
+
|
|
+     /* rgx_new() installs this table when case does not matter */
|
|
+     for (c = 0; c < 256; ++c)
|
|
+         trans_table[c] = (c >= 'a' && c <= 'z') ? c - 'a' + 'A' : c;
|
|
+ }
|
|
+
|
|
+ /*
|
|
+  * NAME:        rgx_setend()
|
|
+  * DESCRIPTION: from the index of a `[', find its closing `]' (or return len)
|
|
+  */
|
|
+ static long rgx_setend(text, len, i)
|
|
+ register char *text;
|
|
+ register long len, i;
|
|
+ {
|
|
+     if (++i < len && text[i] == '^')
|
|
+         ++i;
|
|
+     if (i < len)
|
|
+         ++i;    /* the first member is literal, even when it is `]' */
|
|
+     while (i < len && text[i] != ']')
|
|
+         ++i;
|
|
+     return i;
|
|
+ }
|
|
+
|
|
+ /*
|
|
+  * NAME:        rgx_convert()
|
|
+  * DESCRIPTION: copy a pattern, turning the escape `%' into `\' and a literal
|
|
+  *              `\' into `\\'; charsets are copied unchanged
|
|
+  */
|
|
+ static string *rgx_convert(pattern)
|
|
+ string *pattern;
|
|
+ {
|
|
+     register char *p, *q;
|
|
+     register long i, end, len, extra;
|
|
+     string *s;
|
|
+
|
|
+     p = pattern->text;
|
|
+     len = pattern->len;
|
|
+     for (extra = 0, i = 0; i < len; i++) {
|
|
+         switch (p[i]) {
|
|
+         case '[':
|
|
+             i = rgx_setend(p, len, i);
|
|
+             break;
|
|
+         case '%':
|
|
+             i++;        /* the escaped character is skipped */
|
|
+             break;
|
|
+         case '\\':
|
|
+             extra++;    /* doubled in the copy */
|
|
+             break;
|
|
+         }
|
|
+     }
|
|
+
|
|
+     s = str_new((char *) NULL, len + extra);
|
|
+     q = s->text;
|
|
+     for (i = 0; i < len; i++) {
|
|
+         switch (p[i]) {
|
|
+         case '[':
|
|
+             for (end = rgx_setend(p, len, i); i < end; i++)
|
|
+                 *q++ = p[i];
|
|
+             if (i < len)
|
|
+                 *q++ = p[i];    /* the closing `]' */
|
|
+             break;
|
|
+         case '%':
|
|
+             *q++ = '\\';
|
|
+             if (++i < len)
|
|
+                 *q++ = p[i];
|
|
+             break;
|
|
+         case '\\':
|
|
+             *q++ = '\\';
|
|
+             /* fall through */
|
|
+         default:
|
|
+             *q++ = p[i];
|
|
+         }
|
|
+     }
|
|
+     return s;
|
|
+ }
|
|
+
|
|
+ /*
|
|
+  * NAME:        regexp->new()
|
|
+  * DESCRIPTION: compile a pattern into an array of three strings: the raw
|
|
+  *              pattern buffer struct, the compiled code and the fastmap
|
|
+  */
|
|
+ array *rgx_new(pattern, case_matters)
|
|
+ string *pattern;
|
|
+ int case_matters;
|
|
+ {
|
|
+     struct re_pattern_buffer patbuf;
|
|
+     char fastmap[256];
|
|
+     const char *compile_error;
|
|
+     array *result;
|
|
+     register value *v;
|
|
+     string *s;
|
|
+
|
|
+     patbuf.buffer = 0;
|
|
+     patbuf.allocated = 0;
|
|
+     patbuf.used = 0;
|
|
+     patbuf.fastmap = fastmap;
|
|
+     patbuf.translate = (case_matters ? (char *) 0 : trans_table);
|
|
+     patbuf.fastmap_accurate = 0;
|
|
+
|
|
+     /* work on a converted copy; it is referenced so that str_del() can drop it */
|
|
+     str_ref(s = rgx_convert(pattern));
|
|
+     compile_error = re_compile_pattern(s->text, s->len, &patbuf);
|
|
+     str_del(s);
|
|
+     result = (array *) 0;
|
|
+     if (compile_error == (char *) 0) {
|
|
+         re_compile_fastmap(&patbuf);
|
|
+         result = arr_new(3L);
|
|
+         v = result->elts;
|
|
+         v[0].type = v[1].type = v[2].type = T_STRING;
|
|
+         str_ref(v[0].u.string = str_new((char *) &patbuf,
|
|
+                                         (long) sizeof(patbuf)));
|
|
+         str_ref(v[1].u.string = str_new((char *) patbuf.buffer,
|
|
+                                         (long) patbuf.allocated));
|
|
+         str_ref(v[2].u.string = str_new(fastmap, 256L));
|
|
+     }
|
|
+
|
|
+     /* fastmap and translate are not malloc()ed: hide them from regfree() */
|
|
+     patbuf.fastmap = 0;
|
|
+     patbuf.translate = 0;
|
|
+     regfree(&patbuf);
|
|
+     if (compile_error != (char *) 0)
|
|
+         error(compile_error);
|
|
+     return result;
|
|
+ }
|
|
+
|
|
+ /*
|
|
+  * NAME:        regexp->match()
|
|
+  * DESCRIPTION: search subject with a pattern made by rgx_new(), from the end
|
|
+  *              of the subject backwards if reverse is nonzero.  Returns 0 if
|
|
+  *              nothing matched, else an array of RGX_NREGS (start, end) pairs
|
|
+  *              of indices into subject; unused pairs are (0, -1).
|
|
+  */
|
|
+ array *rgx_match(pattern, subject, reverse)
|
|
+ value *pattern;
|
|
+ string *subject;
|
|
+ int reverse;
|
|
+ {
|
|
+     long sub_len;
|
|
+     struct re_pattern_buffer patbuf;
|
|
+     struct re_registers regs;
|
|
+     regoff_t starts[RGX_NREGS + 1], ends[RGX_NREGS + 1];
|
|
+     array *result;
|
|
+     register value *v;
|
|
+     register int i;
|
|
+
|
|
+     /* the elements come from LPC code: check them before touching them */
|
|
+     if (pattern[0].type != T_STRING || pattern[1].type != T_STRING ||
|
|
+         pattern[2].type != T_STRING ||
|
|
+         pattern[0].u.string->len != sizeof(struct re_pattern_buffer))
|
|
+         error("Invalid compiled pattern");
|
|
+
|
|
+     memcpy((char *) &patbuf, pattern[0].u.string->text,
|
|
+            sizeof(struct re_pattern_buffer));
|
|
+
|
|
+     if (patbuf.allocated != (unsigned long) pattern[1].u.string->len ||
|
|
+         pattern[2].u.string->len != 256)
|
|
+         error("Invalid compiled pattern");
|
|
+
|
|
+     /* the pointers saved in the struct are stale: use the saved copies */
|
|
+     patbuf.buffer = (unsigned char *) pattern[1].u.string->text;
|
|
+     patbuf.fastmap = pattern[2].u.string->text;
|
|
+
|
|
+     /* have the registers stored in the fixed-size arrays declared above */
|
|
+     regs.num_regs = RGX_NREGS;
|
|
+     regs.start = starts;
|
|
+     regs.end = ends;
|
|
+     patbuf.regs_allocated = REGS_FIXED;
|
|
+
|
|
+     sub_len = subject->len;
|
|
+     i = re_search(&patbuf, subject->text, sub_len, reverse ? sub_len : 0,
|
|
+                   reverse ? -(sub_len + 1) : sub_len + 1, &regs);
|
|
+     if (i == -1)
|
|
+         return (array *) 0;
|
|
+     if (i < 0)
|
|
+         error("Regular expression match failed"); /* internal regex error */
|
|
+
|
|
+     result = arr_new((long) RGX_NREGS * 2);
|
|
+     v = result->elts;
|
|
+
|
|
+     /* regex reports each end as one past the match; store the last index */
|
|
+     for (i = 0; i < RGX_NREGS; ++i, v += 2) {
|
|
+         v[0].type = T_INT;
|
|
+         v[1].type = T_INT;
|
|
+         if (starts[i] == -1) {
|
|
+             /* this subexpression took no part in the match */
|
|
+             v[0].u.number = 0;
|
|
+             v[1].u.number = -1;
|
|
+         } else {
|
|
+             v[0].u.number = starts[i];
|
|
+             v[1].u.number = ends[i] - 1;
|
|
+         }
|
|
+     }
|
|
+
|
|
+     return result;
|
|
+ }
|
|
*** src.rgx/rgx.h Thu Jan 2 21:42:05 1997
|
|
--- rgx.h Fri Feb 3 03:09:54 1995
|
|
***************
|
|
*** 0 ****
|
|
--- 1,5 ----
|
|
+ # define RGX_NREGS 10
|
|
+
|
|
+ extern void rgx_init P((void));
|
|
+ extern array *rgx_new P((string*, int));
|
|
+ extern array *rgx_match P((value*, string*, int));
|
|
*** doc.rgx/example.c Thu Jan 1 00:00:00 1970
|
|
--- ../doc/rgx_example.c Fri Feb 3 03:30:01 1995
|
|
***************
|
|
*** 0 ****
|
|
--- 1,49 ----
|
|
+ /*
|
|
+ * This file shows how an interface can be built to cache regexp patterns
|
|
+ * and ultimately provide a more streamlined interface to the regexp kfuns.
|
|
+ *
|
|
+ * Note that since regexp_match() severely depends on the return result from
|
|
+ * regexp_compile() being unaltered, it is a good idea to provide an
|
|
+ * interface like this, and also to mask the regexp_match() kfun from the
|
|
+ * auto object.
|
|
+ */
|
|
+
|
|
+ # define CACHE_SIZE 10
|
|
+
|
|
+ private mapping cache;
|
|
+ private string *list;
|
|
+ private string last_pattern;
|
|
+
|
|
+ static
|
|
+ void create(void)
|
|
+ {
|
|
+     list = ({ });       /* cached patterns, least recently used first */
|
|
+     cache = ([ ]);      /* pattern -> compiled form */
|
|
+ }
|
|
+
|
|
+ /* match subject against pattern, compiling and caching pattern on first use */
|
|
+ int *match(string subject, string pattern)
|
|
+ {
|
|
+     string *buffer;
|
|
+
|
|
+     if ((buffer = cache[pattern]) == 0)
|
|
+     {
|
|
+         buffer = regexp_compile(pattern);
|
|
+
|
|
+         /* list is ordered least- to most-recently used: evict the head */
|
|
+         if (sizeof(list) >= CACHE_SIZE)
|
|
+         {
|
|
+             cache[list[0]] = 0;
|
|
+             list = list[1 ..];
|
|
+         }
|
|
+         list += ({ pattern });
|
|
+         cache[pattern] = buffer;
|
|
+     }
|
|
+     else if (pattern != last_pattern)
|
|
+         list = list - ({ pattern }) + ({ pattern });
|
|
+
|
|
+     /* pattern is now the tail of list, whichever branch was taken */
|
|
+     last_pattern = pattern;
|
|
+
|
|
+     return regexp_match(buffer, subject);
|
|
+ }
|
|
diff -crN doc.rgx/kfun/regexp_compile doc/kfun/regexp_compile
|
|
*** doc.rgx/kfun/regexp_compile Thu Jan 1 00:00:00 1970
|
|
--- ../doc/kfun/regexp_compile Tue Jul 26 00:02:34 1994
|
|
***************
|
|
*** 0 ****
|
|
--- 1,27 ----
|
|
+ NAME
|
|
+ regexp_compile - compile a regular expression
|
|
+
|
|
+ SYNOPSIS
|
|
+ varargs string *regexp_compile(string pattern, int case_insensitive)
|
|
+
|
|
+ DESCRIPTION
|
|
+ The argument pattern is compiled as a regular expression. If the
|
|
+ argument case_insensitive is nonzero, the pattern is compiled in
|
|
+ such a way that subsequent matching will be done without case
|
|
+ sensitivity. The default is to be case-sensitive.
|
|
+
|
|
+ An array of strings is returned; these strings contain binary
|
|
+ data and must not be altered in any way before being passed to
|
|
+ regexp_match().
|
|
+
|
|
+ The compiled regexp can be saved and used any number of times with
|
|
+ regexp_match().
|
|
+
|
|
+ ERRORS
|
|
+ If the argument pattern contains a syntactically malformed regular
|
|
+ expression, an error will result. An error can also occur if the
|
|
+ pattern is too complicated, or if there is not enough memory to
|
|
+ compile the pattern.
|
|
+
|
|
+ SEE ALSO
|
|
+ kfun/regexp_match
|
|
*** doc.rgx/kfun/regexp_match Thu Jan 1 00:00:00 1970
|
|
--- ../doc/kfun/regexp_match Mon Jul 25 22:19:42 1994
|
|
***************
|
|
*** 0 ****
|
|
--- 1,34 ----
|
|
+ NAME
|
|
+ regexp_match - perform regular expression matching
|
|
+
|
|
+ SYNOPSIS
|
|
+ varargs int *regexp_match(string *pattern, string subject, int reverse)
|
|
+
|
|
+ DESCRIPTION
|
|
+ The argument subject is matched against the compiled regular
|
|
+ expression pattern. If the argument reverse is nonzero, matching
|
|
+ is performed from right-to-left; otherwise, matching is performed
|
|
+ left-to-right.
|
|
+
|
|
+ The pattern argument must be an array of strings exactly as it
|
|
+ was received from regexp_compile(); otherwise, the result of
|
|
+ calling this function is undefined.
|
|
+
|
|
+ If the argument subject could not be matched with the regular
|
|
+ expression, 0 is returned. Otherwise, an array of 20 integers
|
|
+ is returned with this format:
|
|
+
|
|
+ ({ start0, end0, start1, end1, ..., start9, end9 })
|
|
+
|
|
+ Each element is a character index into the subject string. The
|
|
+ first two elements, start0 and end0, indicate the part of the subject
|
|
+ that was matched by the regular expression as a whole. The following
|
|
+ elements indicate the starting and ending indices of each
|
|
+ subexpression (denoted by "%(" and "%)" pairs in the original
|
|
+ pattern) that was matched.
|
|
+
|
|
+ If any subexpression was not matched, the corresponding start and
|
|
+ end elements will be 0 and -1, respectively.
|
|
+
|
|
+ SEE ALSO
|
|
+ kfun/regexp_compile
|
|
*** doc.rgx/regexps Thu Jan 1 00:00:00 1970
|
|
--- ../doc/regexps Mon Jul 25 22:58:57 1994
|
|
***************
|
|
*** 0 ****
|
|
--- 1,32 ----
|
|
+
|
|
+ Regular expressions are composed of the following operators:
|
|
+
|
|
+ . Match any single character
|
|
+ XY Match X immediately followed by Y
|
|
+ X* Match zero-or-more of X
|
|
+ X+ Match one-or-more of X
|
|
+ X? Match zero-or-one of X
|
|
+ X%|Y Match either X or Y
|
|
+ [charset] Match any single character in `charset'
|
|
+ [^charset] Match any single character not in `charset'
|
|
+ %(X%) Match X, but also remember the match as a subexpression
|
|
+ %digit Match the numbered previous subexpression
|
|
+ ^X Match X anchored at the beginning of a line
|
|
+ X$ Match X anchored at the end of a line
|
|
+ %b Match the empty string at the beginning or end of a word
|
|
+ %B Match the empty string only within the middle of a word
|
|
+ %< Match the beginning of a word
|
|
+ %> Match the end of a word
|
|
+ %w Match any word-constituent character
|
|
+ %W Match any character that is not word-constituent
|
|
+
|
|
+ Any other character in a regular expression is matched literally with itself.
|
|
+ To match any of the special operator characters .*+?%[^$ literally, precede
|
|
+ the character with `%'.
|
|
+
|
|
+ A `charset' is formed by listing all desired characters within brackets. To
|
|
+ include a literal `^' in a charset, do not list it in the first position. To
|
|
+ include a literal `]', list it immediately after the opening `[' or `[^'. All
|
|
+ characters are non-special (and should not be escaped) within a charset,
|
|
+ except `-', which denotes a character range. To include a literal `-', list it
|
|
+ either first or last.
|
|
*** README.rgx.old Fri Jan 3 03:17:21 1997
|
|
--- ../README.rgx Fri Jan 3 03:14:29 1997
|
|
***************
|
|
*** 0 ****
|
|
--- 1,18 ----
|
|
+ dgd-rgx was written by Robert Leslie <rob@ccs.neu.edu> as an LPC interface to
|
|
+ GNU regex, adding two kfuns to DGD for regular expression matching:
|
|
+
|
|
+ regexp_compile()
|
|
+ regexp_match()
|
|
+
|
|
+ For a description of the regular expression language accepted by these kfuns,
|
|
+ please read doc/regexps.
|
|
+
|
|
+ Complete details for the two kfuns can be found in the doc/kfun directory.
|
|
+
|
|
+ Adapted by Adam David <adam@veda.is> for DGD 1.0.97 and to use the unmodified
|
|
+ GNU regexp library.
|
|
+
|
|
+ This software is a modification of DGD, and is therefore protected by the
|
|
+ DGD Copyright.
|
|
+
|
|
+ There is no warranty for this software.
|