1
0
mirror of https://git.FreeBSD.org/ports.git synced 2024-12-25 04:43:33 +00:00

added kfuns regexp_compile() and regexp_match()

this is the LPC interface to GNU regexp by Robert Leslie <rob@ccs.neu.edu>
and is used by the upcoming dgd-lpmoo port
This commit is contained in:
Adam David 1997-01-03 04:03:04 +00:00
parent 55f96bbeb9
commit b4caaca0b1
Notes: svn2git 2021-03-31 03:12:20 +00:00
svn path=/head/; revision=5178
3 changed files with 569 additions and 13 deletions

View File

@ -1,7 +1,7 @@
*** Makefile.orig Sun Dec 10 19:21:36 1995
--- Makefile Wed Feb 7 22:53:33 1996
*** Makefile.orig Thu Jan 2 23:38:50 1997
--- Makefile Thu Jan 2 23:41:13 1997
***************
*** 3,14 ****
*** 3,24 ****
#
HOST= NETBSD
DEFINES=-D$(HOST) #-DDUMP_FUNCS
@ -14,7 +14,17 @@
LD= $(CC)
DMAKE= make
BIN= ../bin
--- 3,15 ----
OBJ= alloc.o error.o hash.o swap.o str.o array.o object.o data.o path.o \
! editor.o comm.net.o call_out.o interpret.o config.o dgd.o
EDOBJ= alloc.o error.o
LEXOBJ= alloc.o hash.o
COMPOBJ=alloc.o error.o hash.o path.o str.o array.o object.o data.o \
! interpret.o config.o
a.out: $(OBJ) always
cd comp; $(MAKE) 'DMAKE=$(DMAKE)' 'CC=$(CC)' 'CCFLAGS=$(CCFLAGS)' dgd
--- 3,27 ----
#
HOST= NETBSD
DEFINES=-D$(HOST) #-DDUMP_FUNCS
@ -23,22 +33,23 @@
CCFLAGS=$(DEFINES) $(DEBUG)
CFLAGS= -I. -Icomp -Ilex -Ied -Ikfun $(CCFLAGS)
! LDFLAGS=-s
! LIBS=-lcrypt
! LIBS=-lcrypt -lgnuregex
! CC= cc
LD= $(CC)
DMAKE= make
BIN= ../bin
***************
*** 20,25 ****
--- 21,28 ----
COMPOBJ=alloc.o error.o hash.o path.o str.o array.o object.o data.o \
interpret.o config.o
+ all: a.out comp/a.out
+
OBJ= alloc.o error.o hash.o swap.o str.o array.o object.o data.o path.o \
! editor.o comm.net.o call_out.o interpret.o config.o dgd.o rgx.o
EDOBJ= alloc.o error.o
LEXOBJ= alloc.o hash.o
COMPOBJ=alloc.o error.o hash.o path.o str.o array.o object.o data.o \
! interpret.o config.o rgx.o
!
! all: a.out comp/a.out
a.out: $(OBJ) always
cd comp; $(MAKE) 'DMAKE=$(DMAKE)' 'CC=$(CC)' 'CCFLAGS=$(CCFLAGS)' dgd
cd lex; $(MAKE) 'DMAKE=$(DMAKE)' 'CC=$(CC)' 'CCFLAGS=$(CCFLAGS)' dgd
***************
*** 38,44 ****
-mv $(BIN)/driver $(BIN)/driver.old
@ -77,3 +88,10 @@
clean:
rm -f a.out $(OBJ) comp.sub lex.sub ed.sub
***************
*** 92,94 ****
--- 99,102 ----
call_out.o config.o dgd.o: call_out.h
error.o comm.o call_out.o config.o dgd.o: comm.h
config.o: version.h
+ rgx.o: str.h array.h rgx.h interpret.h

533
net/dgd-net/files/patch-ad Normal file
View File

@ -0,0 +1,533 @@
*** src.rgx/config.c Thu Jan 2 23:34:31 1997
--- config.c Thu Jan 2 23:51:21 1997
***************
*** 19,24 ****
--- 19,25 ----
# include "compile.h"
# include "csupport.h"
# include "table.h"
+ # include "rgx.h"
typedef struct {
char *name; /* name of the option */
***************
*** 810,815 ****
--- 811,819 ----
/* initialize interpreter */
i_init(conf[CREATE].u.str);
+
+ /* initialize regular expressions */
+ rgx_init();
/* initialize compiler */
c_init(conf[AUTO_OBJECT].u.str,
*** src.rgx/kfun/extra.c Tue Sep 27 09:28:26 1994
--- kfun/extra.c Thu Feb 2 22:25:18 1995
***************
*** 560,562 ****
--- 560,640 ----
error("Not yet implemented");
}
# endif
+
+
+ # ifdef FUNCDEF
+ FUNCDEF("regexp_compile", kf_regexp_compile, p_regexp_compile)
+ # else
+ char p_regexp_compile[] = { C_TYPECHECKED | C_STATIC | C_VARARGS,
+                             T_STRING | (1 << REFSHIFT), 2, T_STRING, T_INT };
+
+ /*
+  * NAME:        kfun->regexp_compile()
+  * DESCRIPTION: replace the pattern string on the stack by the array that
+  *              rgx_new() builds for it; a nonzero optional flag clears case_matters
+  */
+ int kf_regexp_compile(nargs)
+ int nargs;
+ {
+     array *buffers;
+     int sensitive = 1;              /* used when the optional flag is omitted */
+
+     if (nargs < 1)
+         return -1;
+     if (nargs == 2) {
+         sensitive = !sp->u.number;  /* the optional flag is on top of the stack */
+         sp++;
+     }
+     buffers = rgx_new(sp->u.string, sensitive);
+     str_del(sp->u.string);
+     sp->type = T_ARRAY;
+     arr_ref(sp->u.array = buffers);
+     return 0;
+ }
+ # endif
+
+
+ # ifdef FUNCDEF
+ FUNCDEF("regexp_match", kf_regexp_match, p_regexp_match)
+ # else
+ char p_regexp_match[] = { C_TYPECHECKED | C_STATIC | C_VARARGS,
+                           T_INT | (1 << REFSHIFT), 3,
+                           T_STRING | (1 << REFSHIFT), T_STRING, T_INT };
+
+ /*
+  * NAME:        kfun->regexp_match()
+  * DESCRIPTION: hand the pattern array and subject string on the stack to
+  *              rgx_match() and replace both by its result array, or by the
+  *              integer 0 if it returned no array
+  */
+ int kf_regexp_match(nargs)
+ int nargs;
+ {
+     array *pat, *found;
+     string *text;
+     int backwards = 0;              /* used when the optional flag is omitted */
+
+     if (nargs < 2)
+         return -1;
+     if (nargs == 3) {
+         backwards = sp->u.number;   /* the optional flag is on top of the stack */
+         sp++;
+     }
+     text = sp->u.string;
+     pat = sp[1].u.array;
+     if (pat->size != 3)
+         return 1;                   /* pattern array must hold three elements */
+
+     found = rgx_match(d_get_elts(pat), text, backwards);
+     str_del(sp->u.string);          /* drop the subject ... */
+     sp++;
+     arr_del(sp->u.array);           /* ... and the pattern array */
+     if (found != (array *) 0) {
+         arr_ref(sp->u.array = found);
+         return 0;
+     }
+     sp->type = T_INT;
+     sp->u.number = 0;
+     return 0;
+ }
+ # endif
*** src.rgx/kfun/kfun.h Sun May 8 08:15:01 1994
--- kfun/kfun.h Thu Feb 2 22:25:18 1995
***************
*** 5,7 ****
--- 5,8 ----
# include "xfloat.h"
# include "interpret.h"
# include "data.h"
+ # include "rgx.h"
*** src.rgx/rgx.c Thu Jan 2 21:41:55 1997
--- rgx.c Thu Jan 2 21:17:46 1997
***************
*** 0 ****
--- 1,213 ----
+ # include "dgd.h"
+ # include "str.h"
+ # include "array.h"
+ # include "interpret.h"
+ # include <gnuregex.h>
+ # include "rgx.h"
+ # include <memory.h>
+
+ static char trans_table[256];
+
+ /*
+  * NAME:        regexp->init()
+  * DESCRIPTION: fill trans_table: every character value maps to itself,
+  *              except that 'a' through 'z' map to 'A' through 'Z'
+  *              (rgx_new() uses this table when case does not matter)
+  */
+ void rgx_init()
+ {
+     register int c;
+
+     for (c = 255; c >= 0; --c)
+         trans_table[c] = (c >= 'a' && c <= 'z') ? c - ('a' - 'A') : c;
+ }
+
+ /*
+  * NAME:        regexp->convert()
+  * DESCRIPTION: copy a pattern to dst, rewriting the '%' escape syntax of the
+  *              regexp kfuns into GNU syntax: "%c" becomes "\c", a literal '\'
+  *              becomes "\\", and bracket expressions are copied unchanged.
+  *              src is not modified.  dst needs room for len characters plus
+  *              one more for every '\' in src.  The number of characters
+  *              written is returned.
+  */
+ static long rgx_convert(dst, src, len)
+ register char *dst, *src;
+ register long len;
+ {
+     register long i, j;
+
+     i = j = 0;
+     while (i < len) {
+         switch (src[i]) {
+         case '[':
+             dst[j++] = src[i++];
+             if (i < len && src[i] == '^')
+                 dst[j++] = src[i++];
+             if (i < len)
+                 dst[j++] = src[i++];    /* a ']' in first position is literal */
+             while (i < len) {
+                 if ((dst[j++] = src[i++]) == ']')
+                     break;
+             }
+             break;
+         case '%':
+             /* escape: becomes '\' followed by the escaped character, if any */
+             dst[j++] = '\\';
+             if (++i < len)
+                 dst[j++] = src[i++];
+             break;
+         case '\\':
+             /* not special in the kfun syntax, so quote it for GNU regex */
+             dst[j++] = '\\';
+             dst[j++] = src[i++];
+             break;
+         default:
+             dst[j++] = src[i++];
+             break;
+         }
+     }
+     return j;
+ }
+
+ /*
+  * NAME:        regexp->new()
+  * DESCRIPTION: compile a pattern and return it as an array of three strings:
+  *              a copy of the re_pattern_buffer struct, the compiled buffer,
+  *              and the fastmap.  If case_matters is zero, trans_table is used
+  *              as the translate table.  error() is called if the pattern
+  *              cannot be compiled.
+  */
+ array *rgx_new(pattern, case_matters)
+ string *pattern;
+ int case_matters;
+ {
+     struct re_pattern_buffer patbuf;
+     char fastmap[256];
+     const char *compile_error;
+     array *result;
+     register value *v;
+     register long i, extra, used;
+     string *s;
+
+     /* every '\' in the pattern may grow into two characters */
+     extra = 0;
+     for (i = 0; i < pattern->len; i++) {
+         if (pattern->text[i] == '\\')
+             extra++;
+     }
+
+     /*
+      * Convert into a scratch string instead of patching the caller's string
+      * in place.  It is referenced here so that str_del() below balances it,
+      * the way the other str_new() results in this file are handled.
+      */
+     str_ref(s = str_new((char *) NULL, (long) pattern->len + extra));
+     used = rgx_convert(s->text, pattern->text, (long) pattern->len);
+
+     memset((char *) &patbuf, '\0', sizeof(patbuf));
+     patbuf.fastmap = fastmap;
+     patbuf.translate = (case_matters ? (char *) 0 : trans_table);
+
+     compile_error = re_compile_pattern(s->text, (int) used, &patbuf);
+     str_del(s);
+     if (compile_error != (char *) 0) {
+         /* neither table was malloc'ed, so regfree() must not see them */
+         patbuf.fastmap = 0;
+         patbuf.translate = 0;
+         regfree(&patbuf);
+         error(compile_error);
+     }
+
+     re_compile_fastmap(&patbuf);
+
+     result = arr_new(3L);
+     v = result->elts;
+     v->type = T_STRING;
+     str_ref(v->u.string = str_new((char *) &patbuf, (long) sizeof(patbuf)));
+     ++v;
+     v->type = T_STRING;
+     str_ref(v->u.string = str_new((char *) patbuf.buffer,
+                                   (long) patbuf.allocated));
+     ++v;
+     v->type = T_STRING;
+     str_ref(v->u.string = str_new(fastmap, 256L));
+
+     /* don't let regfree() try to free these */
+     patbuf.fastmap = 0;
+     patbuf.translate = 0;
+     regfree(&patbuf);
+
+     return result;
+ }
+
+ /*
+  * NAME:        regexp->match()
+  * DESCRIPTION: search subject with a pattern produced by rgx_new(), backwards
+  *              if reverse is nonzero.  Returns 0 if nothing matches, else an
+  *              array of RGX_NREGS (start, end) pairs; end is the index of the
+  *              last matched character, and unused registers are (0, -1).
+  */
+ array *rgx_match(pattern, subject, reverse)
+ value *pattern;
+ string *subject;
+ int reverse;
+ {
+     long sub_len;
+     struct re_pattern_buffer patbuf;
+     struct re_registers regs;
+     regoff_t starts[RGX_NREGS + 1], ends[RGX_NREGS + 1];
+     array *result;
+     register value *v;
+     register int i;
+
+     /* the three elements are caller-supplied: check before dereferencing */
+     for (i = 0; i < 3; ++i) {
+         if (pattern[i].type != T_STRING)
+             error("Invalid compiled pattern");
+     }
+     if (pattern[0].u.string->len != sizeof(struct re_pattern_buffer) ||
+         pattern[2].u.string->len != 256)
+         error("Invalid compiled pattern");
+
+     memcpy((char *) &patbuf, pattern[0].u.string->text,
+            sizeof(struct re_pattern_buffer));
+     if (patbuf.allocated != (unsigned long) pattern[1].u.string->len ||
+         patbuf.used > patbuf.allocated)
+         error("Invalid compiled pattern");
+
+     /* the pointers saved in the struct are stale; point them at live data */
+     patbuf.buffer = (unsigned char *) pattern[1].u.string->text;
+     patbuf.fastmap = pattern[2].u.string->text;
+     if (patbuf.translate != (char *) 0)
+         patbuf.translate = trans_table;  /* the only table rgx_new() uses */
+
+     regs.num_regs = RGX_NREGS;
+     regs.start = starts;
+     regs.end = ends;
+     patbuf.regs_allocated = REGS_FIXED;
+
+     sub_len = subject->len;
+     i = re_search(&patbuf, subject->text, sub_len, reverse ? sub_len : 0,
+                   reverse ? -(sub_len + 1) : sub_len + 1, &regs);
+     if (i == -1)
+         return (array *) 0;
+     if (i < 0)
+         error("Regular expression match failed"); /* -2: internal error */
+
+     result = arr_new((long) RGX_NREGS * 2);
+     v = result->elts;
+     for (i = 0; i < RGX_NREGS; ++i, v += 2) {
+         v[0].type = T_INT;
+         v[1].type = T_INT;
+         if (i != 0 && starts[i] == -1) {
+             v[0].u.number = 0;
+             v[1].u.number = -1;
+         } else {
+             v[0].u.number = starts[i];
+             v[1].u.number = ends[i] - 1;
+         }
+     }
+
+     return result;
+ }
*** src.rgx/rgx.h Thu Jan 2 21:42:05 1997
--- rgx.h Fri Feb 3 03:09:54 1995
***************
*** 0 ****
--- 1,5 ----
+ # define RGX_NREGS 10 /* number of (start, end) register pairs rgx_match() reports */
+
+ extern void rgx_init P((void)); /* fill the case translation table */
+ extern array *rgx_new P((string*, int)); /* (pattern, case_matters) -> array of 3 strings */
+ extern array *rgx_match P((value*, string*, int)); /* (compiled elements, subject, reverse) */
*** doc.rgx/example.c Thu Jan 1 00:00:00 1970
--- ../doc/rgx_example.c Fri Feb 3 03:30:01 1995
***************
*** 0 ****
--- 1,49 ----
+ /*
+ * This file shows how an interface can be built to cache regexp patterns
+ * and ultimately provide a more streamlined interface to the regexp kfuns.
+ *
+ * Note that since regexp_match() depends critically on the return result
+ * from regexp_compile() being unaltered, it is a good idea to provide an
+ * interface like this, and also to mask the regexp_match() kfun from the
+ * auto object.
+ */
+
+ # define CACHE_SIZE 10 /* maximum number of compiled patterns kept */
+
+ private mapping cache; /* pattern string -> result of regexp_compile() */
+ private string *list; /* cached pattern strings; list[0] is dropped first */
+ private string last_pattern; /* pattern most recently moved to the end of list */
+
+ static
+ void create(void)
+ {
+ cache = ([ ]);
+ list = ({ });
+ }
+
+ int *match(string subject, string pattern) /* match subject, compiling pattern only when it is not cached */
+ {
+ string *buffer;
+
+ if ((buffer = cache[pattern]) == 0)
+ { /* not cached yet: compile the pattern and remember it */
+ buffer = regexp_compile(pattern);
+
+ if (sizeof(list) >= CACHE_SIZE)
+ {
+ cache[list[0]] = 0; /* cache is full: drop the entry at the head of the list */
+ list = list[1 ..] + ({ pattern });
+ }
+ else
+ list += ({ pattern });
+
+ cache[pattern] = buffer;
+ }
+ else if (pattern != last_pattern)
+ { /* cached: move the pattern to the end of the list */
+ list = list - ({ pattern }) + ({ pattern });
+ last_pattern = pattern; /* note: only set here, not after a fresh compile above */
+ }
+
+ return regexp_match(buffer, subject);
+ }
diff -crN doc.rgx/kfun/regexp_compile doc/kfun/regexp_compile
*** doc.rgx/kfun/regexp_compile Thu Jan 1 00:00:00 1970
--- ../doc/kfun/regexp_compile Tue Jul 26 00:02:34 1994
***************
*** 0 ****
--- 1,27 ----
+ NAME
+ regexp_compile - compile a regular expression
+
+ SYNOPSIS
+ varargs string *regexp_compile(string pattern, int case_insensitive)
+
+ DESCRIPTION
+ The argument pattern is compiled as a regular expression. If the
+ argument case_insensitive is nonzero, the pattern is compiled in
+ such a way that subsequent matching will be done without case
+ sensitivity. The default is to be case-sensitive.
+
+ An array of strings is returned; these strings contain binary
+ data and must not be altered in any way before being passed to
+ regexp_match().
+
+ The compiled regexp can be saved and used any number of times with
+ regexp_match().
+
+ ERRORS
+ If the argument pattern contains a syntactically malformed regular
+ expression, an error will result. An error can also occur if the
+ pattern is too complicated, or if there is not enough memory to
+ compile the pattern.
+
+ SEE ALSO
+ kfun/regexp_match
*** doc.rgx/kfun/regexp_match Thu Jan 1 00:00:00 1970
--- ../doc/kfun/regexp_match Mon Jul 25 22:19:42 1994
***************
*** 0 ****
--- 1,34 ----
+ NAME
+ regexp_match - perform regular expression matching
+
+ SYNOPSIS
+ varargs int *regexp_match(string *pattern, string subject, int reverse)
+
+ DESCRIPTION
+ The argument subject is matched against the compiled regular
+ expression pattern. If the argument reverse is nonzero, matching
+ is performed from right-to-left; otherwise, matching is performed
+ left-to-right.
+
+ The pattern argument must be an array of strings exactly as it
+ was received from regexp_compile(); otherwise, the result of
+ calling this function is undefined.
+
+ If the argument subject could not be matched with the regular
+ expression, 0 is returned. Otherwise, an array of 20 integers
+ is returned with this format:
+
+ ({ start0, end0, start1, end1, ..., start9, end9 })
+
+ Each element is a character index into the subject string. The
+ first two elements, start0 and end0, indicate the part of the subject
+ that was matched by the regular expression as a whole. The following
+ elements indicate the starting and ending indices of each
+ subexpression (denoted by "%(" and "%)" pairs in the original
+ pattern) that was matched.
+
+ If any subexpression was not matched, the corresponding start and
+ end elements will be 0 and -1, respectively.
+
+ SEE ALSO
+ kfun/regexp_compile
*** doc.rgx/regexps Thu Jan 1 00:00:00 1970
--- ../doc/regexps Mon Jul 25 22:58:57 1994
***************
*** 0 ****
--- 1,32 ----
+
+ Regular expressions are composed of the following operators:
+
+ . Match any single character
+ XY Match X immediately followed by Y
+ X* Match zero-or-more of X
+ X+ Match one-or-more of X
+ X? Match zero-or-one of X
+ X%|Y Match either X or Y
+ [charset] Match any single character in `charset'
+ [^charset] Match any single character not in `charset'
+ %(X%) Match X, but also remember the match as a subexpression
+ %digit Match the numbered previous subexpression
+ ^X Match X anchored at the beginning of a line
+ X$ Match X anchored at the end of a line
+ %b Match the empty string at the beginning or end of a word
+ %B Match the empty string only within the middle of a word
+ %< Match the beginning of a word
+ %> Match the end of a word
+ %w Match any word-constituent character
+ %W Match any character that is not word-constituent
+
+ Any other character in a regular expression is matched literally with itself.
+ To match any of the special operator characters .*+?%[^$ literally, precede
+ the character with `%'.
+
+ A `charset' is formed by listing all desired characters within brackets. To
+ include a literal `^' in a charset, do not list it in the first position. To
+ include a literal `]', list it immediately after the opening `[' or `[^'. All
+ characters are non-special (and should not be escaped) within a charset,
+ except `-', which denotes a character range. To include a literal `-', list it
+ either first or last.
*** README.rgx.old Fri Jan 3 03:17:21 1997
--- ../README.rgx Fri Jan 3 03:14:29 1997
***************
*** 0 ****
--- 1,18 ----
+ dgd-rgx was written by Robert Leslie <rob@ccs.neu.edu> as an LPC interface to
+ GNU regex, adding two kfuns to DGD for regular expression matching:
+
+ regexp_compile()
+ regexp_match()
+
+ For a description of the regular expression language accepted by these kfuns,
+ please read doc/regexps.
+
+ Complete details for the two kfuns can be found in the doc/kfun directory.
+
+ Adapted by Adam David <adam@veda.is> for DGD 1.0.97 and to use the unmodified
+ GNU regex library.
+
+ This software is a modification of DGD, and is therefore protected by the
+ DGD Copyright.
+
+ There is no warranty for this software.

View File

@ -3,6 +3,7 @@ dgd/Copyright.NET
dgd/Credits
dgd/README
dgd/README.FreeBSD
dgd/README.rgx
dgd/README.sites
dgd/bin/driver.net
dgd/bin/precomp.net
@ -12,6 +13,8 @@ dgd/doc/LPC.html
dgd/doc/Patching
dgd/doc/Platforms
dgd/doc/editor
dgd/doc/regexps
dgd/doc/rgx_example.c
dgd/doc/kfun/allocate
dgd/doc/kfun/call_other
dgd/doc/kfun/call_out
@ -49,6 +52,8 @@ dgd/doc/kfun/query_editor
dgd/doc/kfun/query_ip_number
dgd/doc/kfun/random
dgd/doc/kfun/read_file
dgd/doc/kfun/regexp_compile
dgd/doc/kfun/regexp_match
dgd/doc/kfun/remove_call_out
dgd/doc/kfun/remove_dir
dgd/doc/kfun/remove_file