mirror of
https://git.FreeBSD.org/src.git
synced 2025-01-23 16:01:42 +00:00
bsdgrep: annihilate our in-tree TRE, previously disabled by default
It was an old TRE that had plenty of bugs and no performance gain over regex(3). I disabled it by default in r323615, and there was some confusion about what the knob does -- likely due to poor naming on my part -- to the tune of "well, it sounds like it should speed things up" (mentioned by multiple people). To compound this, I have no intention of maintaining a second regex implementation. If someone would like to step up and volunteer to maintain a lean-and-mean implementation for grep, this is OK, but we have very few volunteers to maintain even our primary regex implementation.
This commit is contained in:
parent
51688c129f
commit
a2584d1b34
Notes:
svn2git
2020-12-20 02:59:44 +00:00
svn path=/head/; revision=333236
@ -271,6 +271,7 @@ SCRIPTSGRP_${script:T}?= ${SCRIPTSGRP}
|
||||
SCRIPTSMODE_${script:T}?= ${SCRIPTSMODE}
|
||||
STAGE_AS_${script:T}= ${SCRIPTSDIR_${script:T}}/${SCRIPTSNAME_${script:T}}
|
||||
_scriptsinstall: _SCRIPTSINS_${script:T}
|
||||
echo ">SFD>F>DF YES"
|
||||
_SCRIPTSINS_${script:T}: ${script}
|
||||
${INSTALL} ${TAG_ARGS} -o ${SCRIPTSOWN_${.ALLSRC:T}} \
|
||||
-g ${SCRIPTSGRP_${.ALLSRC:T}} -m ${SCRIPTSMODE_${.ALLSRC:T}} \
|
||||
|
@ -187,7 +187,6 @@ __DEFAULT_YES_OPTIONS = \
|
||||
|
||||
__DEFAULT_NO_OPTIONS = \
|
||||
BSD_GREP \
|
||||
BSD_GREP_FASTMATCH \
|
||||
CLANG_EXTRAS \
|
||||
DTRACE_TESTS \
|
||||
GNU_GREP_COMPAT \
|
||||
|
@ -1,5 +0,0 @@
|
||||
.\" $FreeBSD$
|
||||
Set this option to exclude the fastmatch implementation from
|
||||
.Xr bsdgrep 1 ,
|
||||
instead using only
|
||||
.Xr regex 3 .
|
@ -1,3 +0,0 @@
|
||||
.\" $FreeBSD$
|
||||
Set this option to use the fastmatch implementation in
|
||||
.Xr bsdgrep 1 .
|
@ -17,15 +17,6 @@ bsdgrep.1: grep.1
|
||||
.endif
|
||||
SRCS= file.c grep.c queue.c util.c
|
||||
|
||||
.if ${MK_BSD_GREP_FASTMATCH} == "yes"
|
||||
# Extra files backported for some regex improvements
|
||||
.PATH: ${.CURDIR}/regex
|
||||
SRCS+= fastmatch.c hashtable.c tre-compile.c tre-fastmatch.c
|
||||
CFLAGS+=-I${.CURDIR}/regex
|
||||
.else
|
||||
CFLAGS+= -DWITHOUT_FASTMATCH
|
||||
.endif
|
||||
|
||||
SCRIPTS= zgrep.sh
|
||||
LINKS= ${BINDIR}/zgrep ${BINDIR}/zfgrep \
|
||||
${BINDIR}/zgrep ${BINDIR}/zegrep \
|
||||
|
@ -51,9 +51,6 @@ __FBSDID("$FreeBSD$");
|
||||
#include <string.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#ifndef WITHOUT_FASTMATCH
|
||||
#include "fastmatch.h"
|
||||
#endif
|
||||
#include "grep.h"
|
||||
|
||||
#ifndef WITHOUT_NLS
|
||||
@ -96,9 +93,6 @@ unsigned int patterns;
|
||||
static unsigned int pattern_sz;
|
||||
struct pat *pattern;
|
||||
regex_t *r_pattern;
|
||||
#ifndef WITHOUT_FASTMATCH
|
||||
fastmatch_t *fg_pattern;
|
||||
#endif
|
||||
|
||||
/* Filename exclusion/inclusion patterns */
|
||||
unsigned int fpatterns, dpatterns;
|
||||
@ -712,9 +706,6 @@ main(int argc, char *argv[])
|
||||
usage();
|
||||
}
|
||||
|
||||
#ifndef WITHOUT_FASTMATCH
|
||||
fg_pattern = grep_calloc(patterns, sizeof(*fg_pattern));
|
||||
#endif
|
||||
r_pattern = grep_calloc(patterns, sizeof(*r_pattern));
|
||||
|
||||
/* Don't process any patterns if we have a blank one */
|
||||
@ -725,15 +716,6 @@ main(int argc, char *argv[])
|
||||
#endif
|
||||
/* Check if cheating is allowed (always is for fgrep). */
|
||||
for (i = 0; i < patterns; ++i) {
|
||||
#ifndef WITHOUT_FASTMATCH
|
||||
/*
|
||||
* Attempt compilation with fastmatch regex and
|
||||
* fallback to regex(3) if it fails.
|
||||
*/
|
||||
if (fastncomp(&fg_pattern[i], pattern[i].pat,
|
||||
pattern[i].len, cflags) == 0)
|
||||
continue;
|
||||
#endif
|
||||
c = regcomp(&r_pattern[i], pattern[i].pat, cflags);
|
||||
if (c != 0) {
|
||||
regerror(c, &r_pattern[i], re_error,
|
||||
|
@ -38,10 +38,6 @@
|
||||
#include <stdio.h>
|
||||
#include <zlib.h>
|
||||
|
||||
#ifndef WITHOUT_FASTMATCH
|
||||
#include "fastmatch.h"
|
||||
#endif
|
||||
|
||||
#ifdef WITHOUT_NLS
|
||||
#define getstr(n) errstr[n]
|
||||
#else
|
||||
@ -131,9 +127,6 @@ extern unsigned int dpatterns, fpatterns, patterns;
|
||||
extern struct pat *pattern;
|
||||
extern struct epat *dpattern, *fpattern;
|
||||
extern regex_t *er_pattern, *r_pattern;
|
||||
#ifndef WITHOUT_FASTMATCH
|
||||
extern fastmatch_t *fg_pattern;
|
||||
#endif
|
||||
|
||||
/* For regex errors */
|
||||
#define RE_ERROR_BUF 512
|
||||
|
@ -1,170 +0,0 @@
|
||||
/* $FreeBSD$ */
|
||||
|
||||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD
|
||||
*
|
||||
* Copyright (C) 2011 Gabor Kovesdan <gabor@FreeBSD.org>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "glue.h"
|
||||
|
||||
#include <errno.h>
|
||||
#include <fastmatch.h>
|
||||
#include <regex.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "tre-fastmatch.h"
|
||||
|
||||
int
|
||||
tre_fixncomp(fastmatch_t *preg, const char *regex, size_t n, int cflags)
|
||||
{
|
||||
int ret;
|
||||
tre_char_t *wregex;
|
||||
size_t wlen;
|
||||
|
||||
if (n != 0)
|
||||
{
|
||||
ret = tre_convert_pattern(regex, n, &wregex, &wlen);
|
||||
if (ret != REG_OK)
|
||||
return ret;
|
||||
else
|
||||
ret = tre_compile_literal(preg, wregex, wlen, cflags);
|
||||
tre_free_pattern(wregex);
|
||||
return ret;
|
||||
}
|
||||
else
|
||||
return tre_compile_literal(preg, NULL, 0, cflags);
|
||||
}
|
||||
|
||||
int
|
||||
tre_fastncomp(fastmatch_t *preg, const char *regex, size_t n, int cflags)
|
||||
{
|
||||
int ret;
|
||||
tre_char_t *wregex;
|
||||
size_t wlen;
|
||||
|
||||
if (n != 0)
|
||||
{
|
||||
ret = tre_convert_pattern(regex, n, &wregex, &wlen);
|
||||
if (ret != REG_OK)
|
||||
return ret;
|
||||
else
|
||||
ret = (cflags & REG_LITERAL)
|
||||
? tre_compile_literal(preg, wregex, wlen, cflags)
|
||||
: tre_compile_fast(preg, wregex, wlen, cflags);
|
||||
tre_free_pattern(wregex);
|
||||
return ret;
|
||||
}
|
||||
else
|
||||
return tre_compile_literal(preg, NULL, 0, cflags);
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
tre_fixcomp(fastmatch_t *preg, const char *regex, int cflags)
|
||||
{
|
||||
return tre_fixncomp(preg, regex, regex ? strlen(regex) : 0, cflags);
|
||||
}
|
||||
|
||||
int
|
||||
tre_fastcomp(fastmatch_t *preg, const char *regex, int cflags)
|
||||
{
|
||||
return tre_fastncomp(preg, regex, regex ? strlen(regex) : 0, cflags);
|
||||
}
|
||||
|
||||
int
|
||||
tre_fixwncomp(fastmatch_t *preg, const wchar_t *regex, size_t n, int cflags)
|
||||
{
|
||||
return tre_compile_literal(preg, regex, n, cflags);
|
||||
}
|
||||
|
||||
int
|
||||
tre_fastwncomp(fastmatch_t *preg, const wchar_t *regex, size_t n, int cflags)
|
||||
{
|
||||
return (cflags & REG_LITERAL) ?
|
||||
tre_compile_literal(preg, regex, n, cflags) :
|
||||
tre_compile_fast(preg, regex, n, cflags);
|
||||
}
|
||||
|
||||
int
|
||||
tre_fixwcomp(fastmatch_t *preg, const wchar_t *regex, int cflags)
|
||||
{
|
||||
return tre_fixwncomp(preg, regex, regex ? tre_strlen(regex) : 0, cflags);
|
||||
}
|
||||
|
||||
int
|
||||
tre_fastwcomp(fastmatch_t *preg, const wchar_t *regex, int cflags)
|
||||
{
|
||||
return tre_fastwncomp(preg, regex, regex ? tre_strlen(regex) : 0, cflags);
|
||||
}
|
||||
|
||||
void
|
||||
tre_fastfree(fastmatch_t *preg)
|
||||
{
|
||||
tre_free_fast(preg);
|
||||
}
|
||||
|
||||
int
|
||||
tre_fastnexec(const fastmatch_t *preg, const char *string, size_t len,
|
||||
size_t nmatch, regmatch_t pmatch[], int eflags)
|
||||
{
|
||||
tre_str_type_t type = (TRE_MB_CUR_MAX == 1) ? STR_BYTE : STR_MBS;
|
||||
|
||||
if (eflags & REG_STARTEND)
|
||||
CALL_WITH_OFFSET(tre_match_fast(preg, &string[offset], slen,
|
||||
type, nmatch, pmatch, eflags));
|
||||
else
|
||||
return tre_match_fast(preg, string, len, type, nmatch,
|
||||
pmatch, eflags);
|
||||
}
|
||||
|
||||
int
|
||||
tre_fastexec(const fastmatch_t *preg, const char *string, size_t nmatch,
|
||||
regmatch_t pmatch[], int eflags)
|
||||
{
|
||||
return tre_fastnexec(preg, string, (size_t)-1, nmatch, pmatch, eflags);
|
||||
}
|
||||
|
||||
int
|
||||
tre_fastwnexec(const fastmatch_t *preg, const wchar_t *string, size_t len,
|
||||
size_t nmatch, regmatch_t pmatch[], int eflags)
|
||||
{
|
||||
tre_str_type_t type = STR_WIDE;
|
||||
|
||||
if (eflags & REG_STARTEND)
|
||||
CALL_WITH_OFFSET(tre_match_fast(preg, &string[offset], slen,
|
||||
type, nmatch, pmatch, eflags));
|
||||
else
|
||||
return tre_match_fast(preg, string, len, type, nmatch,
|
||||
pmatch, eflags);
|
||||
}
|
||||
|
||||
int
|
||||
tre_fastwexec(const fastmatch_t *preg, const wchar_t *string,
|
||||
size_t nmatch, regmatch_t pmatch[], int eflags)
|
||||
{
|
||||
return tre_fastwnexec(preg, string, (size_t)-1, nmatch, pmatch, eflags);
|
||||
}
|
||||
|
@ -1,95 +0,0 @@
|
||||
/* $FreeBSD$ */
|
||||
|
||||
#ifndef FASTMATCH_H
|
||||
#define FASTMATCH_H 1
|
||||
|
||||
#include <limits.h>
|
||||
#include <regex.h>
|
||||
#include <stdbool.h>
|
||||
#include <wchar.h>
|
||||
|
||||
/*
 * Compiled state of a fast-match pattern.  Filled in by the
 * tre_compile_* routines and released with tre_fastfree().
 * NOTE(review): the per-field notes below are inferred from the field
 * names; the code that populates them is not part of this file.
 */
typedef struct {
  /* Wide-character form of the pattern (presumably; TODO confirm). */
  size_t	 wlen;
  size_t	 len;
  wchar_t	*wpattern;
  bool		*wescmap;

  /* Shift tables (presumably quick-search / Boyer-Moore; TODO confirm). */
  unsigned int	 qsBc[UCHAR_MAX + 1];
  unsigned int	*bmGs;

  /* Byte form of the pattern (presumably; TODO confirm). */
  char		*pattern;
  bool		*escmap;
  unsigned int	 defBc;
  void		*qsBc_table;
  unsigned int	*sbmGs;
  const char	*re_endp;

  /* flags */
  bool		 hasdot;
  bool		 bol;
  bool		 eol;
  bool		 word;
  bool		 icase;
  bool		 newline;
  bool		 nosub;		/* when set, CALL_WITH_OFFSET leaves pmatch[] unshifted */
  bool		 matchall;
  bool		 reversed;
} fastmatch_t;
|
||||
|
||||
extern int
|
||||
tre_fixcomp(fastmatch_t *preg, const char *regex, int cflags);
|
||||
|
||||
extern int
|
||||
tre_fastcomp(fastmatch_t *preg, const char *regex, int cflags);
|
||||
|
||||
extern int
|
||||
tre_fastexec(const fastmatch_t *preg, const char *string, size_t nmatch,
|
||||
regmatch_t pmatch[], int eflags);
|
||||
|
||||
extern void
|
||||
tre_fastfree(fastmatch_t *preg);
|
||||
|
||||
extern int
|
||||
tre_fixwcomp(fastmatch_t *preg, const wchar_t *regex, int cflags);
|
||||
|
||||
extern int
|
||||
tre_fastwcomp(fastmatch_t *preg, const wchar_t *regex, int cflags);
|
||||
|
||||
extern int
|
||||
tre_fastwexec(const fastmatch_t *preg, const wchar_t *string,
|
||||
size_t nmatch, regmatch_t pmatch[], int eflags);
|
||||
|
||||
/* Versions with a maximum length argument and therefore the capability to
|
||||
handle null characters in the middle of the strings. */
|
||||
extern int
|
||||
tre_fixncomp(fastmatch_t *preg, const char *regex, size_t len, int cflags);
|
||||
|
||||
extern int
|
||||
tre_fastncomp(fastmatch_t *preg, const char *regex, size_t len, int cflags);
|
||||
|
||||
extern int
|
||||
tre_fastnexec(const fastmatch_t *preg, const char *string, size_t len,
|
||||
size_t nmatch, regmatch_t pmatch[], int eflags);
|
||||
|
||||
extern int
|
||||
tre_fixwncomp(fastmatch_t *preg, const wchar_t *regex, size_t len, int cflags);
|
||||
|
||||
extern int
|
||||
tre_fastwncomp(fastmatch_t *preg, const wchar_t *regex, size_t len, int cflags);
|
||||
|
||||
extern int
|
||||
tre_fastwnexec(const fastmatch_t *preg, const wchar_t *string, size_t len,
|
||||
size_t nmatch, regmatch_t pmatch[], int eflags);
|
||||
|
||||
#define fixncomp tre_fixncomp
|
||||
#define fastncomp tre_fastncomp
|
||||
#define fixcomp tre_fixcomp
|
||||
#define fastcomp tre_fastcomp
|
||||
#define fixwncomp tre_fixwncomp
|
||||
#define fastwncomp tre_fastwncomp
|
||||
#define fixwcomp tre_fixwcomp
|
||||
#define fastwcomp tre_fastwcomp
|
||||
#define fastfree tre_fastfree
|
||||
#define fastnexec tre_fastnexec
|
||||
#define fastexec tre_fastexec
|
||||
#define fastwnexec tre_fastwnexec
|
||||
#define fastwexec tre_fastwexec
|
||||
#endif /* FASTMATCH_H */
|
@ -1,67 +0,0 @@
|
||||
/* $FreeBSD$ */
|
||||
|
||||
#ifndef GLUE_H
|
||||
#define GLUE_H
|
||||
|
||||
#include <limits.h>
|
||||
#undef RE_DUP_MAX
|
||||
#include <regex.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#define TRE_WCHAR 1
|
||||
#define TRE_MULTIBYTE 1
|
||||
#define HAVE_MBSTATE_T 1
|
||||
|
||||
#define TRE_CHAR(n) L##n
|
||||
#define CHF "%lc"
|
||||
|
||||
#define tre_char_t wchar_t
|
||||
#define tre_mbrtowc(pwc, s, n, ps) (mbrtowc((pwc), (s), (n), (ps)))
|
||||
#define tre_strlen wcslen
|
||||
#define tre_isspace iswspace
|
||||
#define tre_isalnum iswalnum
|
||||
|
||||
#define REG_OK 0
|
||||
#define REG_LITERAL 0020
|
||||
#define REG_WORD 0100
|
||||
#define REG_GNU 0400
|
||||
|
||||
#define TRE_MB_CUR_MAX MB_CUR_MAX
|
||||
|
||||
#ifndef _GREP_DEBUG
|
||||
#define DPRINT(msg)
|
||||
#else
|
||||
#define DPRINT(msg) do {printf msg; fflush(stdout);} while(/*CONSTCOND*/0)
|
||||
#endif
|
||||
|
||||
/*
 * Smaller / larger of two values.  Both arguments are fully parenthesized
 * so that expressions with low-precedence operators (e.g. `x & y`)
 * compare as a unit.  Each argument may still be evaluated twice, so do
 * not pass expressions with side effects.
 */
#define MIN(a,b) (((a) > (b)) ? (b) : (a))
#define MAX(a,b) (((a) > (b)) ? (a) : (b))
|
||||
|
||||
/* Kind of input handed to tre_match_fast(). */
typedef enum
{
  STR_WIDE,	/* wchar_t input (used by tre_fastwnexec) */
  STR_BYTE,	/* char input, chosen when TRE_MB_CUR_MAX == 1 */
  STR_MBS,	/* char input, chosen when TRE_MB_CUR_MAX != 1 */
  STR_USER
} tre_str_type_t;
|
||||
|
||||
/*
 * REG_STARTEND helper.  Must be expanded inside a function that has
 * `preg`, `pmatch` and `nmatch` in scope and returns int; it returns
 * from that function on every path.
 *
 * The search window is pmatch[0].rm_so .. pmatch[0].rm_eo.  `fn` is an
 * expression that may refer to `offset` (start of the window) and `slen`
 * (its length).  A negative-length window yields REG_NOMATCH.  Unless
 * preg->nosub is set, all nmatch entries of pmatch are shifted by
 * `offset` after the call so they are relative to the original string.
 */
#define CALL_WITH_OFFSET(fn) \
  do \
    { \
      size_t slen; \
      size_t offset; \
      int ret; \
 \
      if ((long long)pmatch[0].rm_eo - pmatch[0].rm_so < 0) \
	return REG_NOMATCH; \
      offset = pmatch[0].rm_so; \
      slen = (size_t)(pmatch[0].rm_eo - pmatch[0].rm_so); \
      ret = fn; \
      if (!preg->nosub) \
	for (unsigned i = 0; i < nmatch; i++) \
	  { \
	    pmatch[i].rm_so += offset; \
	    pmatch[i].rm_eo += offset; \
	  } \
      return ret; \
    } while (0 /*CONSTCOND*/)
|
||||
|
||||
int
|
||||
tre_convert_pattern(const char *regex, size_t n, tre_char_t **w,
|
||||
size_t *wn);
|
||||
|
||||
void
|
||||
tre_free_pattern(tre_char_t *wregex);
|
||||
#endif
|
@ -1,270 +0,0 @@
|
||||
/* $FreeBSD$ */
|
||||
|
||||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD
|
||||
*
|
||||
* Copyright (C) 2011 Gabor Kovesdan <gabor@FreeBSD.org>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "glue.h"
|
||||
|
||||
#include <errno.h>
|
||||
#include <inttypes.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "hashtable.h"
|
||||
|
||||
|
||||
/*
 * Fold len bytes starting at buf into a 32-bit hash, one HASHSTEP per
 * byte.  Pass 0 as the initial hash, or the result of a previous call
 * to continue hashing where that call left off.
 */
static uint32_t
hash32_buf(const void *buf, size_t len, uint32_t hash)
{
  const unsigned char *bytes = buf;
  size_t i;

  for (i = 0; i < len; i++)
    hash = HASHSTEP(hash, bytes[i]);

  return hash;
}
|
||||
|
||||
/*
|
||||
* Initializes a hash table that can hold table_size number of entries,
|
||||
* each of which has a key of key_size bytes and a value of value_size
|
||||
* bytes. On successful allocation returns a pointer to the hash table.
|
||||
* Otherwise, returns NULL and sets errno to indicate the error.
|
||||
*/
|
||||
hashtable
|
||||
*hashtable_init(size_t table_size, size_t key_size, size_t value_size)
|
||||
{
|
||||
hashtable *tbl;
|
||||
|
||||
DPRINT(("hashtable_init: table_size %zu, key_size %zu, value_size %zu\n",
|
||||
table_size, key_size, value_size));
|
||||
|
||||
tbl = malloc(sizeof(hashtable));
|
||||
if (tbl == NULL)
|
||||
goto mem1;
|
||||
|
||||
tbl->entries = calloc(sizeof(hashtable_entry *), table_size);
|
||||
if (tbl->entries == NULL)
|
||||
goto mem2;
|
||||
|
||||
tbl->table_size = table_size;
|
||||
tbl->usage = 0;
|
||||
tbl->key_size = key_size;
|
||||
tbl->value_size = value_size;
|
||||
|
||||
return (tbl);
|
||||
|
||||
mem2:
|
||||
free(tbl);
|
||||
mem1:
|
||||
DPRINT(("hashtable_init: allocation failed\n"));
|
||||
errno = ENOMEM;
|
||||
return (NULL);
|
||||
}
|
||||
|
||||
/*
|
||||
* Places the key-value pair to the hashtable tbl.
|
||||
* Returns:
|
||||
* HASH_OK: if the key was not present in the hash table yet
|
||||
* but the kay-value pair has been successfully added.
|
||||
* HASH_UPDATED: if the value for the key has been updated with the
|
||||
* new value.
|
||||
* HASH_FULL: if the hash table is full and the entry could not
|
||||
* be added.
|
||||
* HASH_FAIL: if an error has occurred and errno has been set to
|
||||
* indicate the error.
|
||||
*/
|
||||
int
|
||||
hashtable_put(hashtable *tbl, const void *key, const void *value)
|
||||
{
|
||||
uint32_t hash = 0;
|
||||
|
||||
if (tbl->table_size == tbl->usage)
|
||||
{
|
||||
DPRINT(("hashtable_put: hashtable is full\n"));
|
||||
return (HASH_FULL);
|
||||
}
|
||||
|
||||
hash = hash32_buf(key, tbl->key_size, hash) % tbl->table_size;
|
||||
DPRINT(("hashtable_put: calculated hash %" PRIu32 "\n", hash));
|
||||
|
||||
/*
|
||||
* On hash collision entries are inserted at the next free space,
|
||||
* so we have to increase the index until we either find an entry
|
||||
* with the same key (and update it) or we find a free space.
|
||||
*/
|
||||
for(;;)
|
||||
{
|
||||
if (tbl->entries[hash] == NULL)
|
||||
break;
|
||||
else if (memcmp(tbl->entries[hash]->key, key, tbl->key_size) == 0)
|
||||
{
|
||||
memcpy(tbl->entries[hash]->value, value, tbl->value_size);
|
||||
DPRINT(("hashtable_put: effective location is %" PRIu32
|
||||
", entry updated\n", hash));
|
||||
return (HASH_UPDATED);
|
||||
}
|
||||
if (++hash == tbl->table_size)
|
||||
hash = 0;
|
||||
}
|
||||
|
||||
DPRINT(("hashtable_put: effective location is %" PRIu32 "\n", hash));
|
||||
|
||||
tbl->entries[hash] = malloc(sizeof(hashtable_entry));
|
||||
if (tbl->entries[hash] == NULL)
|
||||
{
|
||||
errno = ENOMEM;
|
||||
goto mem1;
|
||||
}
|
||||
|
||||
tbl->entries[hash]->key = malloc(tbl->key_size);
|
||||
if (tbl->entries[hash]->key == NULL)
|
||||
{
|
||||
errno = ENOMEM;
|
||||
goto mem2;
|
||||
}
|
||||
|
||||
tbl->entries[hash]->value = malloc(tbl->value_size);
|
||||
if (tbl->entries[hash]->value == NULL)
|
||||
{
|
||||
errno = ENOMEM;
|
||||
goto mem3;
|
||||
}
|
||||
|
||||
memcpy(tbl->entries[hash]->key, key, tbl->key_size);
|
||||
memcpy(tbl->entries[hash]->value, value, tbl->value_size);
|
||||
tbl->usage++;
|
||||
|
||||
DPRINT(("hashtable_put: entry successfully inserted\n"));
|
||||
|
||||
return (HASH_OK);
|
||||
|
||||
mem3:
|
||||
free(tbl->entries[hash]->key);
|
||||
mem2:
|
||||
free(tbl->entries[hash]);
|
||||
mem1:
|
||||
DPRINT(("hashtable_put: insertion failed\n"));
|
||||
return (HASH_FAIL);
|
||||
}
|
||||
|
||||
static hashtable_entry
|
||||
**hashtable_lookup(const hashtable *tbl, const void *key)
|
||||
{
|
||||
uint32_t hash = 0;
|
||||
|
||||
hash = hash32_buf(key, tbl->key_size, hash) % tbl->table_size;
|
||||
|
||||
for (;;)
|
||||
{
|
||||
if (tbl->entries[hash] == NULL)
|
||||
return (NULL);
|
||||
else if (memcmp(key, tbl->entries[hash]->key, tbl->key_size) == 0)
|
||||
{
|
||||
DPRINT(("hashtable_lookup: entry found at location %" PRIu32 "\n", hash));
|
||||
return (&tbl->entries[hash]);
|
||||
}
|
||||
|
||||
if (++hash == tbl->table_size)
|
||||
hash = 0;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Retrieves the value for key from the hash table tbl and places
|
||||
* it to the space indicated by the value argument.
|
||||
* Returns HASH_OK if the value has been found and retrieved or
|
||||
* HASH_NOTFOUND otherwise.
|
||||
*/
|
||||
int
|
||||
hashtable_get(hashtable *tbl, const void *key, void *value)
|
||||
{
|
||||
hashtable_entry **entry;
|
||||
|
||||
entry = hashtable_lookup(tbl, key);
|
||||
if (entry == NULL)
|
||||
{
|
||||
DPRINT(("hashtable_get: entry is not available in the hashtable\n"));
|
||||
return (HASH_NOTFOUND);
|
||||
}
|
||||
|
||||
memcpy(value, (*entry)->value, tbl->value_size);
|
||||
DPRINT(("hashtable_get: entry successfully copied into output buffer\n"));
|
||||
return (HASH_OK);
|
||||
}
|
||||
|
||||
/*
|
||||
* Removes the entry with the specifified key from the hash table
|
||||
* tbl. Returns HASH_OK if the entry has been found and removed
|
||||
* or HASH_NOTFOUND otherwise.
|
||||
*/
|
||||
int
|
||||
hashtable_remove(hashtable *tbl, const void *key)
|
||||
{
|
||||
hashtable_entry **entry;
|
||||
|
||||
entry = hashtable_lookup(tbl, key);
|
||||
if (entry == NULL)
|
||||
{
|
||||
DPRINT(("hashtable_remove: entry is not available in the hashtable\n"));
|
||||
return (HASH_NOTFOUND);
|
||||
}
|
||||
|
||||
free((*entry)->key);
|
||||
free((*entry)->value);
|
||||
free(*entry);
|
||||
*entry = NULL;
|
||||
|
||||
tbl->usage--;
|
||||
DPRINT(("hashtable_remove: entry successfully removed\n"));
|
||||
return (HASH_OK);
|
||||
}
|
||||
|
||||
/*
|
||||
* Frees the resources associated with the hash table tbl.
|
||||
*/
|
||||
void
|
||||
hashtable_free(hashtable *tbl)
|
||||
{
|
||||
if (tbl == NULL)
|
||||
return;
|
||||
|
||||
for (unsigned int i = 0; i < tbl->table_size; i++)
|
||||
if ((tbl->entries[i] != NULL))
|
||||
{
|
||||
free(tbl->entries[i]->key);
|
||||
free(tbl->entries[i]->value);
|
||||
}
|
||||
|
||||
free(tbl->entries);
|
||||
DPRINT(("hashtable_free: resources are successfully freed\n"));
|
||||
}
|
@ -1,35 +0,0 @@
|
||||
/* $FreeBSD$ */
|
||||
|
||||
#ifndef HASHTABLE_H
|
||||
#define HASHTABLE_H 1
|
||||
|
||||
#include <sys/types.h>
|
||||
|
||||
/* Status codes returned by the hashtable_* functions. */
#define HASH_OK		0	/* operation succeeded */
#define HASH_UPDATED	1	/* key existed; its value was replaced */
#define HASH_FAIL	2	/* error occurred; errno has been set */
#define HASH_FULL	3	/* no room left for a new entry */
#define HASH_NOTFOUND	4	/* key is not in the table */

/*
 * One step of the byte-wise hash: x * 33 + c.  Arguments are
 * parenthesized so that compound expressions hash as a unit; note that
 * x is evaluated twice, so it must not have side effects.
 */
#define HASHSTEP(x,c) ((((x) << 5) + (x)) + (c))
|
||||
|
||||
/* One stored pair; key and value are separately allocated buffers. */
typedef struct {
  void	*key;		/* key_size bytes */
  void	*value;		/* value_size bytes */
} hashtable_entry;

/* Fixed-capacity hash table with fixed-size keys and values. */
typedef struct {
  size_t	   key_size;	/* size of every key, in bytes */
  size_t	   table_size;	/* number of slots in entries[] */
  size_t	   usage;	/* number of slots currently occupied */
  size_t	   value_size;	/* size of every value, in bytes */
  hashtable_entry **entries;	/* slot array; NULL marks an empty slot */
} hashtable;
|
||||
|
||||
void hashtable_free(hashtable *);
|
||||
int hashtable_get(hashtable *, const void *, void *);
|
||||
hashtable *hashtable_init(size_t, size_t, size_t);
|
||||
int hashtable_put(hashtable *, const void *, const void *);
|
||||
int hashtable_remove(hashtable *, const void *);
|
||||
|
||||
#endif /* HASHTABLE_H */
|
@ -1,101 +0,0 @@
|
||||
/* $FreeBSD$ */
|
||||
|
||||
#include "glue.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <assert.h>
|
||||
#include <errno.h>
|
||||
#include <regex.h>
|
||||
#include <string.h>
|
||||
#include <wchar.h>
|
||||
|
||||
int
|
||||
tre_convert_pattern(const char *regex, size_t n, tre_char_t **w,
|
||||
size_t *wn)
|
||||
{
|
||||
#if TRE_WCHAR
|
||||
tre_char_t *wregex;
|
||||
size_t wlen;
|
||||
|
||||
wregex = malloc(sizeof(tre_char_t) * (n + 1));
|
||||
if (wregex == NULL)
|
||||
return REG_ESPACE;
|
||||
|
||||
/* If the current locale uses the standard single byte encoding of
|
||||
characters, we don't do a multibyte string conversion. If we did,
|
||||
many applications which use the default locale would break since
|
||||
the default "C" locale uses the 7-bit ASCII character set, and
|
||||
all characters with the eighth bit set would be considered invalid. */
|
||||
#if TRE_MULTIBYTE
|
||||
if (TRE_MB_CUR_MAX == 1)
|
||||
#endif /* TRE_MULTIBYTE */
|
||||
{
|
||||
unsigned int i;
|
||||
const unsigned char *str = (const unsigned char *)regex;
|
||||
tre_char_t *wstr = wregex;
|
||||
|
||||
for (i = 0; i < n; i++)
|
||||
*(wstr++) = *(str++);
|
||||
wlen = n;
|
||||
}
|
||||
#if TRE_MULTIBYTE
|
||||
else
|
||||
{
|
||||
int consumed;
|
||||
tre_char_t *wcptr = wregex;
|
||||
#ifdef HAVE_MBSTATE_T
|
||||
mbstate_t state;
|
||||
memset(&state, '\0', sizeof(state));
|
||||
#endif /* HAVE_MBSTATE_T */
|
||||
while (n > 0)
|
||||
{
|
||||
consumed = tre_mbrtowc(wcptr, regex, n, &state);
|
||||
|
||||
switch (consumed)
|
||||
{
|
||||
case 0:
|
||||
if (*regex == '\0')
|
||||
consumed = 1;
|
||||
else
|
||||
{
|
||||
free(wregex);
|
||||
return REG_BADPAT;
|
||||
}
|
||||
break;
|
||||
case -1:
|
||||
DPRINT(("mbrtowc: error %d: %s.\n", errno, strerror(errno)));
|
||||
free(wregex);
|
||||
return REG_BADPAT;
|
||||
case -2:
|
||||
/* The last character wasn't complete. Let's not call it a
|
||||
fatal error. */
|
||||
consumed = n;
|
||||
break;
|
||||
}
|
||||
regex += consumed;
|
||||
n -= consumed;
|
||||
wcptr++;
|
||||
}
|
||||
wlen = wcptr - wregex;
|
||||
}
|
||||
#endif /* TRE_MULTIBYTE */
|
||||
wregex[wlen] = L'\0';
|
||||
*w = wregex;
|
||||
*wn = wlen;
|
||||
return REG_OK;
|
||||
#else /* !TRE_WCHAR */
|
||||
{
|
||||
*w = (tre_char_t * const *)regex;
|
||||
*wn = n;
|
||||
return REG_OK;
|
||||
}
|
||||
#endif /* !TRE_WCHAR */
|
||||
}
|
||||
|
||||
void
|
||||
tre_free_pattern(tre_char_t *wregex)
|
||||
{
|
||||
#if TRE_WCHAR
|
||||
free(wregex);
|
||||
#endif
|
||||
}
|
File diff suppressed because it is too large
Load Diff
@ -1,21 +0,0 @@
|
||||
/* $FreeBSD$ */
|
||||
|
||||
#ifndef TRE_FASTMATCH_H
|
||||
#define TRE_FASTMATCH_H 1
|
||||
|
||||
#include <fastmatch.h>
|
||||
#include <hashtable.h>
|
||||
#include <limits.h>
|
||||
#include <regex.h>
|
||||
#include <stdbool.h>
|
||||
|
||||
#include "hashtable.h"
|
||||
|
||||
int tre_compile_literal(fastmatch_t *preg, const tre_char_t *regex,
|
||||
size_t, int);
|
||||
int tre_compile_fast(fastmatch_t *preg, const tre_char_t *regex, size_t, int);
|
||||
int tre_match_fast(const fastmatch_t *fg, const void *data, size_t len,
|
||||
tre_str_type_t type, int nmatch, regmatch_t pmatch[], int eflags);
|
||||
void tre_free_fast(fastmatch_t *preg);
|
||||
|
||||
#endif /* TRE_FASTMATCH_H */
|
@ -52,9 +52,6 @@ __FBSDID("$FreeBSD$");
|
||||
#include <wchar.h>
|
||||
#include <wctype.h>
|
||||
|
||||
#ifndef WITHOUT_FASTMATCH
|
||||
#include "fastmatch.h"
|
||||
#endif
|
||||
#include "grep.h"
|
||||
|
||||
static bool first_match = true;
|
||||
@ -512,14 +509,8 @@ procline(struct parsec *pc)
|
||||
r = litexec(&pattern[i], pc->ln.dat, 1, &pmatch);
|
||||
else
|
||||
#endif
|
||||
#ifndef WITHOUT_FASTMATCH
|
||||
if (fg_pattern[i].pattern)
|
||||
r = fastexec(&fg_pattern[i],
|
||||
pc->ln.dat, 1, &pmatch, leflags);
|
||||
else
|
||||
#endif
|
||||
r = regexec(&r_pattern[i], pc->ln.dat, 1,
|
||||
&pmatch, leflags);
|
||||
r = regexec(&r_pattern[i], pc->ln.dat, 1, &pmatch,
|
||||
leflags);
|
||||
if (r != 0)
|
||||
continue;
|
||||
/* Check for full match */
|
||||
@ -527,11 +518,7 @@ procline(struct parsec *pc)
|
||||
(size_t)pmatch.rm_eo != pc->ln.len))
|
||||
continue;
|
||||
/* Check for whole word match */
|
||||
#ifndef WITHOUT_FASTMATCH
|
||||
if (wflag || fg_pattern[i].word) {
|
||||
#else
|
||||
if (wflag) {
|
||||
#endif
|
||||
wbegin = wend = L' ';
|
||||
if (pmatch.rm_so != 0 &&
|
||||
sscanf(&pc->ln.dat[pmatch.rm_so - 1],
|
||||
|
Loading…
Reference in New Issue
Block a user