mirror of
https://git.FreeBSD.org/src.git
synced 2025-01-25 16:13:17 +00:00
1769 lines
43 KiB
C
1769 lines
43 KiB
C
/*
|
|
|
|
dbz.c V3.2
|
|
|
|
Copyright 1988 Jon Zeeff (zeeff@b-tech.ann-arbor.mi.us)
|
|
You can use this code in any manner, as long as you leave my name on it
|
|
and don't hold me responsible for any problems with it.
|
|
|
|
Hacked on by gdb@ninja.UUCP (David Butler); Sun Jun 5 00:27:08 CDT 1988
|
|
|
|
Various improvments + INCORE by moraes@ai.toronto.edu (Mark Moraes)
|
|
|
|
Major reworking by Henry Spencer as part of the C News project.
|
|
|
|
These routines replace dbm as used by the usenet news software
|
|
(it's not a full dbm replacement by any means). It's fast and
|
|
simple. It contains no AT&T code.
|
|
|
|
In general, dbz's files are 1/20 the size of dbm's. Lookup performance
|
|
is somewhat better, while file creation is spectacularly faster, especially
|
|
if the incore facility is used.
|
|
|
|
*/
|
|
|
|
#include <stdio.h>
|
|
#include <sys/types.h>
|
|
#include <string.h>
|
|
#include <ctype.h>
|
|
#include <errno.h>
|
|
#ifndef __STDC__
|
|
extern int errno;
|
|
#endif
|
|
#include <dbz.h>
|
|
|
|
/*
|
|
* #ifdef index. "LIA" = "leave it alone unless you know what you're doing".
|
|
*
|
|
* FUNNYSEEKS SEEK_SET is not 0, get it from <unistd.h>
|
|
* INDEX_SIZE backward compatibility with old dbz; avoid using this
|
|
* NMEMORY number of days of memory for use in sizing new table (LIA)
|
|
* INCORE backward compatibility with old dbz; use dbzincore() instead
|
|
* DBZDEBUG enable debugging
|
|
* DEFSIZE default table size (not as critical as in old dbz)
|
|
* OLDBNEWS default case mapping as in old B News; set NOBUFFER
|
|
* BNEWS default case mapping as in current B News; set NOBUFFER
|
|
* DEFCASE default case-map algorithm selector
|
|
* NOTAGS fseek offsets are strange, do not do tagging (see below)
|
|
* NPAGBUF size of .pag buffer, in longs (LIA)
|
|
* SHISTBUF size of ASCII-file buffer, in bytes (LIA)
|
|
* MAXRUN length of run which shifts to next table (see below) (LIA)
|
|
* OVERFLOW long-int arithmetic overflow must be avoided, will trap
|
|
* NOBUFFER do not buffer hash-table i/o, B News locking is defective
|
|
*/
|
|
|
|
#ifdef FUNNYSEEKS
|
|
#include <unistd.h>
|
|
#else
|
|
#define SEEK_SET 0
|
|
#endif
|
|
#ifdef OVERFLOW
|
|
#include <limits.h>
|
|
#endif
|
|
|
|
static int dbzversion = 3; /* for validating .dir file format */
|
|
|
|
/*
|
|
* The dbz database exploits the fact that when news stores a <key,value>
|
|
* tuple, the `value' part is a seek offset into a text file, pointing to
|
|
* a copy of the `key' part. This avoids the need to store a copy of
|
|
* the key in the dbz files. However, the text file *must* exist and be
|
|
* consistent with the dbz files, or things will fail.
|
|
*
|
|
* The basic format of the database is a simple hash table containing the
|
|
* values. A value is stored by indexing into the table using a hash value
|
|
* computed from the key; collisions are resolved by linear probing (just
|
|
* search forward for an empty slot, wrapping around to the beginning of
|
|
* the table if necessary). Linear probing is a performance disaster when
|
|
* the table starts to get full, so a complication is introduced. The
|
|
* database is actually one *or more* tables, stored sequentially in the
|
|
* .pag file, and the length of linear-probe sequences is limited. The
|
|
* search (for an existing item or an empty slot) always starts in the
|
|
* first table, and whenever MAXRUN probes have been done in table N,
|
|
* probing continues in table N+1. This behaves reasonably well even in
|
|
* cases of massive overflow. There are some other small complications
|
|
* added, see comments below.
|
|
*
|
|
* The table size is fixed for any particular database, but is determined
|
|
* dynamically when a database is rebuilt. The strategy is to try to pick
|
|
* the size so the first table will be no more than 2/3 full, that being
|
|
* slightly before the point where performance starts to degrade. (It is
|
|
* desirable to be a bit conservative because the overflow strategy tends
|
|
* to produce files with holes in them, which is a nuisance.)
|
|
*/
|
|
|
|
/*
|
|
* The following is for backward compatibility.
|
|
*/
|
|
#ifdef INDEX_SIZE
|
|
#define DEFSIZE INDEX_SIZE
|
|
#endif
|
|
|
|
/*
|
|
* ANSI C says the offset argument to fseek is a long, not an off_t, for some
|
|
* reason. Let's use off_t anyway.
|
|
*/
|
|
#define SOF (sizeof(off_t))
|
|
|
|
/*
|
|
* We assume that unused areas of a binary file are zeros, and that the
|
|
* bit pattern of `(off_t)0' is all zeros. The alternative is rather
|
|
* painful file initialization. Note that okayvalue(), if OVERFLOW is
|
|
* defined, knows what value of an offset would cause overflow.
|
|
*/
|
|
#define VACANT ((off_t)0)
|
|
#define BIAS(o) ((o)+1) /* make any valid off_t non-VACANT */
|
|
#define UNBIAS(o) ((o)-1) /* reverse BIAS() effect */
|
|
|
|
/*
|
|
* In a Unix implementation, or indeed any in which an off_t is a byte
|
|
* count, there are a bunch of high bits free in an off_t. There is a
|
|
* use for them. Checking a possible hit by looking it up in the base
|
|
* file is relatively expensive, and the cost can be dramatically reduced
|
|
* by using some of those high bits to tag the value with a few more bits
|
|
* of the key's hash. This detects most false hits without the overhead of
|
|
* seek+read+strcmp. We use the top bit to indicate whether the value is
|
|
* tagged or not, and don't tag a value which is using the tag bits itself.
|
|
* We're in trouble if the off_t representation wants to use the top bit.
|
|
* The actual bitmasks and offset come from the configuration stuff,
|
|
* which permits fiddling with them as necessary, and also suppressing
|
|
* them completely (by defining the masks to 0). We build pre-shifted
|
|
* versions of the masks for efficiency.
|
|
*/
|
|
static off_t tagbits; /* pre-shifted tag mask */
|
|
static off_t taghere; /* pre-shifted tag-enable bit */
|
|
static off_t tagboth; /* tagbits|taghere */
|
|
#define HASTAG(o) ((o)&taghere)
|
|
#define TAG(o) ((o)&tagbits)
|
|
#define NOTAG(o) ((o)&~tagboth)
|
|
#define CANTAG(o) (((o)&tagboth) == 0)
|
|
#define MKTAG(v) (((v)<<conf.tagshift)&tagbits)
|
|
|
|
/*
|
|
* A new, from-scratch database, not built as a rebuild of an old one,
|
|
* needs to know table size, casemap algorithm, and tagging. Normally
|
|
* the user supplies this info, but there have to be defaults.
|
|
*/
|
|
#ifndef DEFSIZE
|
|
#define DEFSIZE 120011 /* 300007 might be better */
|
|
#endif
|
|
#ifdef OLDBNEWS
|
|
#define DEFCASE '0' /* B2.10 -- no mapping */
|
|
#define NOBUFFER /* B News locking is defective */
|
|
#endif
|
|
#ifdef BNEWS
|
|
#define DEFCASE '=' /* B2.11 -- all mapped */
|
|
#define NOBUFFER /* B News locking is defective */
|
|
#endif
|
|
#ifndef DEFCASE /* C News compatibility is the default */
|
|
#define DEFCASE 'C' /* C News -- RFC822 mapping */
|
|
#endif
|
|
#ifndef NOTAGS
|
|
#define TAGENB 0x80 /* tag enable is top bit, tag is next 7 */
|
|
#define TAGMASK 0x7f
|
|
#define TAGSHIFT 24
|
|
#else
|
|
#define TAGENB 0 /* no tags */
|
|
#define TAGMASK 0
|
|
#define TAGSHIFT 0
|
|
#endif
|
|
|
|
/*
|
|
* We read configuration info from the .dir file into this structure,
|
|
* so we can avoid wired-in assumptions for an existing database.
|
|
*
|
|
* Among the info is a record of recent peak usages, so that a new table
|
|
* size can be chosen intelligently when rebuilding. 10 is a good
|
|
* number of usages to keep, since news displays marked fluctuations
|
|
* in volume on a 7-day cycle.
|
|
*/
|
|
struct dbzconfig {
|
|
int olddbz; /* .dir file empty but .pag not? */
|
|
off_t tsize; /* table size */
|
|
# ifndef NMEMORY
|
|
# define NMEMORY 10 /* # days of use info to remember */
|
|
# endif
|
|
# define NUSEDS (1+NMEMORY)
|
|
off_t used[NUSEDS]; /* entries used today, yesterday, ... */
|
|
int valuesize; /* size of table values, == SOF */
|
|
int bytemap[SOF]; /* byte-order map */
|
|
char casemap; /* case-mapping algorithm (see cipoint()) */
|
|
char fieldsep; /* field separator in base file, if any */
|
|
off_t tagenb; /* unshifted tag-enable bit */
|
|
off_t tagmask; /* unshifted tag mask */
|
|
int tagshift; /* shift count for tagmask and tagenb */
|
|
};
|
|
static struct dbzconfig conf;
|
|
static int getconf();
|
|
static long getno();
|
|
static int putconf();
|
|
static void mybytemap();
|
|
static off_t bytemap();
|
|
|
|
/*
|
|
* For a program that makes many, many references to the database, it
|
|
* is a large performance win to keep the table in core, if it will fit.
|
|
* Note that this does hurt robustness in the event of crashes, and
|
|
* dbmclose() *must* be called to flush the in-core database to disk.
|
|
* The code is prepared to deal with the possibility that there isn't
|
|
* enough memory. There *is* an assumption that a size_t is big enough
|
|
* to hold the size (in bytes) of one table, so dbminit() tries to figure
|
|
* out whether this is possible first.
|
|
*
|
|
* The preferred way to ask for an in-core table is to do dbzincore(1)
|
|
* before dbminit(). The default is not to do it, although -DINCORE
|
|
* overrides this for backward compatibility with old dbz.
|
|
*
|
|
* We keep only the first table in core. This greatly simplifies the
|
|
* code, and bounds memory demand. Furthermore, doing this is a large
|
|
* performance win even in the event of massive overflow.
|
|
*/
|
|
#ifdef INCORE
|
|
static int incore = 1;
|
|
#else
|
|
static int incore = 0;
|
|
#endif
|
|
|
|
/*
|
|
* Stdio buffer for .pag reads. Buffering more than about 16 does not help
|
|
* significantly at the densities we try to maintain, and the much larger
|
|
* buffers that most stdios default to are much more expensive to fill.
|
|
* With small buffers, stdio is performance-competitive with raw read(),
|
|
* and it's much more portable.
|
|
*/
|
|
#ifndef NPAGBUF
|
|
#define NPAGBUF 16
|
|
#endif
|
|
#ifndef NOBUFFER
|
|
#ifdef _IOFBF
|
|
static off_t pagbuf[NPAGBUF]; /* only needed if !NOBUFFER && _IOFBF */
|
|
#endif
|
|
#endif
|
|
|
|
/*
|
|
* Stdio buffer for base-file reads. Message-IDs (all news ever needs to
|
|
* read) are essentially never longer than 64 bytes, and the typical stdio
|
|
* buffer is so much larger that it is much more expensive to fill.
|
|
*/
|
|
#ifndef SHISTBUF
|
|
#define SHISTBUF 64
|
|
#endif
|
|
#ifdef _IOFBF
|
|
static char basebuf[SHISTBUF]; /* only needed if _IOFBF exists */
|
|
#endif
|
|
|
|
/*
|
|
* Data structure for recording info about searches.
|
|
*/
|
|
struct searcher {
|
|
off_t place; /* current location in file */
|
|
int tabno; /* which table we're in */
|
|
int run; /* how long we'll stay in this table */
|
|
# ifndef MAXRUN
|
|
# define MAXRUN 100
|
|
# endif
|
|
long hash; /* the key's hash code (for optimization) */
|
|
off_t tag; /* tag we are looking for */
|
|
int seen; /* have we examined current location? */
|
|
int aborted; /* has i/o error aborted search? */
|
|
};
|
|
static void start();
|
|
#define FRESH ((struct searcher *)NULL)
|
|
static off_t search();
|
|
#define NOTFOUND ((off_t)-1)
|
|
static int okayvalue();
|
|
static int set();
|
|
|
|
/*
|
|
* Arguably the searcher struct for a given routine ought to be local to
|
|
* it, but a fetch() is very often immediately followed by a store(), and
|
|
* in some circumstances it is a useful performance win to remember where
|
|
* the fetch() completed. So we use a global struct and remember whether
|
|
* it is current.
|
|
*/
|
|
static struct searcher srch;
|
|
static struct searcher *prevp; /* &srch or FRESH */
|
|
|
|
/* byte-ordering stuff */
|
|
static int mybmap[SOF]; /* my byte order (see mybytemap()) */
|
|
static int bytesame; /* is database order same as mine? */
|
|
#define MAPIN(o) ((bytesame) ? (o) : bytemap((o), conf.bytemap, mybmap))
|
|
#define MAPOUT(o) ((bytesame) ? (o) : bytemap((o), mybmap, conf.bytemap))
|
|
|
|
/*
|
|
* The double parentheses needed to make this work are ugly, but the
|
|
* alternative (under most compilers) is to pack around 2K of unused
|
|
* strings -- there's just no way to get rid of them.
|
|
*/
|
|
static int debug; /* controlled by dbzdebug() */
|
|
#ifdef DBZDEBUG
|
|
#define DEBUG(args) if (debug) { (void) printf args ; }
|
|
#else
|
|
#define DEBUG(args) ;
|
|
#endif
|
|
|
|
/* externals used */
|
|
extern char *malloc();
|
|
extern char *calloc();
|
|
extern void free(); /* ANSI C; some old implementations say int */
|
|
extern int atoi();
|
|
extern long atol();
|
|
|
|
/* misc. forwards */
|
|
static long hash();
|
|
static void crcinit();
|
|
static char *cipoint();
|
|
static char *mapcase();
|
|
static int isprime();
|
|
static FILE *latebase();
|
|
|
|
/* file-naming stuff */
|
|
static char dir[] = ".dir";
|
|
static char pag[] = ".pag";
|
|
static char *enstring();
|
|
|
|
/* central data structures */
|
|
static FILE *basef; /* descriptor for base file */
|
|
static char *basefname; /* name for not-yet-opened base file */
|
|
static FILE *dirf; /* descriptor for .dir file */
|
|
static int dirronly; /* dirf open read-only? */
|
|
static FILE *pagf = NULL; /* descriptor for .pag file */
|
|
static off_t pagpos; /* posn in pagf; only search may set != -1 */
|
|
static int pagronly; /* pagf open read-only? */
|
|
static off_t *corepag; /* incore version of .pag file, if any */
|
|
static FILE *bufpagf; /* well-buffered pagf, for incore rewrite */
|
|
static off_t *getcore();
|
|
static int putcore();
|
|
static int written; /* has a store() been done? */
|
|
|
|
/*
|
|
- dbzfresh - set up a new database, no historical info
|
|
*/
|
|
int /* 0 success, -1 failure */
|
|
dbzfresh(name, size, fs, cmap, tagmask)
|
|
char *name; /* base name; .dir and .pag must exist */
|
|
long size; /* table size (0 means default) */
|
|
int fs; /* field-separator character in base file */
|
|
int cmap; /* case-map algorithm (0 means default) */
|
|
off_t tagmask; /* 0 default, 1 no tags */
|
|
{
|
|
register char *fn;
|
|
struct dbzconfig c;
|
|
register off_t m;
|
|
register FILE *f;
|
|
|
|
if (pagf != NULL) {
|
|
DEBUG(("dbzfresh: database already open\n"));
|
|
return(-1);
|
|
}
|
|
if (size != 0 && size < 2) {
|
|
DEBUG(("dbzfresh: preposterous size (%ld)\n", size));
|
|
return(-1);
|
|
}
|
|
|
|
/* get default configuration */
|
|
if (getconf((FILE *)NULL, (FILE *)NULL, &c) < 0)
|
|
return(-1); /* "can't happen" */
|
|
|
|
/* and mess with it as specified */
|
|
if (size != 0)
|
|
c.tsize = size;
|
|
c.fieldsep = fs;
|
|
switch (cmap) {
|
|
case 0:
|
|
case '0':
|
|
case 'B': /* 2.10 compat */
|
|
c.casemap = '0'; /* '\0' nicer, but '0' printable! */
|
|
break;
|
|
case '=':
|
|
case 'b': /* 2.11 compat */
|
|
c.casemap = '=';
|
|
break;
|
|
case 'C':
|
|
c.casemap = 'C';
|
|
break;
|
|
case '?':
|
|
c.casemap = DEFCASE;
|
|
break;
|
|
default:
|
|
DEBUG(("dbzfresh case map `%c' unknown\n", cmap));
|
|
return(-1);
|
|
break;
|
|
}
|
|
switch (tagmask) {
|
|
case 0: /* default */
|
|
break;
|
|
case 1: /* no tags */
|
|
c.tagshift = 0;
|
|
c.tagmask = 0;
|
|
c.tagenb = 0;
|
|
break;
|
|
default:
|
|
m = tagmask;
|
|
c.tagshift = 0;
|
|
while (!(m&01)) {
|
|
m >>= 1;
|
|
c.tagshift++;
|
|
}
|
|
c.tagmask = m;
|
|
c.tagenb = (m << 1) & ~m;
|
|
break;
|
|
}
|
|
|
|
/* write it out */
|
|
fn = enstring(name, dir);
|
|
if (fn == NULL)
|
|
return(-1);
|
|
f = fopen(fn, "w");
|
|
free(fn);
|
|
if (f == NULL) {
|
|
DEBUG(("dbzfresh: unable to write config\n"));
|
|
return(-1);
|
|
}
|
|
if (putconf(f, &c) < 0) {
|
|
(void) fclose(f);
|
|
return(-1);
|
|
}
|
|
if (fclose(f) == EOF) {
|
|
DEBUG(("dbzfresh: fclose failure\n"));
|
|
return(-1);
|
|
}
|
|
|
|
/* create/truncate .pag */
|
|
fn = enstring(name, pag);
|
|
if (fn == NULL)
|
|
return(-1);
|
|
f = fopen(fn, "w");
|
|
free(fn);
|
|
if (f == NULL) {
|
|
DEBUG(("dbzfresh: unable to create/truncate .pag file\n"));
|
|
return(-1);
|
|
} else
|
|
(void) fclose(f);
|
|
|
|
/* and punt to dbminit for the hard work */
|
|
return(dbminit(name));
|
|
}
|
|
|
|
/*
|
|
- dbzsize - what's a good table size to hold this many entries?
|
|
*/
|
|
long
|
|
dbzsize(contents)
|
|
long contents; /* 0 means what's the default */
|
|
{
|
|
register long n;
|
|
|
|
if (contents <= 0) { /* foulup or default inquiry */
|
|
DEBUG(("dbzsize: preposterous input (%ld)\n", contents));
|
|
return(DEFSIZE);
|
|
}
|
|
n = (contents/2)*3; /* try to keep table at most 2/3 full */
|
|
if (!(n&01)) /* make it odd */
|
|
n++;
|
|
DEBUG(("dbzsize: tentative size %ld\n", n));
|
|
while (!isprime(n)) /* and look for a prime */
|
|
n += 2;
|
|
DEBUG(("dbzsize: final size %ld\n", n));
|
|
|
|
return(n);
|
|
}
|
|
|
|
/*
|
|
- isprime - is a number prime?
|
|
*
|
|
* This is not a terribly efficient approach.
|
|
*/
|
|
static int /* predicate */
|
|
isprime(x)
|
|
register long x;
|
|
{
|
|
static int quick[] = { 2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 0 };
|
|
register int *ip;
|
|
register long div;
|
|
register long stop;
|
|
|
|
/* hit the first few primes quickly to eliminate easy ones */
|
|
/* this incidentally prevents ridiculously small tables */
|
|
for (ip = quick; (div = *ip) != 0; ip++)
|
|
if (x%div == 0) {
|
|
DEBUG(("isprime: quick result on %ld\n", (long)x));
|
|
return(0);
|
|
}
|
|
|
|
/* approximate square root of x */
|
|
for (stop = x; x/stop < stop; stop >>= 1)
|
|
continue;
|
|
stop <<= 1;
|
|
|
|
/* try odd numbers up to stop */
|
|
for (div = *--ip; div < stop; div += 2)
|
|
if (x%div == 0)
|
|
return(0);
|
|
|
|
return(1);
|
|
}
|
|
|
|
/*
|
|
- dbzagain - set up a new database to be a rebuild of an old one
|
|
*/
|
|
int /* 0 success, -1 failure */
|
|
dbzagain(name, oldname)
|
|
char *name; /* base name; .dir and .pag must exist */
|
|
char *oldname; /* base name; all must exist */
|
|
{
|
|
register char *fn;
|
|
struct dbzconfig c;
|
|
register int i;
|
|
register long top;
|
|
register FILE *f;
|
|
register int newtable;
|
|
register off_t newsize;
|
|
|
|
if (pagf != NULL) {
|
|
DEBUG(("dbzagain: database already open\n"));
|
|
return(-1);
|
|
}
|
|
|
|
/* pick up the old configuration */
|
|
fn = enstring(oldname, dir);
|
|
if (fn == NULL)
|
|
return(-1);
|
|
f = fopen(fn, "r");
|
|
free(fn);
|
|
if (f == NULL) {
|
|
DEBUG(("dbzagain: cannot open old .dir file\n"));
|
|
return(-1);
|
|
}
|
|
i = getconf(f, (FILE *)NULL, &c);
|
|
(void) fclose(f);
|
|
if (i < 0) {
|
|
DEBUG(("dbzagain: getconf failed\n"));
|
|
return(-1);
|
|
}
|
|
|
|
/* tinker with it */
|
|
top = 0;
|
|
newtable = 0;
|
|
for (i = 0; i < NUSEDS; i++) {
|
|
if (top < c.used[i])
|
|
top = c.used[i];
|
|
if (c.used[i] == 0)
|
|
newtable = 1; /* hasn't got full usage history yet */
|
|
}
|
|
if (top == 0) {
|
|
DEBUG(("dbzagain: old table has no contents!\n"));
|
|
newtable = 1;
|
|
}
|
|
for (i = NUSEDS-1; i > 0; i--)
|
|
c.used[i] = c.used[i-1];
|
|
c.used[0] = 0;
|
|
newsize = dbzsize(top);
|
|
if (!newtable || newsize > c.tsize) /* don't shrink new table */
|
|
c.tsize = newsize;
|
|
|
|
/* write it out */
|
|
fn = enstring(name, dir);
|
|
if (fn == NULL)
|
|
return(-1);
|
|
f = fopen(fn, "w");
|
|
free(fn);
|
|
if (f == NULL) {
|
|
DEBUG(("dbzagain: unable to write new .dir\n"));
|
|
return(-1);
|
|
}
|
|
i = putconf(f, &c);
|
|
(void) fclose(f);
|
|
if (i < 0) {
|
|
DEBUG(("dbzagain: putconf failed\n"));
|
|
return(-1);
|
|
}
|
|
|
|
/* create/truncate .pag */
|
|
fn = enstring(name, pag);
|
|
if (fn == NULL)
|
|
return(-1);
|
|
f = fopen(fn, "w");
|
|
free(fn);
|
|
if (f == NULL) {
|
|
DEBUG(("dbzagain: unable to create/truncate .pag file\n"));
|
|
return(-1);
|
|
} else
|
|
(void) fclose(f);
|
|
|
|
/* and let dbminit do the work */
|
|
return(dbminit(name));
|
|
}
|
|
|
|
/*
|
|
- dbminit - open a database, creating it (using defaults) if necessary
|
|
*
|
|
* We try to leave errno set plausibly, to the extent that underlying
|
|
* functions permit this, since many people consult it if dbminit() fails.
|
|
*/
|
|
int /* 0 success, -1 failure */
|
|
dbminit(name)
|
|
char *name;
|
|
{
|
|
register int i;
|
|
register size_t s;
|
|
register char *dirfname;
|
|
register char *pagfname;
|
|
|
|
if (pagf != NULL) {
|
|
DEBUG(("dbminit: dbminit already called once\n"));
|
|
errno = 0;
|
|
return(-1);
|
|
}
|
|
|
|
/* open the .dir file */
|
|
dirfname = enstring(name, dir);
|
|
if (dirfname == NULL)
|
|
return(-1);
|
|
dirf = fopen(dirfname, "r+");
|
|
if (dirf == NULL) {
|
|
dirf = fopen(dirfname, "r");
|
|
dirronly = 1;
|
|
} else
|
|
dirronly = 0;
|
|
free(dirfname);
|
|
if (dirf == NULL) {
|
|
DEBUG(("dbminit: can't open .dir file\n"));
|
|
return(-1);
|
|
}
|
|
|
|
/* open the .pag file */
|
|
pagfname = enstring(name, pag);
|
|
if (pagfname == NULL) {
|
|
(void) fclose(dirf);
|
|
return(-1);
|
|
}
|
|
pagf = fopen(pagfname, "r+b");
|
|
if (pagf == NULL) {
|
|
pagf = fopen(pagfname, "rb");
|
|
if (pagf == NULL) {
|
|
DEBUG(("dbminit: .pag open failed\n"));
|
|
(void) fclose(dirf);
|
|
free(pagfname);
|
|
return(-1);
|
|
}
|
|
pagronly = 1;
|
|
} else if (dirronly)
|
|
pagronly = 1;
|
|
else
|
|
pagronly = 0;
|
|
#ifdef NOBUFFER
|
|
/*
|
|
* B News does not do adequate locking on its database accesses.
|
|
* Why it doesn't get into trouble using dbm is a mystery. In any
|
|
* case, doing unbuffered i/o does not cure the problem, but does
|
|
* enormously reduce its incidence.
|
|
*/
|
|
(void) setbuf(pagf, (char *)NULL);
|
|
#else
|
|
#ifdef _IOFBF
|
|
(void) setvbuf(pagf, (char *)pagbuf, _IOFBF, sizeof(pagbuf));
|
|
#endif
|
|
#endif
|
|
pagpos = -1;
|
|
/* don't free pagfname, need it below */
|
|
|
|
/* open the base file */
|
|
basef = fopen(name, "r");
|
|
if (basef == NULL) {
|
|
DEBUG(("dbminit: basefile open failed\n"));
|
|
basefname = enstring(name, "");
|
|
if (basefname == NULL) {
|
|
(void) fclose(pagf);
|
|
(void) fclose(dirf);
|
|
free(pagfname);
|
|
pagf = NULL;
|
|
return(-1);
|
|
}
|
|
} else
|
|
basefname = NULL;
|
|
#ifdef _IOFBF
|
|
if (basef != NULL)
|
|
(void) setvbuf(basef, basebuf, _IOFBF, sizeof(basebuf));
|
|
#endif
|
|
|
|
/* pick up configuration */
|
|
if (getconf(dirf, pagf, &conf) < 0) {
|
|
DEBUG(("dbminit: getconf failure\n"));
|
|
(void) fclose(basef);
|
|
(void) fclose(pagf);
|
|
(void) fclose(dirf);
|
|
free(pagfname);
|
|
pagf = NULL;
|
|
errno = EDOM; /* kind of a kludge, but very portable */
|
|
return(-1);
|
|
}
|
|
tagbits = conf.tagmask << conf.tagshift;
|
|
taghere = conf.tagenb << conf.tagshift;
|
|
tagboth = tagbits | taghere;
|
|
mybytemap(mybmap);
|
|
bytesame = 1;
|
|
for (i = 0; i < SOF; i++)
|
|
if (mybmap[i] != conf.bytemap[i])
|
|
bytesame = 0;
|
|
|
|
/* get first table into core, if it looks desirable and feasible */
|
|
s = (size_t)conf.tsize * SOF;
|
|
if (incore && (off_t)(s/SOF) == conf.tsize) {
|
|
bufpagf = fopen(pagfname, (pagronly) ? "rb" : "r+b");
|
|
if (bufpagf != NULL)
|
|
corepag = getcore(bufpagf);
|
|
} else {
|
|
bufpagf = NULL;
|
|
corepag = NULL;
|
|
}
|
|
free(pagfname);
|
|
|
|
/* misc. setup */
|
|
crcinit();
|
|
written = 0;
|
|
prevp = FRESH;
|
|
DEBUG(("dbminit: succeeded\n"));
|
|
return(0);
|
|
}
|
|
|
|
/*
|
|
- enstring - concatenate two strings into a malloced area
|
|
*/
|
|
static char * /* NULL if malloc fails */
|
|
enstring(s1, s2)
|
|
char *s1;
|
|
char *s2;
|
|
{
|
|
register char *p;
|
|
|
|
p = malloc((size_t)strlen(s1) + (size_t)strlen(s2) + 1);
|
|
if (p != NULL) {
|
|
(void) strcpy(p, s1);
|
|
(void) strcat(p, s2);
|
|
} else {
|
|
DEBUG(("enstring(%s, %s) out of memory\n", s1, s2));
|
|
}
|
|
return(p);
|
|
}
|
|
|
|
/*
|
|
- dbmclose - close a database
|
|
*/
|
|
int
|
|
dbmclose()
|
|
{
|
|
register int ret = 0;
|
|
|
|
if (pagf == NULL) {
|
|
DEBUG(("dbmclose: not opened!\n"));
|
|
return(-1);
|
|
}
|
|
|
|
if (fclose(pagf) == EOF) {
|
|
DEBUG(("dbmclose: fclose(pagf) failed\n"));
|
|
ret = -1;
|
|
}
|
|
pagf = basef; /* ensure valid pointer; dbzsync checks it */
|
|
if (dbzsync() < 0)
|
|
ret = -1;
|
|
if (bufpagf != NULL && fclose(bufpagf) == EOF) {
|
|
DEBUG(("dbmclose: fclose(bufpagf) failed\n"));
|
|
ret = -1;
|
|
}
|
|
if (corepag != NULL)
|
|
free((char *)corepag);
|
|
corepag = NULL;
|
|
if (fclose(basef) == EOF) {
|
|
DEBUG(("dbmclose: fclose(basef) failed\n"));
|
|
ret = -1;
|
|
}
|
|
if (basefname != NULL)
|
|
free(basefname);
|
|
basef = NULL;
|
|
pagf = NULL;
|
|
if (fclose(dirf) == EOF) {
|
|
DEBUG(("dbmclose: fclose(dirf) failed\n"));
|
|
ret = -1;
|
|
}
|
|
|
|
DEBUG(("dbmclose: %s\n", (ret == 0) ? "succeeded" : "failed"));
|
|
return(ret);
|
|
}
|
|
|
|
/*
|
|
- dbzsync - push all in-core data out to disk
|
|
*/
|
|
int
|
|
dbzsync()
|
|
{
|
|
register int ret = 0;
|
|
|
|
if (pagf == NULL) {
|
|
DEBUG(("dbzsync: not opened!\n"));
|
|
return(-1);
|
|
}
|
|
if (!written)
|
|
return(0);
|
|
|
|
if (corepag != NULL) {
|
|
if (putcore(corepag, bufpagf) < 0) {
|
|
DEBUG(("dbzsync: putcore failed\n"));
|
|
ret = -1;
|
|
}
|
|
}
|
|
if (!conf.olddbz)
|
|
if (putconf(dirf, &conf) < 0)
|
|
ret = -1;
|
|
|
|
DEBUG(("dbzsync: %s\n", (ret == 0) ? "succeeded" : "failed"));
|
|
return(ret);
|
|
}
|
|
|
|
/*
|
|
- dbzcancel - cancel writing of in-core data
|
|
* Mostly for use from child processes.
|
|
* Note that we don't need to futz around with stdio buffers, because we
|
|
* always fflush them immediately anyway and so they never have stale data.
|
|
*/
|
|
int
|
|
dbzcancel()
|
|
{
|
|
if (pagf == NULL) {
|
|
DEBUG(("dbzcancel: not opened!\n"));
|
|
return(-1);
|
|
}
|
|
|
|
written = 0;
|
|
return(0);
|
|
}
|
|
|
|
/*
|
|
- dbzfetch - fetch() with case mapping built in
|
|
*/
|
|
datum
|
|
dbzfetch(key)
|
|
datum key;
|
|
{
|
|
char buffer[DBZMAXKEY + 1];
|
|
datum mappedkey;
|
|
register size_t keysize;
|
|
|
|
DEBUG(("dbzfetch: (%s)\n", key.dptr));
|
|
|
|
/* Key is supposed to be less than DBZMAXKEY */
|
|
keysize = key.dsize;
|
|
if (keysize >= DBZMAXKEY) {
|
|
keysize = DBZMAXKEY;
|
|
DEBUG(("keysize is %d - truncated to %d\n", key.dsize, DBZMAXKEY));
|
|
}
|
|
|
|
mappedkey.dptr = mapcase(buffer, key.dptr, keysize);
|
|
buffer[keysize] = '\0'; /* just a debug aid */
|
|
mappedkey.dsize = keysize;
|
|
|
|
return(fetch(mappedkey));
|
|
}
|
|
|
|
/*
|
|
- fetch - get an entry from the database
|
|
*
|
|
* Disgusting fine point, in the name of backward compatibility: if the
|
|
* last character of "key" is a NUL, that character is (effectively) not
|
|
* part of the comparison against the stored keys.
|
|
*/
|
|
datum /* dptr NULL, dsize 0 means failure */
|
|
fetch(key)
|
|
datum key;
|
|
{
|
|
char buffer[DBZMAXKEY + 1];
|
|
static off_t key_ptr; /* return value points here */
|
|
datum output;
|
|
register size_t keysize;
|
|
register size_t cmplen;
|
|
register char *sepp;
|
|
|
|
DEBUG(("fetch: (%s)\n", key.dptr));
|
|
output.dptr = NULL;
|
|
output.dsize = 0;
|
|
prevp = FRESH;
|
|
|
|
/* Key is supposed to be less than DBZMAXKEY */
|
|
keysize = key.dsize;
|
|
if (keysize >= DBZMAXKEY) {
|
|
keysize = DBZMAXKEY;
|
|
DEBUG(("keysize is %d - truncated to %d\n", key.dsize, DBZMAXKEY));
|
|
}
|
|
|
|
if (pagf == NULL) {
|
|
DEBUG(("fetch: database not open!\n"));
|
|
return(output);
|
|
} else if (basef == NULL) { /* basef didn't exist yet */
|
|
basef = latebase();
|
|
if (basef == NULL)
|
|
return(output);
|
|
}
|
|
|
|
cmplen = keysize;
|
|
sepp = &conf.fieldsep;
|
|
if (key.dptr[keysize-1] == '\0') {
|
|
cmplen--;
|
|
sepp = &buffer[keysize-1];
|
|
}
|
|
start(&srch, &key, FRESH);
|
|
while ((key_ptr = search(&srch)) != NOTFOUND) {
|
|
DEBUG(("got 0x%lx\n", key_ptr));
|
|
|
|
/* fetch the key */
|
|
if (fseek(basef, key_ptr, SEEK_SET) != 0) {
|
|
DEBUG(("fetch: seek failed\n"));
|
|
return(output);
|
|
}
|
|
if (fread(buffer, 1, keysize, basef) != keysize) {
|
|
DEBUG(("fetch: read failed\n"));
|
|
return(output);
|
|
}
|
|
|
|
/* try it */
|
|
buffer[keysize] = '\0'; /* terminated for DEBUG */
|
|
(void) mapcase(buffer, buffer, keysize);
|
|
DEBUG(("fetch: buffer (%s) looking for (%s) size = %d\n",
|
|
buffer, key.dptr, keysize));
|
|
if (memcmp(key.dptr, buffer, cmplen) == 0 &&
|
|
(*sepp == conf.fieldsep || *sepp == '\0')) {
|
|
/* we found it */
|
|
output.dptr = (char *)&key_ptr;
|
|
output.dsize = SOF;
|
|
DEBUG(("fetch: successful\n"));
|
|
return(output);
|
|
}
|
|
}
|
|
|
|
/* we didn't find it */
|
|
DEBUG(("fetch: failed\n"));
|
|
prevp = &srch; /* remember where we stopped */
|
|
return(output);
|
|
}
|
|
|
|
/*
|
|
- latebase - try to open a base file that wasn't there at the start
|
|
*/
|
|
static FILE *
|
|
latebase()
|
|
{
|
|
register FILE *it;
|
|
|
|
if (basefname == NULL) {
|
|
DEBUG(("latebase: name foulup\n"));
|
|
return(NULL);
|
|
}
|
|
it = fopen(basefname, "r");
|
|
if (it == NULL) {
|
|
DEBUG(("latebase: still can't open base\n"));
|
|
} else {
|
|
DEBUG(("latebase: late open succeeded\n"));
|
|
free(basefname);
|
|
basefname = NULL;
|
|
#ifdef _IOFBF
|
|
(void) setvbuf(it, basebuf, _IOFBF, sizeof(basebuf));
|
|
#endif
|
|
}
|
|
return(it);
|
|
}
|
|
|
|
/*
|
|
- dbzstore - store() with case mapping built in
|
|
*/
|
|
int
|
|
dbzstore(key, data)
|
|
datum key;
|
|
datum data;
|
|
{
|
|
char buffer[DBZMAXKEY + 1];
|
|
datum mappedkey;
|
|
register size_t keysize;
|
|
|
|
DEBUG(("dbzstore: (%s)\n", key.dptr));
|
|
|
|
/* Key is supposed to be less than DBZMAXKEY */
|
|
keysize = key.dsize;
|
|
if (keysize >= DBZMAXKEY) {
|
|
DEBUG(("dbzstore: key size too big (%d)\n", key.dsize));
|
|
return(-1);
|
|
}
|
|
|
|
mappedkey.dptr = mapcase(buffer, key.dptr, keysize);
|
|
buffer[keysize] = '\0'; /* just a debug aid */
|
|
mappedkey.dsize = keysize;
|
|
|
|
return(store(mappedkey, data));
|
|
}
|
|
|
|
/*
|
|
- store - add an entry to the database
|
|
*/
|
|
int /* 0 success, -1 failure */
|
|
store(key, data)
|
|
datum key;
|
|
datum data;
|
|
{
|
|
off_t value;
|
|
|
|
if (pagf == NULL) {
|
|
DEBUG(("store: database not open!\n"));
|
|
return(-1);
|
|
} else if (basef == NULL) { /* basef didn't exist yet */
|
|
basef = latebase();
|
|
if (basef == NULL)
|
|
return(-1);
|
|
}
|
|
if (pagronly) {
|
|
DEBUG(("store: database open read-only\n"));
|
|
return(-1);
|
|
}
|
|
if (data.dsize != SOF) {
|
|
DEBUG(("store: value size wrong (%d)\n", data.dsize));
|
|
return(-1);
|
|
}
|
|
if (key.dsize >= DBZMAXKEY) {
|
|
DEBUG(("store: key size too big (%d)\n", key.dsize));
|
|
return(-1);
|
|
}
|
|
|
|
/* copy the value in to ensure alignment */
|
|
(void) memcpy((char *)&value, data.dptr, SOF);
|
|
DEBUG(("store: (%s, %ld)\n", key.dptr, (long)value));
|
|
if (!okayvalue(value)) {
|
|
DEBUG(("store: reserved bit or overflow in 0x%lx\n", value));
|
|
return(-1);
|
|
}
|
|
|
|
/* find the place, exploiting previous search if possible */
|
|
start(&srch, &key, prevp);
|
|
while (search(&srch) != NOTFOUND)
|
|
continue;
|
|
|
|
prevp = FRESH;
|
|
conf.used[0]++;
|
|
DEBUG(("store: used count %ld\n", conf.used[0]));
|
|
written = 1;
|
|
return(set(&srch, value));
|
|
}
|
|
|
|
/*
|
|
- dbzincore - control attempts to keep .pag file in core
|
|
*/
|
|
int /* old setting */
|
|
dbzincore(value)
|
|
int value;
|
|
{
|
|
register int old = incore;
|
|
|
|
incore = value;
|
|
return(old);
|
|
}
|
|
|
|
/*
|
|
- getconf - get configuration from .dir file
|
|
*/
|
|
static int /* 0 success, -1 failure */
|
|
getconf(df, pf, cp)
|
|
register FILE *df; /* NULL means just give me the default */
|
|
register FILE *pf; /* NULL means don't care about .pag */
|
|
register struct dbzconfig *cp;
|
|
{
|
|
register int c;
|
|
register int i;
|
|
int err = 0;
|
|
|
|
c = (df != NULL) ? getc(df) : EOF;
|
|
if (c == EOF) { /* empty file, no configuration known */
|
|
cp->olddbz = 0;
|
|
if (df != NULL && pf != NULL && getc(pf) != EOF)
|
|
cp->olddbz = 1;
|
|
cp->tsize = DEFSIZE;
|
|
cp->fieldsep = '\t';
|
|
for (i = 0; i < NUSEDS; i++)
|
|
cp->used[i] = 0;
|
|
cp->valuesize = SOF;
|
|
mybytemap(cp->bytemap);
|
|
cp->casemap = DEFCASE;
|
|
cp->tagenb = TAGENB;
|
|
cp->tagmask = TAGMASK;
|
|
cp->tagshift = TAGSHIFT;
|
|
DEBUG(("getconf: defaults (%ld, %c, (0x%lx/0x%lx<<%d))\n",
|
|
cp->tsize, cp->casemap, cp->tagenb,
|
|
cp->tagmask, cp->tagshift));
|
|
return(0);
|
|
}
|
|
(void) ungetc(c, df);
|
|
|
|
/* first line, the vital stuff */
|
|
if (getc(df) != 'd' || getc(df) != 'b' || getc(df) != 'z')
|
|
err = -1;
|
|
if (getno(df, &err) != dbzversion)
|
|
err = -1;
|
|
cp->tsize = getno(df, &err);
|
|
cp->fieldsep = getno(df, &err);
|
|
while ((c = getc(df)) == ' ')
|
|
continue;
|
|
cp->casemap = c;
|
|
cp->tagenb = getno(df, &err);
|
|
cp->tagmask = getno(df, &err);
|
|
cp->tagshift = getno(df, &err);
|
|
cp->valuesize = getno(df, &err);
|
|
if (cp->valuesize != SOF) {
|
|
DEBUG(("getconf: wrong off_t size (%d)\n", cp->valuesize));
|
|
err = -1;
|
|
cp->valuesize = SOF; /* to protect the loops below */
|
|
}
|
|
for (i = 0; i < cp->valuesize; i++)
|
|
cp->bytemap[i] = getno(df, &err);
|
|
if (getc(df) != '\n')
|
|
err = -1;
|
|
DEBUG(("size %ld, sep %d, cmap %c, tags 0x%lx/0x%lx<<%d, ", cp->tsize,
|
|
cp->fieldsep, cp->casemap, cp->tagenb, cp->tagmask,
|
|
cp->tagshift));
|
|
DEBUG(("bytemap (%d)", cp->valuesize));
|
|
for (i = 0; i < cp->valuesize; i++) {
|
|
DEBUG((" %d", cp->bytemap[i]));
|
|
}
|
|
DEBUG(("\n"));
|
|
|
|
/* second line, the usages */
|
|
for (i = 0; i < NUSEDS; i++)
|
|
cp->used[i] = getno(df, &err);
|
|
if (getc(df) != '\n')
|
|
err = -1;
|
|
DEBUG(("used %ld %ld %ld...\n", cp->used[0], cp->used[1], cp->used[2]));
|
|
|
|
if (err < 0) {
|
|
DEBUG(("getconf error\n"));
|
|
return(-1);
|
|
}
|
|
return(0);
|
|
}
|
|
|
|
/*
|
|
- getno - get a long
|
|
*/
|
|
static long
|
|
getno(f, ep)
|
|
FILE *f;
|
|
int *ep;
|
|
{
|
|
register char *p;
|
|
# define MAXN 50
|
|
char getbuf[MAXN];
|
|
register int c;
|
|
|
|
while ((c = getc(f)) == ' ')
|
|
continue;
|
|
if (c == EOF || c == '\n') {
|
|
DEBUG(("getno: missing number\n"));
|
|
*ep = -1;
|
|
return(0);
|
|
}
|
|
p = getbuf;
|
|
*p++ = c;
|
|
while ((c = getc(f)) != EOF && c != '\n' && c != ' ')
|
|
if (p < &getbuf[MAXN-1])
|
|
*p++ = c;
|
|
if (c == EOF) {
|
|
DEBUG(("getno: EOF\n"));
|
|
*ep = -1;
|
|
} else
|
|
(void) ungetc(c, f);
|
|
*p = '\0';
|
|
|
|
if (strspn(getbuf, "-1234567890") != strlen(getbuf)) {
|
|
DEBUG(("getno: `%s' non-numeric\n", getbuf));
|
|
*ep = -1;
|
|
}
|
|
return(atol(getbuf));
|
|
}
|
|
|
|
/*
|
|
- putconf - write configuration to .dir file
|
|
*/
|
|
static int /* 0 success, -1 failure */
|
|
putconf(f, cp)
|
|
register FILE *f;
|
|
register struct dbzconfig *cp;
|
|
{
|
|
register int i;
|
|
register int ret = 0;
|
|
|
|
if (fseek(f, 0, SEEK_SET) != 0) {
|
|
DEBUG(("fseek failure in putconf\n"));
|
|
ret = -1;
|
|
}
|
|
fprintf(f, "dbz %d %ld %d %c %ld %ld %d %d", dbzversion,
|
|
(long)cp->tsize,
|
|
cp->fieldsep, cp->casemap, (long)cp->tagenb,
|
|
(long)cp->tagmask, cp->tagshift,
|
|
cp->valuesize);
|
|
|
|
for (i = 0; i < cp->valuesize; i++)
|
|
fprintf(f, " %d", cp->bytemap[i]);
|
|
fprintf(f, "\n");
|
|
for (i = 0; i < NUSEDS; i++)
|
|
fprintf(f, "%ld%c",
|
|
(long)cp->used[i], (i < NUSEDS-1) ? ' ' : '\n');
|
|
|
|
|
|
(void) fflush(f);
|
|
if (ferror(f))
|
|
ret = -1;
|
|
|
|
DEBUG(("putconf status %d\n", ret));
|
|
return(ret);
|
|
}
|
|
|
|
/*
|
|
- getcore - try to set up an in-core copy of .pag file
|
|
*/
|
|
static off_t * /* pointer to copy, or NULL */
|
|
getcore(f)
|
|
FILE *f;
|
|
{
|
|
register off_t *p;
|
|
register size_t i;
|
|
register size_t nread;
|
|
register char *it;
|
|
|
|
it = malloc((size_t)conf.tsize * SOF);
|
|
if (it == NULL) {
|
|
DEBUG(("getcore: malloc failed\n"));
|
|
return(NULL);
|
|
}
|
|
|
|
nread = fread(it, SOF, (size_t)conf.tsize, f);
|
|
if (ferror(f)) {
|
|
DEBUG(("getcore: read failed\n"));
|
|
free(it);
|
|
return(NULL);
|
|
}
|
|
|
|
p = (off_t *)it + nread;
|
|
i = (size_t)conf.tsize - nread;
|
|
while (i-- > 0)
|
|
*p++ = VACANT;
|
|
return((off_t *)it);
|
|
}
|
|
|
|
/*
|
|
- putcore - try to rewrite an in-core table
|
|
*/
|
|
static int /* 0 okay, -1 fail */
|
|
putcore(tab, f)
|
|
off_t *tab;
|
|
FILE *f;
|
|
{
|
|
if (fseek(f, 0, SEEK_SET) != 0) {
|
|
DEBUG(("fseek failure in putcore\n"));
|
|
return(-1);
|
|
}
|
|
(void) fwrite((char *)tab, SOF, (size_t)conf.tsize, f);
|
|
(void) fflush(f);
|
|
return((ferror(f)) ? -1 : 0);
|
|
}
|
|
|
|
/*
|
|
- start - set up to start or restart a search
|
|
*/
|
|
static void
|
|
start(sp, kp, osp)
|
|
register struct searcher *sp;
|
|
register datum *kp;
|
|
register struct searcher *osp; /* may be FRESH, i.e. NULL */
|
|
{
|
|
register long h;
|
|
|
|
h = hash(kp->dptr, kp->dsize);
|
|
if (osp != FRESH && osp->hash == h) {
|
|
if (sp != osp)
|
|
*sp = *osp;
|
|
DEBUG(("search restarted\n"));
|
|
} else {
|
|
sp->hash = h;
|
|
sp->tag = MKTAG(h / conf.tsize);
|
|
DEBUG(("tag 0x%lx\n", sp->tag));
|
|
sp->place = h % conf.tsize;
|
|
sp->tabno = 0;
|
|
sp->run = (conf.olddbz) ? conf.tsize : MAXRUN;
|
|
sp->aborted = 0;
|
|
}
|
|
sp->seen = 0;
|
|
}
|
|
|
|
/*
|
|
- search - conduct part of a search
|
|
*/
|
|
static off_t /* NOTFOUND if we hit VACANT or error */
|
|
search(sp)
|
|
register struct searcher *sp;
|
|
{
|
|
register off_t dest;
|
|
register off_t value;
|
|
off_t val; /* buffer for value (can't fread register) */
|
|
register off_t place;
|
|
|
|
if (sp->aborted)
|
|
return(NOTFOUND);
|
|
|
|
for (;;) {
|
|
/* determine location to be examined */
|
|
place = sp->place;
|
|
if (sp->seen) {
|
|
/* go to next location */
|
|
if (--sp->run <= 0) {
|
|
sp->tabno++;
|
|
sp->run = MAXRUN;
|
|
}
|
|
place = (place+1)%conf.tsize + sp->tabno*conf.tsize;
|
|
sp->place = place;
|
|
} else
|
|
sp->seen = 1; /* now looking at current location */
|
|
DEBUG(("search @ %ld\n", place));
|
|
|
|
/* get the tagged value */
|
|
if (corepag != NULL && place < conf.tsize) {
|
|
DEBUG(("search: in core\n"));
|
|
value = MAPIN(corepag[place]);
|
|
} else {
|
|
/* seek, if necessary */
|
|
dest = place * SOF;
|
|
if (pagpos != dest) {
|
|
if (fseek(pagf, dest, SEEK_SET) != 0) {
|
|
DEBUG(("search: seek failed\n"));
|
|
pagpos = -1;
|
|
sp->aborted = 1;
|
|
return(NOTFOUND);
|
|
}
|
|
pagpos = dest;
|
|
}
|
|
|
|
/* read it */
|
|
if (fread((char *)&val, sizeof(val), 1, pagf) == 1)
|
|
value = MAPIN(val);
|
|
else if (ferror(pagf)) {
|
|
DEBUG(("search: read failed\n"));
|
|
pagpos = -1;
|
|
sp->aborted = 1;
|
|
return(NOTFOUND);
|
|
} else
|
|
value = VACANT;
|
|
|
|
/* and finish up */
|
|
pagpos += sizeof(val);
|
|
}
|
|
|
|
/* vacant slot is always cause to return */
|
|
if (value == VACANT) {
|
|
DEBUG(("search: empty slot\n"));
|
|
return(NOTFOUND);
|
|
};
|
|
|
|
/* check the tag */
|
|
value = UNBIAS(value);
|
|
DEBUG(("got 0x%lx\n", value));
|
|
if (!HASTAG(value)) {
|
|
DEBUG(("tagless\n"));
|
|
return(value);
|
|
} else if (TAG(value) == sp->tag) {
|
|
DEBUG(("match\n"));
|
|
return(NOTAG(value));
|
|
} else {
|
|
DEBUG(("mismatch 0x%lx\n", TAG(value)));
|
|
}
|
|
}
|
|
/* NOTREACHED */
|
|
}
|
|
|
|
/*
|
|
- okayvalue - check that a value can be stored
|
|
*/
|
|
static int /* predicate */
|
|
okayvalue(value)
|
|
off_t value;
|
|
{
|
|
if (HASTAG(value))
|
|
return(0);
|
|
#ifdef OVERFLOW
|
|
if (value == LONG_MAX) /* BIAS() and UNBIAS() will overflow */
|
|
return(0);
|
|
#endif
|
|
return(1);
|
|
}
|
|
|
|
/*
|
|
- set - store a value into a location previously found by search
|
|
*/
|
|
static int /* 0 success, -1 failure */
|
|
set(sp, value)
|
|
register struct searcher *sp;
|
|
off_t value;
|
|
{
|
|
register off_t place = sp->place;
|
|
register off_t v = value;
|
|
|
|
if (sp->aborted)
|
|
return(-1);
|
|
|
|
if (CANTAG(v) && !conf.olddbz) {
|
|
v |= sp->tag | taghere;
|
|
if (v != UNBIAS(VACANT)) /* BIAS(v) won't look VACANT */
|
|
#ifdef OVERFLOW
|
|
if (v != LONG_MAX) /* and it won't overflow */
|
|
#endif
|
|
value = v;
|
|
}
|
|
DEBUG(("tagged value is 0x%lx\n", value));
|
|
value = BIAS(value);
|
|
value = MAPOUT(value);
|
|
|
|
/* If we have the index file in memory, use it */
|
|
if (corepag != NULL && place < conf.tsize) {
|
|
corepag[place] = value;
|
|
DEBUG(("set: incore\n"));
|
|
return(0);
|
|
}
|
|
|
|
/* seek to spot */
|
|
pagpos = -1; /* invalidate position memory */
|
|
if (fseek(pagf, place * SOF, SEEK_SET) != 0) {
|
|
DEBUG(("set: seek failed\n"));
|
|
sp->aborted = 1;
|
|
return(-1);
|
|
}
|
|
|
|
/* write in data */
|
|
if (fwrite((char *)&value, SOF, 1, pagf) != 1) {
|
|
DEBUG(("set: write failed\n"));
|
|
sp->aborted = 1;
|
|
return(-1);
|
|
}
|
|
/* fflush improves robustness, and buffer re-use is rare anyway */
|
|
if (fflush(pagf) == EOF) {
|
|
DEBUG(("set: fflush failed\n"));
|
|
sp->aborted = 1;
|
|
return(-1);
|
|
}
|
|
|
|
DEBUG(("set: succeeded\n"));
|
|
return(0);
|
|
}
|
|
|
|
/*
|
|
- mybytemap - determine this machine's byte map
|
|
*
|
|
* A byte map is an array of ints, sizeof(off_t) of them. The 0th int
|
|
* is the byte number of the high-order byte in my off_t, and so forth.
|
|
*/
|
|
static void
|
|
mybytemap(map)
|
|
int map[]; /* -> int[SOF] */
|
|
{
|
|
union {
|
|
off_t o;
|
|
char c[SOF];
|
|
} u;
|
|
register int *mp = &map[SOF];
|
|
register int ntodo;
|
|
register int i;
|
|
|
|
u.o = 1;
|
|
for (ntodo = (int)SOF; ntodo > 0; ntodo--) {
|
|
for (i = 0; i < SOF; i++)
|
|
if (u.c[i] != 0)
|
|
break;
|
|
if (i == SOF) {
|
|
/* trouble -- set it to *something* consistent */
|
|
DEBUG(("mybytemap: nonexistent byte %d!!!\n", ntodo));
|
|
for (i = 0; i < SOF; i++)
|
|
map[i] = i;
|
|
return;
|
|
}
|
|
DEBUG(("mybytemap: byte %d\n", i));
|
|
*--mp = i;
|
|
while (u.c[i] != 0)
|
|
u.o <<= 1;
|
|
}
|
|
}
|
|
|
|
/*
|
|
- bytemap - transform an off_t from byte ordering map1 to map2
|
|
*/
|
|
static off_t /* transformed result */
|
|
bytemap(ino, map1, map2)
|
|
off_t ino;
|
|
int *map1;
|
|
int *map2;
|
|
{
|
|
union oc {
|
|
off_t o;
|
|
char c[SOF];
|
|
};
|
|
union oc in;
|
|
union oc out;
|
|
register int i;
|
|
|
|
in.o = ino;
|
|
for (i = 0; i < SOF; i++)
|
|
out.c[map2[i]] = in.c[map1[i]];
|
|
return(out.o);
|
|
}
|
|
|
|
/*
|
|
* This is a simplified version of the pathalias hashing function.
|
|
* Thanks to Steve Belovin and Peter Honeyman
|
|
*
|
|
* hash a string into a long int. 31 bit crc (from andrew appel).
|
|
* the crc table is computed at run time by crcinit() -- we could
|
|
* precompute, but it takes 1 clock tick on a 750.
|
|
*
|
|
* This fast table calculation works only if POLY is a prime polynomial
|
|
* in the field of integers modulo 2. Since the coefficients of a
|
|
* 32-bit polynomial won't fit in a 32-bit word, the high-order bit is
|
|
* implicit. IT MUST ALSO BE THE CASE that the coefficients of orders
|
|
* 31 down to 25 are zero. Happily, we have candidates, from
|
|
* E. J. Watson, "Primitive Polynomials (Mod 2)", Math. Comp. 16 (1962):
|
|
* x^32 + x^7 + x^5 + x^3 + x^2 + x^1 + x^0
|
|
* x^31 + x^3 + x^0
|
|
*
|
|
* We reverse the bits to get:
|
|
* 111101010000000000000000000000001 but drop the last 1
|
|
* f 5 0 0 0 0 0 0
|
|
* 010010000000000000000000000000001 ditto, for 31-bit crc
|
|
* 4 8 0 0 0 0 0 0
|
|
*/
|
|
|
|
#define POLY 0x48000000L /* 31-bit polynomial (avoids sign problems) */
|
|
|
|
static long CrcTable[128];
|
|
|
|
/*
|
|
- crcinit - initialize tables for hash function
|
|
*/
|
|
static void
|
|
crcinit()
|
|
{
|
|
register int i, j;
|
|
register long sum;
|
|
|
|
for (i = 0; i < 128; ++i) {
|
|
sum = 0L;
|
|
for (j = 7 - 1; j >= 0; --j)
|
|
if (i & (1 << j))
|
|
sum ^= POLY >> j;
|
|
CrcTable[i] = sum;
|
|
}
|
|
DEBUG(("crcinit: done\n"));
|
|
}
|
|
|
|
/*
|
|
- hash - Honeyman's nice hashing function
|
|
*/
|
|
static long
|
|
hash(name, size)
|
|
register char *name;
|
|
register int size;
|
|
{
|
|
register long sum = 0L;
|
|
|
|
while (size--) {
|
|
sum = (sum >> 7) ^ CrcTable[(sum ^ (*name++)) & 0x7f];
|
|
}
|
|
DEBUG(("hash: returns (%ld)\n", sum));
|
|
return(sum);
|
|
}
|
|
|
|
/*
|
|
* case-mapping stuff
|
|
*
|
|
* Borrowed from C News, by permission of the authors. Somewhat modified.
|
|
*
|
|
* We exploit the fact that we are dealing only with headers here, and
|
|
* headers are limited to the ASCII characters by RFC822. It is barely
|
|
* possible that we might be dealing with a translation into another
|
|
* character set, but in particular it's very unlikely for a header
|
|
* character to be outside -128..255.
|
|
*
|
|
* Life would be a whole lot simpler if tolower() could safely and portably
|
|
* be applied to any char.
|
|
*/
|
|
|
|
#define OFFSET 128 /* avoid trouble with negative chars */
|
|
|
|
/* must call casencmp before invoking TOLOW... */
|
|
#define TOLOW(c) (cmap[(c)+OFFSET])
|
|
|
|
/* ...but the use of it in CISTREQN is safe without the preliminary call (!) */
|
|
/* CISTREQN is an optimised case-insensitive strncmp(a,b,n)==0; n > 0 */
|
|
#define CISTREQN(a, b, n) \
|
|
(TOLOW((a)[0]) == TOLOW((b)[0]) && casencmp(a, b, n) == 0)
|
|
|
|
#define MAPSIZE (256+OFFSET)
|
|
static char cmap[MAPSIZE]; /* relies on init to '\0' */
|
|
static int mprimed = 0; /* has cmap been set up? */
|
|
|
|
/*
|
|
- mapprime - set up case-mapping stuff
|
|
*/
|
|
static void
|
|
mapprime()
|
|
{
|
|
register char *lp;
|
|
register char *up;
|
|
register int c;
|
|
register int i;
|
|
static char lower[] = "abcdefghijklmnopqrstuvwxyz";
|
|
static char upper[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
|
|
|
|
for (lp = lower, up = upper; *lp != '\0'; lp++, up++) {
|
|
c = *lp;
|
|
cmap[c+OFFSET] = c;
|
|
cmap[*up+OFFSET] = c;
|
|
}
|
|
for (i = 0; i < MAPSIZE; i++)
|
|
if (cmap[i] == '\0')
|
|
cmap[i] = (char)(i-OFFSET);
|
|
mprimed = 1;
|
|
}
|
|
|
|
/*
|
|
- casencmp - case-independent strncmp
|
|
*/
|
|
static int /* < == > 0 */
|
|
casencmp(s1, s2, len)
|
|
char *s1;
|
|
char *s2;
|
|
int len;
|
|
{
|
|
register char *p1;
|
|
register char *p2;
|
|
register int n;
|
|
|
|
if (!mprimed)
|
|
mapprime();
|
|
|
|
p1 = s1;
|
|
p2 = s2;
|
|
n = len;
|
|
while (--n >= 0 && *p1 != '\0' && TOLOW(*p1) == TOLOW(*p2)) {
|
|
p1++;
|
|
p2++;
|
|
}
|
|
if (n < 0)
|
|
return(0);
|
|
|
|
/*
|
|
* The following case analysis is necessary so that characters
|
|
* which look negative collate low against normal characters but
|
|
* high against the end-of-string NUL.
|
|
*/
|
|
if (*p1 == '\0' && *p2 == '\0')
|
|
return(0);
|
|
else if (*p1 == '\0')
|
|
return(-1);
|
|
else if (*p2 == '\0')
|
|
return(1);
|
|
else
|
|
return(TOLOW(*p1) - TOLOW(*p2));
|
|
}
|
|
|
|
/*
|
|
- mapcase - do case-mapped copy
|
|
*/
|
|
static char * /* returns src or dst */
|
|
mapcase(dst, src, siz)
|
|
char *dst; /* destination, used only if mapping needed */
|
|
char *src; /* source; src == dst is legal */
|
|
size_t siz;
|
|
{
|
|
register char *s;
|
|
register char *d;
|
|
register char *c; /* case break */
|
|
register char *e; /* end of source */
|
|
|
|
|
|
c = cipoint(src, siz);
|
|
if (c == NULL)
|
|
return(src);
|
|
|
|
if (!mprimed)
|
|
mapprime();
|
|
s = src;
|
|
e = s + siz;
|
|
d = dst;
|
|
|
|
while (s < c)
|
|
*d++ = *s++;
|
|
while (s < e)
|
|
*d++ = TOLOW(*s++);
|
|
|
|
return(dst);
|
|
}
|
|
|
|
/*
|
|
- cipoint - where in this message-ID does it become case-insensitive?
|
|
*
|
|
* The RFC822 code is not quite complete. Absolute, total, full RFC822
|
|
* compliance requires a horrible parsing job, because of the arcane
|
|
* quoting conventions -- abc"def"ghi is not equivalent to abc"DEF"ghi,
|
|
* for example. There are three or four things that might occur in the
|
|
* domain part of a message-id that are case-sensitive. They don't seem
|
|
* to ever occur in real news, thank Cthulhu. (What? You were expecting
|
|
* a merciful and forgiving deity to be invoked in connection with RFC822?
|
|
* Forget it; none of them would come near it.)
|
|
*/
|
|
static char * /* pointer into s, or NULL for "nowhere" */
|
|
cipoint(s, siz)
|
|
char *s;
|
|
size_t siz;
|
|
{
|
|
register char *p;
|
|
static char post[] = "postmaster";
|
|
static int plen = sizeof(post)-1;
|
|
|
|
switch (conf.casemap) {
|
|
case '0': /* unmapped, sensible */
|
|
return(NULL);
|
|
break;
|
|
case 'C': /* C News, RFC 822 conformant (approx.) */
|
|
p = memchr(s, '@', siz);
|
|
if (p == NULL) /* no local/domain split */
|
|
return(NULL); /* assume all local */
|
|
else if (p - (s+1) == plen && CISTREQN(s+1, post, plen)) {
|
|
/* crazy -- "postmaster" is case-insensitive */
|
|
return(s);
|
|
} else
|
|
return(p);
|
|
break;
|
|
case '=': /* 2.11, neither sensible nor conformant */
|
|
return(s); /* all case-insensitive */
|
|
break;
|
|
}
|
|
|
|
DEBUG(("cipoint: unknown case mapping `%c'\n", conf.casemap));
|
|
return(NULL); /* just leave it alone */
|
|
}
|
|
|
|
/*
|
|
- dbzdebug - control dbz debugging at run time
|
|
*/
|
|
int /* old value */
|
|
dbzdebug(value)
|
|
int value;
|
|
{
|
|
#ifdef DBZDEBUG
|
|
register int old = debug;
|
|
|
|
debug = value;
|
|
return(old);
|
|
#else
|
|
return(-1);
|
|
#endif
|
|
}
|