1
0
mirror of https://git.FreeBSD.org/src.git synced 2025-01-07 13:14:51 +00:00
freebsd/contrib/mdocml/compat_fts.c
Baptiste Daroussin 7666f5006c Import mandoc cvs snapshot 20170121 (pre 1.14)
Note that mandoc does not use anymore sqlite3 but a home made database format
An important improvement has been made as well in makewhatis performance:
Tests on my laptop shows makewhatis on the entire system goes from 26s to 12s
2017-01-21 13:17:25 +00:00

709 lines
18 KiB
C

#include "config.h"
#if HAVE_FTS
int dummy;
#else
/* $Id: compat_fts.c,v 1.12 2016/10/18 23:58:12 schwarze Exp $ */
/* $OpenBSD: fts.c,v 1.56 2016/09/21 04:38:56 guenther Exp $ */
/*-
* Copyright (c) 1990, 1993, 1994
* The Regents of the University of California. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/stat.h>
#include <sys/types.h>
#include <dirent.h>
#include <errno.h>
#include <fcntl.h>
#include <limits.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include "compat_fts.h"
#define MAXIMUM(a, b) (((a) > (b)) ? (a) : (b))
static FTSENT *fts_alloc(FTS *, const char *, size_t);
static FTSENT *fts_build(FTS *);
static void fts_lfree(FTSENT *);
static void fts_load(FTS *, FTSENT *);
static size_t fts_maxarglen(char * const *);
static void fts_padjust(FTS *, FTSENT *);
static int fts_palloc(FTS *, size_t);
static FTSENT *fts_sort(FTS *, FTSENT *, int);
static unsigned short fts_stat(FTS *, FTSENT *);
#define ISDOT(a) (a[0] == '.' && (!a[1] || (a[1] == '.' && !a[2])))
#ifndef O_DIRECTORY
#define O_DIRECTORY 0
#endif
#ifndef O_CLOEXEC
#define O_CLOEXEC 0
#endif
#ifndef PATH_MAX
#define PATH_MAX 4096
#endif
#define CLR(opt) (sp->fts_options &= ~(opt))
#define ISSET(opt) (sp->fts_options & (opt))
#define SET(opt) (sp->fts_options |= (opt))
FTS *
fts_open(char * const *argv, int options,
int (*compar)(const FTSENT **, const FTSENT **))
{
FTS *sp;
FTSENT *p, *root;
int nitems;
FTSENT *parent, *tmp;
/* Options check. */
if (options & ~FTS_OPTIONMASK) {
errno = EINVAL;
return (NULL);
}
/* At least one path must be specified. */
if (*argv == NULL) {
errno = EINVAL;
return (NULL);
}
/* Allocate/initialize the stream */
if ((sp = calloc(1, sizeof(FTS))) == NULL)
return (NULL);
sp->fts_compar = compar;
sp->fts_options = options;
/*
* Start out with 1K of path space, and enough, in any case,
* to hold the user's paths.
*/
if (fts_palloc(sp, MAXIMUM(fts_maxarglen(argv), PATH_MAX)))
goto mem1;
/* Allocate/initialize root's parent. */
if ((parent = fts_alloc(sp, "", 0)) == NULL)
goto mem2;
parent->fts_level = FTS_ROOTPARENTLEVEL;
/* Allocate/initialize root(s). */
for (root = NULL, nitems = 0; *argv; ++argv, ++nitems) {
if ((p = fts_alloc(sp, *argv, strlen(*argv))) == NULL)
goto mem3;
p->fts_level = FTS_ROOTLEVEL;
p->fts_parent = parent;
p->fts_accpath = p->fts_name;
p->fts_info = fts_stat(sp, p);
/* Command-line "." and ".." are real directories. */
if (p->fts_info == FTS_DOT)
p->fts_info = FTS_D;
/*
* If comparison routine supplied, traverse in sorted
* order; otherwise traverse in the order specified.
*/
if (compar) {
p->fts_link = root;
root = p;
} else {
p->fts_link = NULL;
if (root == NULL)
tmp = root = p;
else {
tmp->fts_link = p;
tmp = p;
}
}
}
if (compar && nitems > 1)
root = fts_sort(sp, root, nitems);
/*
* Allocate a dummy pointer and make fts_read think that we've just
* finished the node before the root(s); set p->fts_info to FTS_INIT
* so that everything about the "current" node is ignored.
*/
if ((sp->fts_cur = fts_alloc(sp, "", 0)) == NULL)
goto mem3;
sp->fts_cur->fts_link = root;
sp->fts_cur->fts_info = FTS_INIT;
if (nitems == 0)
free(parent);
return (sp);
mem3: fts_lfree(root);
free(parent);
mem2: free(sp->fts_path);
mem1: free(sp);
return (NULL);
}
static void
fts_load(FTS *sp, FTSENT *p)
{
size_t len;
char *cp;
/*
* Load the stream structure for the next traversal. Since we don't
* actually enter the directory until after the preorder visit, set
* the fts_accpath field specially so the chdir gets done to the right
* place and the user can access the first node. From fts_open it's
* known that the path will fit.
*/
len = p->fts_pathlen = p->fts_namelen;
memmove(sp->fts_path, p->fts_name, len + 1);
if ((cp = strrchr(p->fts_name, '/')) && (cp != p->fts_name || cp[1])) {
len = strlen(++cp);
memmove(p->fts_name, cp, len + 1);
p->fts_namelen = len;
}
p->fts_accpath = p->fts_path = sp->fts_path;
sp->fts_dev = p->fts_dev;
}
int
fts_close(FTS *sp)
{
FTSENT *freep, *p;
/*
* This still works if we haven't read anything -- the dummy structure
* points to the root list, so we step through to the end of the root
* list which has a valid parent pointer.
*/
if (sp->fts_cur) {
for (p = sp->fts_cur; p->fts_level >= FTS_ROOTLEVEL;) {
freep = p;
p = p->fts_link ? p->fts_link : p->fts_parent;
free(freep);
}
free(p);
}
/* Free up child linked list, sort array, path buffer, stream ptr.*/
if (sp->fts_child)
fts_lfree(sp->fts_child);
free(sp->fts_array);
free(sp->fts_path);
free(sp);
return (0);
}
/*
* Special case of "/" at the end of the path so that slashes aren't
* appended which would cause paths to be written as "....//foo".
*/
#define NAPPEND(p) \
(p->fts_path[p->fts_pathlen - 1] == '/' \
? p->fts_pathlen - 1 : p->fts_pathlen)
FTSENT *
fts_read(FTS *sp)
{
FTSENT *p, *tmp;
int instr;
char *t;
/* If finished or unrecoverable error, return NULL. */
if (sp->fts_cur == NULL || ISSET(FTS_STOP))
return (NULL);
/* Set current node pointer. */
p = sp->fts_cur;
/* Save and zero out user instructions. */
instr = p->fts_instr;
p->fts_instr = FTS_NOINSTR;
/* Directory in pre-order. */
if (p->fts_info == FTS_D) {
/* If skipped or crossed mount point, do post-order visit. */
if (instr == FTS_SKIP ||
(ISSET(FTS_XDEV) && p->fts_dev != sp->fts_dev)) {
if (sp->fts_child) {
fts_lfree(sp->fts_child);
sp->fts_child = NULL;
}
p->fts_info = FTS_DP;
return (p);
}
/*
* If haven't read do so. If the read fails, fts_build sets
* FTS_STOP or the fts_info field of the node.
*/
if (sp->fts_child) {
/* nothing */
} else if ((sp->fts_child = fts_build(sp)) == NULL) {
if (ISSET(FTS_STOP))
return (NULL);
return (p);
}
p = sp->fts_child;
sp->fts_child = NULL;
goto name;
}
/* Move to the next node on this level. */
next: tmp = p;
if ((p = p->fts_link)) {
free(tmp);
/*
* If reached the top, return to the original directory (or
* the root of the tree), and load the paths for the next root.
*/
if (p->fts_level == FTS_ROOTLEVEL) {
fts_load(sp, p);
return (sp->fts_cur = p);
}
/*
* User may have called fts_set on the node. If skipped,
* ignore. If followed, get a file descriptor so we can
* get back if necessary.
*/
if (p->fts_instr == FTS_SKIP)
goto next;
name: t = sp->fts_path + NAPPEND(p->fts_parent);
*t++ = '/';
memmove(t, p->fts_name, p->fts_namelen + 1);
return (sp->fts_cur = p);
}
/* Move up to the parent node. */
p = tmp->fts_parent;
free(tmp);
if (p->fts_level == FTS_ROOTPARENTLEVEL) {
/*
* Done; free everything up and set errno to 0 so the user
* can distinguish between error and EOF.
*/
free(p);
errno = 0;
return (sp->fts_cur = NULL);
}
/* NUL terminate the pathname. */
sp->fts_path[p->fts_pathlen] = '\0';
p->fts_info = p->fts_errno ? FTS_ERR : FTS_DP;
return (sp->fts_cur = p);
}
/*
* Fts_set takes the stream as an argument although it's not used in this
* implementation; it would be necessary if anyone wanted to add global
* semantics to fts using fts_set. An error return is allowed for similar
* reasons.
*/
int
fts_set(FTS *sp, FTSENT *p, int instr)
{
if (instr && instr != FTS_NOINSTR && instr != FTS_SKIP) {
errno = EINVAL;
return (1);
}
p->fts_instr = instr;
return (0);
}
/*
* This is the tricky part -- do not casually change *anything* in here. The
* idea is to build the linked list of entries that are used by fts_children
* and fts_read. There are lots of special cases.
*
* The real slowdown in walking the tree is the stat calls. If FTS_NOSTAT is
* set and it's a physical walk (so that symbolic links can't be directories),
* we can do things quickly. First, if it's a 4.4BSD file system, the type
* of the file is in the directory entry. Otherwise, we assume that the number
* of subdirectories in a node is equal to the number of links to the parent.
* The former skips all stat calls. The latter skips stat calls in any leaf
* directories and for any files after the subdirectories in the directory have
* been found, cutting the stat calls by about 2/3.
*/
static FTSENT *
fts_build(FTS *sp)
{
struct dirent *dp;
FTSENT *p, *head;
FTSENT *cur, *tail;
DIR *dirp;
void *oldaddr;
size_t dlen, len, maxlen;
int nitems, level, doadjust;
int saved_errno;
char *cp;
/* Set current node pointer. */
cur = sp->fts_cur;
/*
* Open the directory for reading. If this fails, we're done.
* If being called from fts_read, set the fts_info field.
*/
if ((dirp = opendir(cur->fts_accpath)) == NULL) {
cur->fts_info = FTS_DNR;
cur->fts_errno = errno;
return (NULL);
}
/*
* Figure out the max file name length that can be stored in the
* current path -- the inner loop allocates more path as necessary.
* We really wouldn't have to do the maxlen calculations here, we
* could do them in fts_read before returning the path, but it's a
* lot easier here since the length is part of the dirent structure.
*
* If not changing directories set a pointer so that can just append
* each new name into the path.
*/
len = NAPPEND(cur);
cp = sp->fts_path + len;
*cp++ = '/';
len++;
maxlen = sp->fts_pathlen - len;
/*
* fts_level is signed so we must prevent it from wrapping
* around to FTS_ROOTLEVEL and FTS_ROOTPARENTLEVEL.
*/
level = cur->fts_level;
if (level < FTS_MAXLEVEL)
level++;
/* Read the directory, attaching each entry to the `link' pointer. */
doadjust = 0;
for (head = tail = NULL, nitems = 0; dirp && (dp = readdir(dirp));) {
if (ISDOT(dp->d_name))
continue;
#if HAVE_DIRENT_NAMLEN
dlen = dp->d_namlen;
#else
dlen = strlen(dp->d_name);
#endif
if (!(p = fts_alloc(sp, dp->d_name, dlen)))
goto mem1;
if (dlen >= maxlen) { /* include space for NUL */
oldaddr = sp->fts_path;
if (fts_palloc(sp, dlen + len + 1)) {
/*
* No more memory for path or structures. Save
* errno, free up the current structure and the
* structures already allocated.
*/
mem1: saved_errno = errno;
free(p);
fts_lfree(head);
(void)closedir(dirp);
cur->fts_info = FTS_ERR;
SET(FTS_STOP);
errno = saved_errno;
return (NULL);
}
/* Did realloc() change the pointer? */
if (oldaddr != sp->fts_path) {
doadjust = 1;
cp = sp->fts_path + len;
}
maxlen = sp->fts_pathlen - len;
}
p->fts_level = level;
p->fts_parent = sp->fts_cur;
p->fts_pathlen = len + dlen;
if (p->fts_pathlen < len) {
/*
* If we wrap, free up the current structure and
* the structures already allocated, then error
* out with ENAMETOOLONG.
*/
free(p);
fts_lfree(head);
(void)closedir(dirp);
cur->fts_info = FTS_ERR;
SET(FTS_STOP);
errno = ENAMETOOLONG;
return (NULL);
}
/* Build a file name for fts_stat to stat. */
p->fts_accpath = p->fts_path;
memmove(cp, p->fts_name, p->fts_namelen + 1);
/* Stat it. */
p->fts_info = fts_stat(sp, p);
/* We walk in directory order so "ls -f" doesn't get upset. */
p->fts_link = NULL;
if (head == NULL)
head = tail = p;
else {
tail->fts_link = p;
tail = p;
}
++nitems;
}
if (dirp)
(void)closedir(dirp);
/*
* If realloc() changed the address of the path, adjust the
* addresses for the rest of the tree and the dir list.
*/
if (doadjust)
fts_padjust(sp, head);
/*
* If not changing directories, reset the path back to original
* state.
*/
if (len == sp->fts_pathlen || nitems == 0)
--cp;
*cp = '\0';
/* If didn't find anything, return NULL. */
if (!nitems) {
cur->fts_info = FTS_DP;
return (NULL);
}
/* Sort the entries. */
if (sp->fts_compar && nitems > 1)
head = fts_sort(sp, head, nitems);
return (head);
}
static unsigned short
fts_stat(FTS *sp, FTSENT *p)
{
FTSENT *t;
dev_t dev;
ino_t ino;
struct stat *sbp;
/* If user needs stat info, stat buffer already allocated. */
sbp = p->fts_statp;
if (lstat(p->fts_accpath, sbp)) {
p->fts_errno = errno;
memset(sbp, 0, sizeof(struct stat));
return (FTS_NS);
}
if (S_ISDIR(sbp->st_mode)) {
/*
* Set the device/inode. Used to find cycles and check for
* crossing mount points. Also remember the link count, used
* in fts_build to limit the number of stat calls. It is
* understood that these fields are only referenced if fts_info
* is set to FTS_D.
*/
dev = p->fts_dev = sbp->st_dev;
ino = p->fts_ino = sbp->st_ino;
p->fts_nlink = sbp->st_nlink;
if (ISDOT(p->fts_name))
return (FTS_DOT);
/*
* Cycle detection is done by brute force when the directory
* is first encountered. If the tree gets deep enough or the
* number of symbolic links to directories is high enough,
* something faster might be worthwhile.
*/
for (t = p->fts_parent;
t->fts_level >= FTS_ROOTLEVEL; t = t->fts_parent)
if (ino == t->fts_ino && dev == t->fts_dev) {
p->fts_cycle = t;
return (FTS_DC);
}
return (FTS_D);
}
if (S_ISLNK(sbp->st_mode))
return (FTS_SL);
if (S_ISREG(sbp->st_mode))
return (FTS_F);
return (FTS_DEFAULT);
}
static FTSENT *
fts_sort(FTS *sp, FTSENT *head, int nitems)
{
FTSENT **ap, *p;
/*
* Construct an array of pointers to the structures and call qsort(3).
* Reassemble the array in the order returned by qsort. If unable to
* sort for memory reasons, return the directory entries in their
* current order. Allocate enough space for the current needs plus
* 40 so don't realloc one entry at a time.
*/
if (nitems > sp->fts_nitems) {
struct _ftsent **a;
sp->fts_nitems = nitems + 40;
if ((a = reallocarray(sp->fts_array,
sp->fts_nitems, sizeof(FTSENT *))) == NULL) {
free(sp->fts_array);
sp->fts_array = NULL;
sp->fts_nitems = 0;
return (head);
}
sp->fts_array = a;
}
for (ap = sp->fts_array, p = head; p; p = p->fts_link)
*ap++ = p;
qsort(sp->fts_array, nitems, sizeof(FTSENT *), sp->fts_compar);
for (head = *(ap = sp->fts_array); --nitems; ++ap)
ap[0]->fts_link = ap[1];
ap[0]->fts_link = NULL;
return (head);
}
static FTSENT *
fts_alloc(FTS *sp, const char *name, size_t namelen)
{
FTSENT *p;
size_t len;
len = sizeof(FTSENT) + namelen;
if ((p = calloc(1, len)) == NULL)
return (NULL);
p->fts_path = sp->fts_path;
p->fts_namelen = namelen;
p->fts_instr = FTS_NOINSTR;
p->fts_statp = malloc(sizeof(struct stat));
if (p->fts_statp == NULL) {
free(p);
return (NULL);
}
memcpy(p->fts_name, name, namelen);
return (p);
}
static void
fts_lfree(FTSENT *head)
{
FTSENT *p;
/* Free a linked list of structures. */
while ((p = head)) {
head = head->fts_link;
free(p);
}
}
/*
* Allow essentially unlimited paths; find, rm, ls should all work on any tree.
* Most systems will allow creation of paths much longer than PATH_MAX, even
* though the kernel won't resolve them. Add the size (not just what's needed)
* plus 256 bytes so don't realloc the path 2 bytes at a time.
*/
static int
fts_palloc(FTS *sp, size_t more)
{
char *p;
/*
* Check for possible wraparound.
*/
more += 256;
if (sp->fts_pathlen + more < sp->fts_pathlen) {
free(sp->fts_path);
sp->fts_path = NULL;
errno = ENAMETOOLONG;
return (1);
}
sp->fts_pathlen += more;
p = realloc(sp->fts_path, sp->fts_pathlen);
if (p == NULL) {
free(sp->fts_path);
sp->fts_path = NULL;
return (1);
}
sp->fts_path = p;
return (0);
}
/*
* When the path is realloc'd, have to fix all of the pointers in structures
* already returned.
*/
static void
fts_padjust(FTS *sp, FTSENT *head)
{
FTSENT *p;
char *addr = sp->fts_path;
#define ADJUST(p) { \
if ((p)->fts_accpath != (p)->fts_name) { \
(p)->fts_accpath = \
(char *)addr + ((p)->fts_accpath - (p)->fts_path); \
} \
(p)->fts_path = addr; \
}
/* Adjust the current set of children. */
for (p = sp->fts_child; p; p = p->fts_link)
ADJUST(p);
/* Adjust the rest of the tree, including the current level. */
for (p = head; p->fts_level >= FTS_ROOTLEVEL;) {
ADJUST(p);
p = p->fts_link ? p->fts_link : p->fts_parent;
}
}
static size_t
fts_maxarglen(char * const *argv)
{
size_t len, max;
for (max = 0; *argv; ++argv)
if ((len = strlen(*argv)) > max)
max = len;
return (max + 1);
}
#endif