2000-07-01 06:55:11 +00:00
|
|
|
.\" $NetBSD$
|
|
|
|
.\" Copyright (c) 1997 Todd C. Miller <Todd.Miller@courtesan.com>
|
|
|
|
.\" All rights reserved.
|
|
|
|
.\"
|
|
|
|
.\" Redistribution and use in source and binary forms, with or without
|
|
|
|
.\" modification, are permitted provided that the following conditions
|
|
|
|
.\" are met:
|
|
|
|
.\" 1. Redistributions of source code must retain the above copyright
|
|
|
|
.\" notice, this list of conditions and the following disclaimer.
|
|
|
|
.\" 2. Redistributions in binary form must reproduce the above copyright
|
|
|
|
.\" notice, this list of conditions and the following disclaimer in the
|
|
|
|
.\" documentation and/or other materials provided with the distribution.
|
|
|
|
.\" 3. The name of the author may not be used to endorse or promote products
|
|
|
|
.\" derived from this software without specific prior written permission.
|
|
|
|
.\"
|
|
|
|
.\" THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
|
|
|
|
.\" INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
|
|
|
|
.\" AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
|
|
|
|
.\" THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
|
|
|
.\" EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
|
|
|
.\" PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
|
|
|
|
.\" OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
|
|
|
|
.\" WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
|
|
|
|
.\" OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
|
|
|
|
.\" ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
|
|
.\"
|
2001-07-15 07:53:42 +00:00
|
|
|
.\" OpenBSD: tsearch.3,v 1.2 1998/06/21 22:13:49 millert Exp
|
2000-07-01 06:55:11 +00:00
|
|
|
.\" $FreeBSD$
|
|
|
|
.\"
|
2017-07-14 17:07:28 +00:00
|
|
|
.Dd June 4, 2017
|
2000-07-01 06:55:11 +00:00
|
|
|
.Dt TSEARCH 3
|
|
|
|
.Os
|
|
|
|
.Sh NAME
|
2001-02-01 16:38:02 +00:00
|
|
|
.Nm tsearch , tfind , tdelete , twalk
|
2000-07-01 06:55:11 +00:00
|
|
|
.Nd manipulate binary search trees
|
|
|
|
.Sh SYNOPSIS
|
2001-09-07 14:46:36 +00:00
|
|
|
.In search.h
|
2000-07-01 06:55:11 +00:00
|
|
|
.Ft void *
|
2016-10-13 18:25:40 +00:00
|
|
|
.Fn tdelete "const void * restrict key" "posix_tnode ** restrict rootp" "int (*compar) (const void *, const void *)"
|
|
|
|
.Ft posix_tnode *
|
|
|
|
.Fn tfind "const void *key" "posix_tnode * const *rootp" "int (*compar) (const void *, const void *)"
|
|
|
|
.Ft posix_tnode *
|
|
|
|
.Fn tsearch "const void *key" "posix_tnode **rootp" "int (*compar) (const void *, const void *)"
|
2000-07-01 06:55:11 +00:00
|
|
|
.Ft void
|
2016-10-13 18:25:40 +00:00
|
|
|
.Fn twalk "const posix_tnode *root" "void (*action) (const posix_tnode *, VISIT, int)"
|
2000-07-01 06:55:11 +00:00
|
|
|
.Sh DESCRIPTION
|
|
|
|
The
|
|
|
|
.Fn tdelete ,
|
|
|
|
.Fn tfind ,
|
|
|
|
.Fn tsearch ,
|
|
|
|
and
|
|
|
|
.Fn twalk
|
Let tsearch()/tdelete() use an AVL tree.
The existing implementations of POSIX tsearch() and tdelete() don't
attempt to perform any balancing at all. Testing reveals that inserting
100k nodes into a tree sequentially takes approximately one minute on my
system.
Though most other BSDs also don't use any balanced tree internally, C
libraries like glibc and musl do provide better implementations. glibc
uses a red-black tree and musl uses an AVL tree.
Red-black trees have the advantage over AVL trees that they only require
O(1) rotations after insertion and deletion, but have the disadvantage
that the tree has a maximum depth of 2*log2(n) instead of 1.44*log2(n).
My take is that it's better to focus on having a lower maximum depth,
for the reason that in the case of tsearch() the invocation of the
comparator likely dominates the running time.
This change replaces the tsearch() and tdelete() functions by versions
that create an AVL tree. Compared to musl's implementation, this version
differs in two ways:
- We don't keep track of heights; just balances. This is sufficient.
This has the advantage that it reduces the number of nodes that are
being accessed. Storing heights requires us to also access all of the
siblings along the path.
- Don't use any recursion at all. We know that the tree cannot exceed 2^64
elements in size, so the height of the tree can never be larger than
96. Use a 128-bit bitmask to keep track of the path that is computed.
This allows us to iterate over the same path twice, meaning we can
apply rotations from top to bottom.
Inserting 100k nodes into a tree now only takes 0.015 seconds. Insertion
seems to be twice as fast as glibc, whereas deletion has about the same
performance. Unlike glibc, it uses a fixed amount of memory.
I also experimented with both recursive and iterative bottom-up
implementations of the same algorithm. This iterative top-down version
performs similarly to the recursive bottom-up version in terms of speed
and code size.
For some reason, the iterative bottom-up algorithm was actually 30%
faster for deletion, but has a quadratic memory complexity to keep track
of all the parent pointers.
Reviewed by: jilles
Obtained from: https://github.com/NuxiNL/cloudlibc
Differential Revision: https://reviews.freebsd.org/D4412
2015-12-22 18:12:11 +00:00
|
|
|
functions manage binary search trees.
|
|
|
|
This implementation uses a balanced AVL tree,
|
|
|
|
which due to its strong theoretical limit on the height of the tree has
|
|
|
|
the advantage of calling the comparison function relatively
|
|
|
|
infrequently.
|
|
|
|
.Pp
|
2004-07-02 23:52:20 +00:00
|
|
|
The comparison function passed in by
|
2000-07-01 06:55:11 +00:00
|
|
|
the user has the same style of return values as
|
|
|
|
.Xr strcmp 3 .
|
|
|
|
.Pp
|
2002-12-18 12:45:11 +00:00
|
|
|
The
|
|
|
|
.Fn tfind
|
|
|
|
function
|
2000-07-01 06:55:11 +00:00
|
|
|
searches for the datum matched by the argument
|
|
|
|
.Fa key
|
|
|
|
in the binary tree rooted at
|
|
|
|
.Fa rootp ,
|
|
|
|
returning a pointer to the datum if it is found and NULL
|
|
|
|
if it is not.
|
|
|
|
.Pp
|
2002-12-18 12:45:11 +00:00
|
|
|
The
|
|
|
|
.Fn tsearch
|
|
|
|
function
|
2000-07-01 06:55:11 +00:00
|
|
|
is identical to
|
|
|
|
.Fn tfind
|
|
|
|
except that if no match is found,
|
|
|
|
.Fa key
|
2004-07-02 23:52:20 +00:00
|
|
|
is inserted into the tree and a pointer to it is returned.
|
|
|
|
If
|
2000-07-01 06:55:11 +00:00
|
|
|
.Fa rootp
|
|
|
|
points to a NULL value, a new binary search tree is created.
|
|
|
|
.Pp
|
2002-12-18 12:45:11 +00:00
|
|
|
The
|
|
|
|
.Fn tdelete
|
|
|
|
function
|
2000-07-01 06:55:11 +00:00
|
|
|
deletes a node from the specified binary search tree and returns
|
|
|
|
a pointer to the parent of the node to be deleted.
|
|
|
|
It takes the same arguments as
|
|
|
|
.Fn tfind
|
|
|
|
and
|
|
|
|
.Fn tsearch .
|
|
|
|
If the node to be deleted is the root of the binary search tree,
|
|
|
|
.Fa rootp
|
|
|
|
will be adjusted.
|
|
|
|
.Pp
|
2002-12-18 12:45:11 +00:00
|
|
|
The
|
|
|
|
.Fn twalk
|
|
|
|
function
|
2000-07-01 06:55:11 +00:00
|
|
|
walks the binary search tree rooted at
|
2000-11-06 15:46:57 +00:00
|
|
|
.Fa root
|
2000-07-01 06:55:11 +00:00
|
|
|
and calls the function
|
|
|
|
.Fa action
|
|
|
|
on each node.
|
2002-12-19 09:40:28 +00:00
|
|
|
The
|
|
|
|
.Fa action
|
|
|
|
function
|
2000-07-01 06:55:11 +00:00
|
|
|
is called with three arguments: a pointer to the current node,
|
2001-07-15 07:53:42 +00:00
|
|
|
a value from the enum
|
2000-07-01 06:55:11 +00:00
|
|
|
.Sy "typedef enum { preorder, postorder, endorder, leaf } VISIT;"
|
|
|
|
specifying the traversal type, and a node level (where level
|
|
|
|
zero is the root of the tree).
|
|
|
|
.Sh RETURN VALUES
|
|
|
|
The
|
|
|
|
.Fn tsearch
|
|
|
|
function returns NULL if allocation of a new node fails (usually
|
|
|
|
due to a lack of free memory).
|
|
|
|
.Pp
|
2002-12-18 12:45:11 +00:00
|
|
|
The
|
|
|
|
.Fn tfind ,
|
2000-07-01 06:55:11 +00:00
|
|
|
.Fn tsearch ,
|
|
|
|
and
|
|
|
|
.Fn tdelete
|
2002-12-18 12:45:11 +00:00
|
|
|
functions
|
2000-07-01 06:55:11 +00:00
|
|
|
return NULL if
|
|
|
|
.Fa rootp
|
|
|
|
is NULL or the datum cannot be found.
|
|
|
|
.Pp
|
|
|
|
The
|
|
|
|
.Fn twalk
|
|
|
|
function returns no value.
|
2017-07-14 17:07:28 +00:00
|
|
|
.Sh EXAMPLES
|
|
|
|
This example uses
|
|
|
|
.Fn tsearch
|
|
|
|
to search for four strings in
|
|
|
|
.Dv root .
|
|
|
|
Because the strings are not already present, they are added.
|
|
|
|
.Fn tsearch
|
|
|
|
is called twice on the fourth string to demonstrate that a string is not added when it is already present.
|
|
|
|
.Fn tfind
|
|
|
|
is used to find the single instance of the fourth string, and
|
|
|
|
.Fn tdelete
|
|
|
|
removes it.
|
|
|
|
Finally,
|
|
|
|
.Fn twalk
|
|
|
|
is used to traverse and display the resulting binary search tree.
|
|
|
|
.Bd -literal
|
|
|
|
#include <stdio.h>
|
|
|
|
#include <search.h>
|
|
|
|
#include <string.h>
|
|
|
|
|
|
|
|
int
|
|
|
|
comp(const void *a, const void *b)
|
|
|
|
{
|
|
|
|
|
|
|
|
return strcmp(a, b);
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
printwalk(const posix_tnode * node, VISIT v, int __unused0)
|
|
|
|
{
|
|
|
|
|
|
|
|
if (v == postorder || v == leaf) {
|
2017-07-14 17:27:15 +00:00
|
|
|
printf("node: %s\en", *(char **)node);
|
2017-07-14 17:07:28 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
main(void)
|
|
|
|
{
|
|
|
|
posix_tnode *root = NULL;
|
|
|
|
|
|
|
|
char one[] = "blah1";
|
|
|
|
char two[] = "blah-2";
|
|
|
|
char three[] = "blah-3";
|
|
|
|
char four[] = "blah-4";
|
|
|
|
|
|
|
|
tsearch(one, &root, comp);
|
|
|
|
tsearch(two, &root, comp);
|
|
|
|
tsearch(three, &root, comp);
|
|
|
|
tsearch(four, &root, comp);
|
|
|
|
tsearch(four, &root, comp);
|
2017-07-14 17:27:15 +00:00
|
|
|
printf("four: %s\en", *(char **)tfind(four, &root, comp));
|
2017-07-14 17:07:28 +00:00
|
|
|
tdelete(four, &root, comp);
|
|
|
|
|
|
|
|
twalk(root, printwalk);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
.Ed
|
2005-01-20 09:17:07 +00:00
|
|
|
.Sh SEE ALSO
|
|
|
|
.Xr bsearch 3 ,
|
|
|
|
.Xr hsearch 3 ,
|
|
|
|
.Xr lsearch 3
|
2016-10-13 18:25:40 +00:00
|
|
|
.Sh STANDARDS
|
|
|
|
These functions conform to
|
|
|
|
.St -p1003.1-2008 .
|
|
|
|
.Pp
|
|
|
|
The
|
|
|
|
.Fa posix_tnode
|
|
|
|
type is not part of
|
|
|
|
.St -p1003.1-2008 ,
|
2016-10-15 08:09:55 +00:00
|
|
|
but is expected to be standardized by future versions of the standard.
|
2016-10-13 18:25:40 +00:00
|
|
|
It is defined as
|
|
|
|
.Fa void
|
|
|
|
for source-level compatibility.
|
|
|
|
Using
|
|
|
|
.Fa posix_tnode
|
2016-10-15 08:09:55 +00:00
|
|
|
makes distinguishing between nodes and keys easier.
|