freebsd/lib/libc/stdlib/strhash.c

#ifndef lint
static char *rcsid = "$Header: /home/ncvs/src/lib/libc/stdlib/strhash.c,v 1.5 1995/10/22 14:53:17 phk Exp $";
#endif

/*
 *
 *                      Copyright 1990
 *               Terry Jones & Jordan Hubbard
 *
 *		  PCS Computer Systeme, GmbH.
 *	             Munich, West Germany
 *
 *
 *  All rights reserved.
 *
 *  This is unsupported software and is subject to change without notice.
 *  the author makes no representations about the suitability of this software
 *  for any purpose. It is supplied "as is" without express or implied
 *  warranty.
 *
 *  Permission to use, copy, modify, and distribute this software and its
 *  documentation for any purpose and without fee is hereby granted, provided
 *  that the above copyright notice appear in all copies and that both that
 *  copyright notice and this permission notice appear in supporting
 *  documentation, and that the name of the author not be used in
 *  advertising or publicity pertaining to distribution of the software
 *  without specific, written prior permission.
 *
 */

/*
 * This is a fairly simple open addressing hash scheme.
 * Terry did all the code, I just did the spec.
 * Thanks again, you crazy Aussie..
 *
 */

/*
 * $Log: strhash.c,v $
 * Revision 1.5  1995/10/22  14:53:17  phk
 * Mino cleanup, #includes & unused vars.
 *
 * Revision 1.4  1995/05/30  05:41:55  rgrimes
 * Remove trailing whitespace.
 *
 * Revision 1.3  1995/03/28  08:41:02  jkh
 * Fix a missing _hash() to prevent namespace pollution with the db/hash routines.
 * Grrr.  If the dbhash routines weren't grossly overengineered I wouldn't
 * even need to do this! :-(
 *
 * Also now export the hash_stats routine.  Manpage coming RSN - I promise.
 *
 * Revision 1.2  1995/03/26  19:32:24  ache
 * Hash 8bit chars without sign extension
 *
 * Revision 1.1  1995/03/26  10:21:55  jkh
 * Add the strhash family of routines.  They provide a number of features
 * that the db/hash functions don't, and they're much simpler to use for
 * low-overhead string hashing.
 *
 * Revision 1.1  1995/02/25  02:16:34  jkh
 * Second version of this - now support the essentials of a basic
 * attributed file system for storing menu information and command
 * templates.  This is not finished yet, but it does compile so I can
 * commit it to the tree now and continue working on it.
 *
 * Revision 2.0  90/03/26  01:44:26  jkh
 * pre-beta check-in
 *
 * Revision 1.8  90/03/09  19:22:35  jkh
 * Fixed bogus comment.
 *
 * Revision 1.7  90/03/09  19:01:08  jkh
 * Added comments, GPL.
 *
 * Revision 1.6  90/03/08  17:55:58  terry
 * Rearranged hash_purge to be a tiny bit more efficient.
 * Added verbose option to hash_stats.
 *
 * Revision 1.5  90/03/08  17:19:54  terry
 * Added hash_purge. Added arg to hash_traverse. Changed all
 * void * to Generic.
 *
 * Revision 1.4  90/03/08  12:02:35  terry
 * Fixed problems with allocation that I screwed up last night.
 * Changed bucket lists to be singly linked. Thanks to JKH, my hero.
 *
 * Revision 1.3  90/03/07  21:33:33  terry
 * Cleaned up a few decls to keep gcc -Wall quiet.
 *
 * Revision 1.2  90/03/07  21:14:53  terry
 * Comments. Added HASH_STATS define. Removed hash_find()
 * and new_node().
 *
 * Revision 1.1  90/03/07  20:49:45  terry
 * Initial revision
 *
 *
 */


#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <strhash.h>

#define HASH_NULL    (hash_table *)0
#define NODE_NULL    (hash_node *)0
#define GENERIC_NULL (void *)0

#define HASH_SZ 97


static int _hash(int size, char *key);
static hash_node *list_find(caddr_t key, hash_node *head);


/*
 * hash_create()
 *
 * Malloc room for a new hash table and then room for its
 * bucket pointers. Then set all the buckets to
 * point to 0. Return the address of the new table.
 */
hash_table *
hash_create(int size)
{
    register int i;
    hash_table *new = (hash_table *)malloc(sizeof(hash_table));

    if (!new || size < 0){
	return HASH_NULL;
    }

    if (size == 0){
	size = HASH_SZ;
    }

    if (!(new->buckets = (hash_node **)malloc(size * sizeof(hash_node *)))){
	return HASH_NULL;
    }

    for (i = 0; i < size; i++){
	new->buckets[i] = NODE_NULL;
    }
    new->size = size;

    return new;
}


/*
 * list_find()
 *
 * Find the key in the linked list pointed to by head.
 */
static hash_node *
list_find(caddr_t key, hash_node *head)
{
    while (head){
	if (!strcmp(head->key, key)){
	    return head;
	}
	head = head->next;
    }
    return NODE_NULL;
}


/*
 * _hash()
 *
 * Compute the hash value for the given key.
 */
static int
_hash(int size, char *key)
{
    unsigned int h = 0x0;

    while (*key){
	h = (h << 1) ^ (h ^ (unsigned char) *key++);
    }

    h %= size;
    return h;
}

/*
 * hash_destroy()
 *
 * Find the key and (if it's there) remove it entirely.
 * The function (*nukefunc)() is in charge of disposing
 * of the storage help by the data associated with the node.
 */
void
hash_destroy(hash_table *table, char *key, void (*nukefunc)())
{
    int bucket = _hash(table->size, key);
    hash_node *found = table->buckets[bucket];
    hash_node *to_free = NODE_NULL;

    if (!found) {
	return;
    }

    if (!strcmp(found->key, key)) {
	/*
	 * It was the head of the list.
	 */
	table->buckets[bucket] = found->next;
	to_free = found;
    }
    else {
	/*
	 * Walk the list, looking one ahead.
	 */
	while (found->next) {
	    if (!strcmp(found->next->key, key)) {
		to_free = found->next;
		found->next = found->next->next;
		break;
	    }
	    found = found->next;
	}

	if (!to_free){
	    return;
	}
    }

    if (nukefunc)
	(*nukefunc)(to_free->key, to_free->data);
    free(to_free);
    return;
}


/*
 * hash_search()
 *
 * Search the table for the given key. Then:
 *
 * 1) If you find it and there is no replacement function, just
 *    return what you found. (This is a simple search).
 * 2) If you find it and there is a replacement function, run
 *    the function on the data you found, and replace the old
 *    data with whatever is passed in datum. Return 0.
 * 3) If you don't find it and there is some datum, insert a
 *    new item into the table. Insertions go at the front of
 *    the bucket. Return 0.
 * 4) Otherwise just return 0.
 *
 */
void *
hash_search(hash_table *table, caddr_t key, void *datum,
	    void (*replace_func)())
{
    int bucket = _hash(table->size, key);
    hash_node *found = list_find(key, table->buckets[bucket]);

    if (found){
	if (!replace_func){
	    return found->data;
	}
	else{
	    (*replace_func)(found->data);
	    found->data = datum;
	}
    }
    else{
	if (datum){

	    static int assign_key();

	    hash_node *new = (hash_node *)malloc(sizeof(hash_node));

	    if (!new || !assign_key(key, new)){
		return GENERIC_NULL;
	    }
	    new->data = datum;
	    new->next = table->buckets[bucket];
	    table->buckets[bucket] = new;
	    return new;
	}
    }
    return GENERIC_NULL;
}


/*
 * assign_key()
 *
 * Set the key value of a node to be 'key'. Get some space from
 * malloc and copy it in etc. Return 1 if all is well, 0 otherwise.
 */
static int
assign_key(char *key, hash_node *node)
{
    if (!node || !key){
	return 0;
    }

    if (!(node->key = (char *)malloc(strlen(key) + 1))){
	return 0;
    }

    node->key[0] = '\0';
    strcat(node->key, key);
    return 1;
}

/*
 * hash_traverse()
 *
 * Traverse the hash table and run the function func on the
 * data found at each node and the argument we're passed for it.
 */
void
hash_traverse(hash_table *table, int (*func)(), void *arg)
{
    register int i;
    register int size = table->size;

    if (!func)
	return;

    for (i = 0; i < size; i++) {
	hash_node *n = table->buckets[i];
	while (n) {
	    if ((*func)(n->key, n->data, arg) == 0)
		return;
	    n = n->next;
	}
    }
    return;
}

/*
 * hash_purge()
 *
 * Run through the entire hash table. Call purge_func
 * on the data found at each node, and then free the node.
 * Set all the bucket pointers to 0.
 */
void
hash_purge(hash_table *table, void (*purge_func)(char *p1, void *p2))
{
    register int i;
    register int size = table->size;

    for (i = 0; i < size; i++) {
	hash_node *n = table->buckets[i];
	if (n) {
	    do {
		hash_node *to_free = n;
		if (purge_func) {
		    (*purge_func)(n->key, n->data);
		}
		n = n->next;
		free(to_free);
	    } while (n);
	    table->buckets[i] = NODE_NULL;
	}
    }
}

#undef min
#define min(a, b) (a) < (b) ? (a) : (b)

/*
 * hash_stats()
 *
 * Print statistics about the current table allocation to stdout.
 */
void
hash_stats(hash_table *table, int verbose)
{
    register int i;
    int total_elements = 0;
    int non_empty_buckets = 0;
    int max_count = 0;
    int max_repeats = 0;
    int *counts;
    int size = table->size;

    if (!(counts = (int *)malloc(size * sizeof(int)))){
	fprintf(stderr, "malloc returns 0\n");
	exit(1);
    }

    for (i = 0; i < size; i++){
	int x = 0;
	hash_node *n = table->buckets[i];
	counts[i] = 0;
	while (n){
	    if (!x){
		x = 1;
		non_empty_buckets++;
		if (verbose){
		    printf("bucket %2d: ", i);
		}
	    }
	    if (verbose){
		printf(" %s", n->key);
	    }
	    counts[i]++;
	    n = n->next;
	}

	total_elements += counts[i];
	if (counts[i] > max_count){
	    max_count = counts[i];
	    max_repeats = 1;
	}
	else if (counts[i] == max_count){
	    max_repeats++;
	}

	if (counts[i] && verbose){
	    printf(" (%d)\n", counts[i]);
	}
    }

    printf("\n");
    printf("%d element%s in storage.\n", total_elements, total_elements == 1 ? "" : "s");

    if (total_elements){
	printf("%d of %d (%.2f%%) buckets are in use\n", non_empty_buckets, size,
	       (double)100 * (double)non_empty_buckets / (double)(size));
	printf("the maximum number of elements in a bucket is %d (%d times)\n", max_count, max_repeats);
	printf("average per bucket is %f\n", (double)total_elements / (double)non_empty_buckets);
	printf("optimal would be %f\n", (double)total_elements / (double)(min(size, total_elements)));
    }
    return;
}