diff options
Diffstat (limited to 'utils/hashtable.c')
-rwxr-xr-x | utils/hashtable.c | 717 |
1 files changed, 717 insertions, 0 deletions
diff --git a/utils/hashtable.c b/utils/hashtable.c new file mode 100755 index 0000000..209b467 --- /dev/null +++ b/utils/hashtable.c @@ -0,0 +1,717 @@ +/****************************************************************** +* $Id: hashtable.c,v 1.1 2004/10/29 09:28:25 snowdrop Exp $ +* +* CSOAP Project: A http client/server library in C +* Copyright (C) 2003-2004 Ferhat Ayaz +* +* This library is free software; you can redistribute it and/or +* modify it under the terms of the GNU Library General Public +* License as published by the Free Software Foundation; either +* version 2 of the License, or (at your option) any later version. +* +* This library is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +* Library General Public License for more details. +* +* You should have received a copy of the GNU Library General Public +* License along with this library; if not, write to the +* Free Software Foundation, Inc., 59 Temple Place - Suite 330, +* Boston, MA 02111-1307, USA. +* +* Email: ferhatayaz@yahoo.com +******************************************************************/ +#include <stdio.h> +#include <stdlib.h> +#include <assert.h> +#include <utils/hashtable.h> + +static int pointercmp(const void *pointer1, const void *pointer2); +static int stringcmp(const void *pointer1, const void *pointer2); +static unsigned long pointerHashFunction(const void *pointer); +static int isProbablePrime(long number); +static long calculateIdealNumOfBuckets(hashtable_t *hashTable); + +/*--------------------------------------------------------------------------*\ + * NAME: + * hashtable_create() - creates a new HashTable + * DESCRIPTION: + * Creates a new HashTable. When finished with this HashTable, it + * should be explicitly destroyed by calling the hashtable_destroy() + * function. + * EFFICIENCY: + * O(1) + * ARGUMENTS: + * numOfBuckets - the number of buckets to start the HashTable out with. + * Must be greater than zero, and should be prime. + * Ideally, the number of buckets should between 1/5 + * and 1 times the expected number of elements in the + * HashTable. Values much more or less than this will + * result in wasted memory or decreased performance + * respectively. The number of buckets in a HashTable + * can be re-calculated to an appropriate number by + * calling the hashtable_rehash() function once the + * HashTable has been populated. The number of buckets + * in a HashTable may also be re-calculated + * automatically if the ratio of elements to buckets + * passes the thresholds set by hashtable_set_ideal_ration(). + * RETURNS: + * HashTable - a new Hashtable, or NULL on error +\*--------------------------------------------------------------------------*/ + +hashtable_t *hashtable_create(long numOfBuckets) { + hashtable_t *hashTable; + int i; + + assert(numOfBuckets > 0); + + hashTable = (hashtable_t *) malloc(sizeof(hashtable_t)); + if (hashTable == NULL) + return NULL; + + hashTable->bucketArray = (hashpair_t **) + malloc(numOfBuckets * sizeof(hashpair_t *)); + if (hashTable->bucketArray == NULL) { + free(hashTable); + return NULL; + } + + hashTable->numOfBuckets = numOfBuckets; + hashTable->numOfElements = 0; + + for (i=0; i<numOfBuckets; i++) + hashTable->bucketArray[i] = NULL; + + hashTable->idealRatio = 3.0; + hashTable->lowerRehashThreshold = 0.0; + hashTable->upperRehashThreshold = 15.0; + + hashTable->keycmp = stringcmp; /*pointercmp;*/ + hashTable->valuecmp = pointercmp; + hashTable->hashFunction = hashtable_string_hash_function; /*pointerHashFunction;*/ + hashTable->keyDeallocator = NULL; + hashTable->valueDeallocator = NULL; + + return hashTable; +} + +/*--------------------------------------------------------------------------*\ + * NAME: + * hashtable_destroy() - destroys an existing HashTable + * DESCRIPTION: + * Destroys an existing HashTable. + * EFFICIENCY: + * O(n) + * ARGUMENTS: + * hashTable - the HashTable to destroy + * RETURNS: + * <nothing> +\*--------------------------------------------------------------------------*/ + +void hashtable_destroy(hashtable_t *hashTable) { + int i; + hashpair_t *pair, *nextPair; + + for (i=0; i<hashTable->numOfBuckets; i++) { + pair = hashTable->bucketArray[i]; + while (pair != NULL) { + nextPair = pair->next; + if (hashTable->keyDeallocator != NULL) + hashTable->keyDeallocator((void *) pair->key); + if (hashTable->valueDeallocator != NULL) + hashTable->valueDeallocator(pair->value); + free(pair); + pair = nextPair; + } + } + + free(hashTable->bucketArray); + free(hashTable); +} + +/*--------------------------------------------------------------------------*\ + * NAME: + * hashtable_contains_key() - checks the existence of a key in a HashTable + * DESCRIPTION: + * Determines whether or not the specified HashTable contains the + * specified key. Uses the comparison function specified by + * hashtable_set_key_comparision_function(). + * EFFICIENCY: + * O(1), assuming a good hash function and element-to-bucket ratio + * ARGUMENTS: + * hashTable - the HashTable to search + * key - the key to search for + * RETURNS: + * bool - whether or not the specified HashTable contains the + * specified key. +\*--------------------------------------------------------------------------*/ + +int hashtable_contains_key(const hashtable_t *hashTable, const void *key) { + return (hashtable_get(hashTable, key) != NULL); +} + +/*--------------------------------------------------------------------------*\ + * NAME: + * hashtable_contains_value() + * - checks the existence of a value in a HashTable + * DESCRIPTION: + * Determines whether or not the specified HashTable contains the + * specified value. Unlike hashtable_contains_key(), this function is + * not very efficient, since it has to scan linearly looking for a + * match. Uses the comparison function specified by + * hashtable_set_value_comparision_function(). + * EFFICIENCY: + * O(n) + * ARGUMENTS: + * hashTable - the HashTable to search + * value - the value to search for + * RETURNS: + * bool - whether or not the specified HashTable contains the + * specified value. +\*--------------------------------------------------------------------------*/ + +int hashtable_contains_value(const hashtable_t *hashTable, const void *value) { + int i; + hashpair_t *pair; + + for (i=0; i<hashTable->numOfBuckets; i++) { + pair = hashTable->bucketArray[i]; + while (pair != NULL) { + if (hashTable->valuecmp(value, pair->value) == 0) + return 1; + pair = pair->next; + } + } + + return 0; +} + +/*--------------------------------------------------------------------------*\ + * NAME: + * hashtable_put() - adds a key/value pair to a HashTable + * DESCRIPTION: + * Adds the specified key/value pair to the specified HashTable. If + * the key already exists in the HashTable (determined by the comparison + * function specified by hashtable_set_key_comparision_function()), its value + * is replaced by the new value. May trigger an auto-rehash (see + * hashtable_set_ideal_ration()). It is illegal to specify NULL as the + * key or value. + * EFFICIENCY: + * O(1), assuming a good hash function and element-to-bucket ratio + * ARGUMENTS: + * hashTable - the HashTable to add to + * key - the key to add or whose value to replace + * value - the value associated with the key + * RETURNS: + * err - 0 if successful, -1 if an error was encountered +\*--------------------------------------------------------------------------*/ + +int hashtable_put(hashtable_t *hashTable, const void *key, void *value) { + long hashValue; + hashpair_t *pair; + + assert(key != NULL); + assert(value != NULL); + + hashValue = hashTable->hashFunction(key) % hashTable->numOfBuckets; + pair = hashTable->bucketArray[hashValue]; + + while (pair != NULL && hashTable->keycmp(key, pair->key) != 0) + pair = pair->next; + + if (pair) { + if (pair->key != key) { + if (hashTable->keyDeallocator != NULL) + hashTable->keyDeallocator((void *) pair->key); + pair->key = key; + } + if (pair->value != value) { + if (hashTable->valueDeallocator != NULL) + hashTable->valueDeallocator(pair->value); + pair->value = value; + } + } + else { + hashpair_t *newPair = (hashpair_t *) malloc(sizeof(hashpair_t)); + if (newPair == NULL) { + return -1; + } + else { + newPair->key = key; + newPair->value = value; + newPair->next = hashTable->bucketArray[hashValue]; + hashTable->bucketArray[hashValue] = newPair; + hashTable->numOfElements++; + + if (hashTable->upperRehashThreshold > hashTable->idealRatio) { + float elementToBucketRatio = (float) hashTable->numOfElements / + (float) hashTable->numOfBuckets; + if (elementToBucketRatio > hashTable->upperRehashThreshold) + hashtable_rehash(hashTable, 0); + } + } + } + + return 0; +} + +/*--------------------------------------------------------------------------*\ + * NAME: + * HashTableGet() - retrieves the value of a key in a HashTable + * DESCRIPTION: + * Retrieves the value of the specified key in the specified HashTable. + * Uses the comparison function specified by + * hashtable_set_key_comparision_function(). + * EFFICIENCY: + * O(1), assuming a good hash function and element-to-bucket ratio + * ARGUMENTS: + * hashTable - the HashTable to search + * key - the key whose value is desired + * RETURNS: + * void * - the value of the specified key, or NULL if the key + * doesn't exist in the HashTable +\*--------------------------------------------------------------------------*/ + +void *hashtable_get(const hashtable_t *hashTable, const void *key) { + long hashValue = hashTable->hashFunction(key) % hashTable->numOfBuckets; + hashpair_t *pair = hashTable->bucketArray[hashValue]; + + while (pair != NULL && hashTable->keycmp(key, pair->key) != 0) + pair = pair->next; + + return (pair == NULL)? NULL : pair->value; +} + +/*--------------------------------------------------------------------------*\ + * NAME: + * hashtable_remove() - removes a key/value pair from a HashTable + * DESCRIPTION: + * Removes the key/value pair identified by the specified key from the + * specified HashTable if the key exists in the HashTable. May trigger + * an auto-rehash (see hashtable_set_ideal_ration()). + * EFFICIENCY: + * O(1), assuming a good hash function and element-to-bucket ratio + * ARGUMENTS: + * hashTable - the HashTable to remove the key/value pair from + * key - the key specifying the key/value pair to be removed + * RETURNS: + * <nothing> +\*--------------------------------------------------------------------------*/ + +void hashtable_remove(hashtable_t *hashTable, const void *key) { + long hashValue = hashTable->hashFunction(key) % hashTable->numOfBuckets; + hashpair_t *pair = hashTable->bucketArray[hashValue]; + hashpair_t *previousPair = NULL; + + while (pair != NULL && hashTable->keycmp(key, pair->key) != 0) { + previousPair = pair; + pair = pair->next; + } + + if (pair != NULL) { + if (hashTable->keyDeallocator != NULL) + hashTable->keyDeallocator((void *) pair->key); + if (hashTable->valueDeallocator != NULL) + hashTable->valueDeallocator(pair->value); + if (previousPair != NULL) + previousPair->next = pair->next; + else + hashTable->bucketArray[hashValue] = pair->next; + free(pair); + hashTable->numOfElements--; + + if (hashTable->lowerRehashThreshold > 0.0) { + float elementToBucketRatio = (float) hashTable->numOfElements / + (float) hashTable->numOfBuckets; + if (elementToBucketRatio < hashTable->lowerRehashThreshold) + hashtable_rehash(hashTable, 0); + } + } +} + +/*--------------------------------------------------------------------------*\ + * NAME: + * hashtable_remove_all() - removes all key/value pairs from a HashTable + * DESCRIPTION: + * Removes all key/value pairs from the specified HashTable. May trigger + * an auto-rehash (see hashtable_set_ideal_ration()). + * EFFICIENCY: + * O(n) + * ARGUMENTS: + * hashTable - the HashTable to remove all key/value pairs from + * RETURNS: + * <nothing> +\*--------------------------------------------------------------------------*/ + +void hashtable_remove_all(hashtable_t *hashTable) { + int i; + + for (i=0; i<hashTable->numOfBuckets; i++) { + hashpair_t *pair = hashTable->bucketArray[i]; + while (pair != NULL) { + hashpair_t *nextPair = pair->next; + if (hashTable->keyDeallocator != NULL) + hashTable->keyDeallocator((void *) pair->key); + if (hashTable->valueDeallocator != NULL) + hashTable->valueDeallocator(pair->value); + free(pair); + pair = nextPair; + } + hashTable->bucketArray[i] = NULL; + } + + hashTable->numOfElements = 0; + hashtable_rehash(hashTable, 5); +} + +/*--------------------------------------------------------------------------*\ + * NAME: + * hashtable_is_empty() - determines if a HashTable is empty + * DESCRIPTION: + * Determines whether or not the specified HashTable contains any + * key/value pairs. + * EFFICIENCY: + * O(1) + * ARGUMENTS: + * hashTable - the HashTable to check + * RETURNS: + * bool - whether or not the specified HashTable contains any + * key/value pairs +\*--------------------------------------------------------------------------*/ + +int hashtable_is_empty(const hashtable_t *hashTable) { + return (hashTable->numOfElements == 0); +} + +/*--------------------------------------------------------------------------*\ + * NAME: + * hashtable_size() - returns the number of elements in a HashTable + * DESCRIPTION: + * Returns the number of key/value pairs that are present in the + * specified HashTable. + * EFFICIENCY: + * O(1) + * ARGUMENTS: + * hashTable - the HashTable whose size is requested + * RETURNS: + * long - the number of key/value pairs that are present in + * the specified HashTable +\*--------------------------------------------------------------------------*/ + +long hashtable_size(const hashtable_t *hashTable) { + return hashTable->numOfElements; +} + +/*--------------------------------------------------------------------------*\ + * NAME: + * HashTableGetNumBuckets() - returns the number of buckets in a HashTable + * DESCRIPTION: + * Returns the number of buckets that are in the specified HashTable. + * This may change dynamically throughout the life of a HashTable if + * automatic or manual rehashing is performed. + * EFFICIENCY: + * O(1) + * ARGUMENTS: + * hashTable - the HashTable whose number of buckets is requested + * RETURNS: + * long - the number of buckets that are in the specified + * HashTable +\*--------------------------------------------------------------------------*/ + +long hashtable_get_num_buckets(const hashtable_t *hashTable) { + return hashTable->numOfBuckets; +} + +/*--------------------------------------------------------------------------*\ + * NAME: + * hashtable_set_key_comparision_function() + * - specifies the function used to compare keys in a HashTable + * DESCRIPTION: + * Specifies the function used to compare keys in the specified + * HashTable. The specified function should return zero if the two + * keys are considered equal, and non-zero otherwise. The default + * function is one that simply compares pointers. + * ARGUMENTS: + * hashTable - the HashTable whose key comparison function is being + * specified + * keycmp - a function which returns zero if the two arguments + * passed to it are considered "equal" keys and non-zero + * otherwise + * RETURNS: + * <nothing> +\*--------------------------------------------------------------------------*/ + +void hashtable_set_key_comparision_function(hashtable_t *hashTable, + int (*keycmp)(const void *key1, const void *key2)) { + assert(keycmp != NULL); + hashTable->keycmp = keycmp; +} + +/*--------------------------------------------------------------------------*\ + * NAME: + * hashtable_set_value_comparision_function() + * - specifies the function used to compare values in a HashTable + * DESCRIPTION: + * Specifies the function used to compare values in the specified + * HashTable. The specified function should return zero if the two + * values are considered equal, and non-zero otherwise. The default + * function is one that simply compares pointers. + * ARGUMENTS: + * hashTable - the HashTable whose value comparison function is being + * specified + * valuecmp - a function which returns zero if the two arguments + * passed to it are considered "equal" values and non-zero + * otherwise + * RETURNS: + * <nothing> +\*--------------------------------------------------------------------------*/ + +void hashtable_set_value_comparision_function(hashtable_t *hashTable, + int (*valuecmp)(const void *value1, const void *value2)) { + assert(valuecmp != NULL); + hashTable->valuecmp = valuecmp; +} + +/*--------------------------------------------------------------------------*\ + * NAME: + * hashtable_set_hashfunction() + * - specifies the hash function used by a HashTable + * DESCRIPTION: + * Specifies the function used to determine the hash value for a key + * in the specified HashTable (before modulation). An ideal hash + * function is one which is easy to compute and approximates a + * "random" function. The default function is one that works + * relatively well for pointers. If the HashTable keys are to be + * strings (which is probably the case), then this default function + * will not suffice, in which case consider using the provided + * hashtable_string_hash_function() function. + * ARGUMENTS: + * hashTable - the HashTable whose hash function is being specified + * hashFunction - a function which returns an appropriate hash code + * for a given key + * RETURNS: + * <nothing> +\*--------------------------------------------------------------------------*/ + +void hashtable_set_hashfunction(hashtable_t *hashTable, + unsigned long (*hashFunction)(const void *key)) +{ + assert(hashFunction != NULL); + hashTable->hashFunction = hashFunction; +} + +/*--------------------------------------------------------------------------*\ + * NAME: + * hashtable_rehash() - reorganizes a HashTable to be more efficient + * DESCRIPTION: + * Reorganizes a HashTable to be more efficient. If a number of + * buckets is specified, the HashTable is rehashed to that number of + * buckets. If 0 is specified, the HashTable is rehashed to a number + * of buckets which is automatically calculated to be a prime number + * that achieves (as closely as possible) the ideal element-to-bucket + * ratio specified by the hashtable_set_ideal_ration() function. + * EFFICIENCY: + * O(n) + * ARGUMENTS: + * hashTable - the HashTable to be reorganized + * numOfBuckets - the number of buckets to rehash the HashTable to. + * Should be prime. Ideally, the number of buckets + * should be between 1/5 and 1 times the expected + * number of elements in the HashTable. Values much + * more or less than this will result in wasted memory + * or decreased performance respectively. If 0 is + * specified, an appropriate number of buckets is + * automatically calculated. + * RETURNS: + * <nothing> +\*--------------------------------------------------------------------------*/ + +void hashtable_rehash(hashtable_t *hashTable, long numOfBuckets) { + hashpair_t **newBucketArray; + int i; + + assert(numOfBuckets >= 0); + if (numOfBuckets == 0) + numOfBuckets = calculateIdealNumOfBuckets(hashTable); + + if (numOfBuckets == hashTable->numOfBuckets) + return; /* already the right size! */ + + newBucketArray = (hashpair_t **) + malloc(numOfBuckets * sizeof(hashpair_t *)); + if (newBucketArray == NULL) { + /* Couldn't allocate memory for the new array. This isn't a fatal + * error; we just can't perform the rehash. */ + return; + } + + for (i=0; i<numOfBuckets; i++) + newBucketArray[i] = NULL; + + for (i=0; i<hashTable->numOfBuckets; i++) { + hashpair_t *pair = hashTable->bucketArray[i]; + while (pair != NULL) { + hashpair_t *nextPair = pair->next; + long hashValue = hashTable->hashFunction(pair->key) % numOfBuckets; + pair->next = newBucketArray[hashValue]; + newBucketArray[hashValue] = pair; + pair = nextPair; + } + } + + free(hashTable->bucketArray); + hashTable->bucketArray = newBucketArray; + hashTable->numOfBuckets = numOfBuckets; +} + +/*--------------------------------------------------------------------------*\ + * NAME: + * hashtable_set_ideal_ration() + * - sets the ideal element-to-bucket ratio of a HashTable + * DESCRIPTION: + * Sets the ideal element-to-bucket ratio, as well as the lower and + * upper auto-rehash thresholds, of the specified HashTable. Note + * that this function doesn't actually perform a rehash. + * + * The default values for these properties are 3.0, 0.0 and 15.0 + * respectively. This is likely fine for most situations, so there + * is probably no need to call this function. + * ARGUMENTS: + * hashTable - a HashTable + * idealRatio - the ideal element-to-bucket ratio. When a rehash + * occurs (either manually via a call to the + * hashtable_rehash() function or automatically due the + * the triggering of one of the thresholds below), the + * number of buckets in the HashTable will be + * recalculated to be a prime number that achieves (as + * closely as possible) this ideal ratio. Must be a + * positive number. + * lowerRehashThreshold + * - the element-to-bucket ratio that is considered + * unacceptably low (i.e., too few elements per bucket). + * If the actual ratio falls below this number, a + * rehash will automatically be performed. Must be + * lower than the value of idealRatio. If no ratio + * is considered unacceptably low, a value of 0.0 can + * be specified. + * upperRehashThreshold + * - the element-to-bucket ratio that is considered + * unacceptably high (i.e., too many elements per bucket). + * If the actual ratio rises above this number, a + * rehash will automatically be performed. Must be + * higher than idealRatio. However, if no ratio + * is considered unacceptably high, a value of 0.0 can + * be specified. + * RETURNS: + * <nothing> +\*--------------------------------------------------------------------------*/ + +void hashtable_set_ideal_ration(hashtable_t *hashTable, float idealRatio, + float lowerRehashThreshold, float upperRehashThreshold) { + assert(idealRatio > 0.0); + assert(lowerRehashThreshold < idealRatio); + assert(upperRehashThreshold == 0.0 || upperRehashThreshold > idealRatio); + + hashTable->idealRatio = idealRatio; + hashTable->lowerRehashThreshold = lowerRehashThreshold; + hashTable->upperRehashThreshold = upperRehashThreshold; +} + +/*--------------------------------------------------------------------------*\ + * NAME: + * hashtable_set_deallocation_function() + * - sets the key and value deallocation functions of a HashTable + * DESCRIPTION: + * Sets the key and value deallocation functions of the specified + * HashTable. This determines what happens to a key or a value when it + * is removed from the HashTable. If the deallocation function is NULL + * (the default if this function is never called), its reference is + * simply dropped and it is up to the calling program to perform the + * proper memory management. If the deallocation function is non-NULL, + * it is called to free the memory used by the object. E.g., for simple + * objects, an appropriate deallocation function may be free(). + * + * This affects the behaviour of the hashtable_destroy(), hashtable_put(), + * hashtable_remove() and hashtable_remove_all() functions. + * ARGUMENTS: + * hashTable - a HashTable + * keyDeallocator + * - if non-NULL, the function to be called when a key is + * removed from the HashTable. + * valueDeallocator + * - if non-NULL, the function to be called when a value is + * removed from the HashTable. + * RETURNS: + * <nothing> +\*--------------------------------------------------------------------------*/ + +void hashtable_set_deallocation_function(hashtable_t *hashTable, + void (*keyDeallocator)(void *key), + void (*valueDeallocator)(void *value)) { + hashTable->keyDeallocator = keyDeallocator; + hashTable->valueDeallocator = valueDeallocator; +} + +/*--------------------------------------------------------------------------*\ + * NAME: + * hashtable_string_hash_function() - a good hash function for strings + * DESCRIPTION: + * A hash function that is appropriate for hashing strings. Note that + * this is not the default hash function. To make it the default hash + * function, call hashtable_set_hashfunction(hashTable, + * hashtable_string_hash_function). + * ARGUMENTS: + * key - the key to be hashed + * RETURNS: + * unsigned long - the unmodulated hash value of the key +\*--------------------------------------------------------------------------*/ + +unsigned long hashtable_string_hash_function(const void *key) { + const unsigned char *str = (const unsigned char *) key; + unsigned long hashValue = 0; + int i; + + for (i=0; str[i] != '\0'; i++) + hashValue = hashValue * 37 + str[i]; + + return hashValue; +} + + + +static int stringcmp(const void *pointer1, const void *pointer2) { + return (strcmp((const char*)pointer1, (const char*)pointer2)); +} + +static int pointercmp(const void *pointer1, const void *pointer2) { + return (pointer1 != pointer2); +} + +static unsigned long pointerHashFunction(const void *pointer) { + return ((unsigned long) pointer) >> 4; +} + +static int isProbablePrime(long oddNumber) { + long i; + + for (i=3; i<51; i+=2) + if (oddNumber == i) + return 1; + else if (oddNumber%i == 0) + return 0; + + return 1; /* maybe */ +} + +static long calculateIdealNumOfBuckets(hashtable_t *hashTable) { + long idealNumOfBuckets = hashTable->numOfElements / hashTable->idealRatio; + if (idealNumOfBuckets < 5) + idealNumOfBuckets = 5; + else + idealNumOfBuckets |= 0x01; /* make it an odd number */ + while (!isProbablePrime(idealNumOfBuckets)) + idealNumOfBuckets += 2; + + return idealNumOfBuckets; +} + |