From 582c59bf7dcf37270c2fd7e99b4982ebc9bcbc74 Mon Sep 17 00:00:00 2001 From: Nikias Bassen Date: Fri, 18 Nov 2016 03:22:25 +0100 Subject: Improve plist_dict_set_item performance for large dictionaries with hash table --- src/bplist.c | 2 +- src/hashtable.c | 40 ++++++++++++++++++++++++-- src/hashtable.h | 7 +++-- src/plist.c | 87 +++++++++++++++++++++++++++++++++++++++++++++++++-------- src/plist.h | 1 + 5 files changed, 119 insertions(+), 18 deletions(-) diff --git a/src/bplist.c b/src/bplist.c index 9cc380c..124b024 100644 --- a/src/bplist.c +++ b/src/bplist.c @@ -1156,7 +1156,7 @@ PLIST_API void plist_to_bin(plist_t plist, char **plist_bin, uint32_t * length) //list of objects objects = ptr_array_new(256); //hashtable to write only once same nodes - ref_table = hash_table_new(plist_data_hash, plist_data_compare); + ref_table = hash_table_new(plist_data_hash, plist_data_compare, free); //serialize plist ser_s.objects = objects; diff --git a/src/hashtable.c b/src/hashtable.c index 3568f3c..dd6dbfc 100644 --- a/src/hashtable.c +++ b/src/hashtable.c @@ -2,7 +2,7 @@ * hashtable.c * really simple hash table implementation * - * Copyright (c) 2011 Nikias Bassen, All Rights Reserved. + * Copyright (c) 2011-2016 Nikias Bassen, All Rights Reserved. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public @@ -20,7 +20,7 @@ */ #include "hashtable.h" -hashtable_t* hash_table_new(hash_func_t hash_func, compare_func_t compare_func) +hashtable_t* hash_table_new(hash_func_t hash_func, compare_func_t compare_func, free_func_t free_func) { hashtable_t* ht = (hashtable_t*)malloc(sizeof(hashtable_t)); int i; @@ -30,6 +30,7 @@ hashtable_t* hash_table_new(hash_func_t hash_func, compare_func_t compare_func) ht->count = 0; ht->hash_func = hash_func; ht->compare_func = compare_func; + ht->free_func = free_func; return ht; } @@ -42,7 +43,9 @@ void hash_table_destroy(hashtable_t *ht) if (ht->entries[i]) { hashentry_t* e = ht->entries[i]; while (e) { - free(e->value); + if (ht->free_func) { + ht->free_func(e->value); + } hashentry_t* old = e; e = e->next; free(old); @@ -104,3 +107,34 @@ void* hash_table_lookup(hashtable_t* ht, void *key) } return NULL; } + +void hash_table_remove(hashtable_t* ht, void *key) +{ + if (!ht || !key) return; + + unsigned int hash = ht->hash_func(key); + + int idx0 = hash & 0xFFF; + + // get the idx0 list + hashentry_t* e = ht->entries[idx0]; + hashentry_t* last = e; + while (e) { + if (ht->compare_func(e->key, key)) { + // found element, remove it from the list + hashentry_t* old = e; + if (e == ht->entries[idx0]) { + ht->entries[idx0] = e->next; + } else { + last->next = e->next; + } + if (ht->free_func) { + ht->free_func(old->value); + } + free(old); + return; + } + last = e; + e = e->next; + } +} diff --git a/src/hashtable.h b/src/hashtable.h index 60a40ab..42d7b93 100644 --- a/src/hashtable.h +++ b/src/hashtable.h @@ -2,7 +2,7 @@ * hashtable.h * header file for really simple hash table implementation * - * Copyright (c) 2011 Nikias Bassen, All Rights Reserved. + * Copyright (c) 2011-2016 Nikias Bassen, All Rights Reserved. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public @@ -30,18 +30,21 @@ typedef struct hashentry_t { typedef unsigned int(*hash_func_t)(const void* key); typedef int (*compare_func_t)(const void *a, const void *b); +typedef void (*free_func_t)(void *ptr); typedef struct hashtable_t { hashentry_t *entries[4096]; size_t count; hash_func_t hash_func; compare_func_t compare_func; + free_func_t free_func; } hashtable_t; -hashtable_t* hash_table_new(hash_func_t hash_func, compare_func_t compare_func); +hashtable_t* hash_table_new(hash_func_t hash_func, compare_func_t compare_func, free_func_t free_func); void hash_table_destroy(hashtable_t *ht); void hash_table_insert(hashtable_t* ht, void *key, void *value); void* hash_table_lookup(hashtable_t* ht, void *key); +void hash_table_remove(hashtable_t* ht, void *key); #endif diff --git a/src/plist.c b/src/plist.c index a3d88b9..6a267eb 100644 --- a/src/plist.c +++ b/src/plist.c @@ -37,6 +37,7 @@ #include #include +#include extern void plist_xml_init(void); extern void plist_xml_deinit(void); @@ -148,6 +149,31 @@ plist_data_t plist_new_plist_data(void) return data; } +static unsigned int dict_key_hash(const void *data) +{ + plist_data_t keydata = (plist_data_t)data; + unsigned int hash = 5381; + size_t i; + char *str = keydata->strval; + for (i = 0; i < keydata->length; str++, i++) { + hash = ((hash << 5) + hash) + *str; + } + return hash; +} + +static int dict_key_compare(const void* a, const void* b) +{ + plist_data_t data_a = (plist_data_t)a; + plist_data_t data_b = (plist_data_t)b; + if (data_a->strval == NULL || data_b->strval == NULL) { + return FALSE; + } + if (data_a->length != data_b->length) { + return FALSE; + } + return (strcmp(data_a->strval, data_b->strval) == 0) ? TRUE : FALSE; +} + void plist_free_data(plist_data_t data) { if (data) @@ -161,6 +187,9 @@ void plist_free_data(plist_data_t data) case PLIST_DATA: free(data->buff); break; + case PLIST_DICT: + hash_table_destroy(data->hashtable); + break; default: break; } @@ -483,20 +512,27 @@ PLIST_API plist_t plist_dict_get_item(plist_t node, const char* key) if (node && PLIST_DICT == plist_get_node_type(node)) { - - plist_t current = NULL; - for (current = (plist_t)node_first_child(node); + plist_data_t data = plist_get_data(node); + hashtable_t *ht = (hashtable_t*)data->hashtable; + if (ht) { + struct plist_data_s sdata; + sdata.strval = (char*)key; + sdata.length = strlen(key); + ret = (plist_t)hash_table_lookup(ht, &sdata); + } else { + plist_t current = NULL; + for (current = (plist_t)node_first_child(node); current; current = (plist_t)node_next_sibling(node_next_sibling(current))) - { - - plist_data_t data = plist_get_data(current); - assert( PLIST_KEY == plist_get_node_type(current) ); - - if (data && !strcmp(key, data->strval)) { - ret = (plist_t)node_next_sibling(current); - break; + data = plist_get_data(current); + assert( PLIST_KEY == plist_get_node_type(current) ); + + if (data && !strcmp(key, data->strval)) + { + ret = (plist_t)node_next_sibling(current); + break; + } } } } @@ -507,6 +543,7 @@ PLIST_API void plist_dict_set_item(plist_t node, const char* key, plist_t item) { if (node && PLIST_DICT == plist_get_node_type(node)) { node_t* old_item = plist_dict_get_item(node, key); + plist_t key_node = NULL; if (old_item) { int idx = plist_free_node(old_item); if (idx < 0) { @@ -514,10 +551,32 @@ PLIST_API void plist_dict_set_item(plist_t node, const char* key, plist_t item) } else { node_insert(node, idx, item); } + key_node = node_prev_sibling(item); } else { - node_attach(node, plist_new_key(key)); + key_node = plist_new_key(key); + node_attach(node, key_node); node_attach(node, item); } + + hashtable_t *ht = ((plist_data_t)((node_t*)node)->data)->hashtable; + if (ht) { + /* store pointer to item in hash table */ + hash_table_insert(ht, (plist_data_t)((node_t*)key_node)->data, item); + } else { + if (((node_t*)node)->count > 500) { + /* make new hash table */ + ht = hash_table_new(dict_key_hash, dict_key_compare, NULL); + /* calculate the hashes for all entries we have so far */ + plist_t current = NULL; + for (current = (plist_t)node_first_child(node); + ht && current; + current = (plist_t)node_next_sibling(node_next_sibling(current))) + { + hash_table_insert(ht, ((node_t*)current)->data, node_next_sibling(current)); + } + ((plist_data_t)((node_t*)node)->data)->hashtable = ht; + } + } } return; } @@ -535,6 +594,10 @@ PLIST_API void plist_dict_remove_item(plist_t node, const char* key) if (old_item) { plist_t key_node = node_prev_sibling(old_item); + hashtable_t* ht = ((plist_data_t)((node_t*)node)->data)->hashtable; + if (ht) { + hash_table_remove(ht, ((node_t*)key_node)->data); + } plist_free(key_node); plist_free(old_item); } diff --git a/src/plist.h b/src/plist.h index da8f9ca..7bf62a8 100644 --- a/src/plist.h +++ b/src/plist.h @@ -56,6 +56,7 @@ struct plist_data_s double realval; char *strval; uint8_t *buff; + void *hashtable; }; uint64_t length; plist_type type; -- cgit v1.1-32-gdbae