universalservice/examples/hashtable/uns_hashtable.c

296 lines
8.2 KiB
C
Raw Permalink Normal View History

2024-06-13 20:57:07 -04:00
/* Copyright (c) 2024, Peter McGoron
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1) Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2) Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <stdio.h>
#include <assert.h>
#include <string.h>
#include "uns.h"
#include "uns_hashtable.h"
enum {
HTBL_LEN_PTR,
HTBL_USED_PTR,
HTBL_HDR_LEN
};
static size_t get_size_t(Uns_GC gc, struct uns_ctr *root, size_t i)
2024-06-13 20:57:07 -04:00
{
size_t r;
void *p = uns_get(gc, root->p, i, NULL);
2024-06-13 20:57:07 -04:00
memcpy(&r, p, sizeof(r));
return r;
}
static void set_size_t(Uns_GC gc, struct uns_ctr *root, size_t i, size_t val)
2024-06-13 20:57:07 -04:00
{
void *p = uns_get(gc, root->p, i, NULL);
2024-06-13 20:57:07 -04:00
memcpy(p, &val, sizeof(val));
}
static void *index(Uns_GC gc, struct uns_ctr *htbl, size_t i)
2024-06-21 16:49:55 -04:00
{
return uns_get(gc, htbl->p, i + HTBL_HDR_LEN, NULL);
2024-06-21 16:49:55 -04:00
}
void uns_hashtable_alloc_into(Uns_GC gc, struct uns_ctr *r, size_t size)
2024-06-13 20:57:07 -04:00
{
unsigned char *p;
size_t i;
r->p = uns_alloc_rec(gc, size + HTBL_HDR_LEN, 0);
2024-06-13 20:57:07 -04:00
p = uns_alloc(gc, sizeof(size_t), 0);
uns_set(gc, r->p, HTBL_LEN_PTR, UNS_POINTER, p);
2024-06-13 20:57:07 -04:00
set_size_t(gc, r, HTBL_LEN_PTR, size);
p = uns_alloc(gc, sizeof(size_t), 0);
uns_set(gc, r->p, HTBL_USED_PTR, UNS_POINTER, p);
2024-06-13 20:57:07 -04:00
set_size_t(gc, r, HTBL_USED_PTR, 0);
/* Set entries to zero. */
for (i = HTBL_HDR_LEN; i <= size; i++)
uns_set(gc, r->p, i, UNS_POINTER, NULL);
2024-06-13 20:57:07 -04:00
}
size_t uns_hashtable_len(Uns_GC gc, struct uns_ctr *htbl)
2024-06-13 20:57:07 -04:00
{
return get_size_t(gc, htbl, HTBL_LEN_PTR);
}
size_t uns_hashtable_used(Uns_GC gc, struct uns_ctr *htbl)
2024-06-13 20:57:07 -04:00
{
return get_size_t(gc, htbl, HTBL_USED_PTR);
}
enum {
LENGTH,
STRING,
VALUE,
NEXT,
HTBL_ENTRY_LEN
};
static void alloc_holder(Uns_GC gc, struct uns_ctr *r, struct uns_ctr *string,
2024-06-13 20:57:07 -04:00
struct uns_ctr *value, size_t len)
{
unsigned char *alloc_ptr;
r->p = uns_alloc_rec(gc, HTBL_ENTRY_LEN, 0);
2024-06-13 20:57:07 -04:00
alloc_ptr = uns_alloc(gc, sizeof(size_t), 0);
2024-06-13 20:57:07 -04:00
memcpy(alloc_ptr, &len, sizeof(size_t));
uns_set(gc, r->p, LENGTH, UNS_POINTER, alloc_ptr);
uns_set(gc, r->p, STRING, UNS_POINTER, string->p);
uns_set(gc, r->p, VALUE, UNS_POINTER, value->p);
uns_set(gc, r->p, NEXT, UNS_POINTER, NULL);
2024-06-13 20:57:07 -04:00
}
static unsigned long fnv(unsigned char *p, size_t len)
{
unsigned long hash = 0x811c9dc5;
size_t i;
for (i = 0; i < len; i++) {
hash ^= p[i];
hash *= 16777619;
}
return hash;
}
static int search_container(Uns_GC gc, struct uns_ctr *cur_ctr,
2024-06-13 20:57:07 -04:00
unsigned char *str, size_t len)
{
unsigned char *ctr_str = uns_get(gc, cur_ctr->p, STRING, NULL);
2024-06-13 20:57:07 -04:00
size_t ctr_str_len = get_size_t(gc, cur_ctr, LENGTH);
void *next;
if (ctr_str_len != len || memcmp(ctr_str, str, len) != 0) {
/* strings do not match */
next = uns_get(gc, cur_ctr->p, NEXT, NULL);
2024-06-13 20:57:07 -04:00
if (!next)
return 0;
cur_ctr->p = next;
return search_container(gc, cur_ctr, str, len);
} else {
return 1;
}
}
static int search_for_container(Uns_GC gc, struct uns_ctr *htbl,
2024-06-13 20:57:07 -04:00
struct uns_ctr *container,
unsigned char *str, size_t len, size_t *hash_out)
{
size_t htbl_len = uns_hashtable_len(gc, htbl);
size_t hash = fnv(str, len) % htbl_len;
if (hash_out)
*hash_out = hash;
2024-06-21 16:49:55 -04:00
container->p = index(gc, htbl, hash);
2024-06-13 20:57:07 -04:00
if (!container->p)
return 0;
return search_container(gc, container, str, len);
}
2024-06-21 16:49:55 -04:00
/* This could be made better with internal pointers, but that is not
* portable.
*/
void uns_hashtable_del(Uns_GC gc, struct uns_ctr *htbl,
2024-06-21 16:49:55 -04:00
unsigned char *str, size_t len,
struct uns_ctr *old_value)
{
size_t hash = fnv(str, len) % uns_hashtable_len(gc, htbl);
struct uns_ctr rec = {0};
struct uns_ctr prevptr = {0};
old_value->p = NULL;
rec.p = index(gc, htbl, hash);
if (!rec.p) {
return;
}
/* Special case: first pointer */
if (len == get_size_t(gc, &rec, LENGTH) &&
memcmp(str, uns_get(gc, rec.p, STRING, NULL), len) == 0) {
old_value->p = uns_get(gc, rec.p, VALUE, NULL);
uns_set(gc, htbl->p, HTBL_HDR_LEN + hash, UNS_POINTER,
uns_get(gc, rec.p, NEXT, NULL));
2024-06-21 16:49:55 -04:00
}
prevptr = rec;
for (rec.p = uns_get(gc, rec.p, NEXT, NULL); rec.p;
rec.p = uns_get(gc, rec.p, NEXT, NULL)) {
2024-06-21 16:49:55 -04:00
if (len == get_size_t(gc, &rec, LENGTH)
&& memcmp(str, uns_get(gc, rec.p, VALUE, NULL), len) == 0) {
old_value->p = uns_get(gc, rec.p, VALUE, NULL);
uns_set(gc, prevptr.p, UNS_POINTER, NEXT,
uns_get(gc, rec.p, NEXT, NULL));
2024-06-21 16:49:55 -04:00
return;
}
}
}
void uns_hashtable_search(Uns_GC gc, struct uns_ctr *htbl,
2024-06-13 20:57:07 -04:00
unsigned char *str, size_t string_len,
struct uns_ctr *found)
{
int r;
r = search_for_container(gc, htbl, found, str, string_len, NULL);
if (r)
found->p = uns_get(gc, found->p, VALUE, NULL);
2024-06-13 20:57:07 -04:00
else
found->p = NULL;
}
void uns_hashtable_add(Uns_GC gc, struct uns_ctr *htbl,
2024-06-13 20:57:07 -04:00
struct uns_ctr *string, size_t string_len,
struct uns_ctr *value, struct uns_ctr *old_value)
{
struct uns_ctr container = {0};
struct uns_ctr next_container = {0};
size_t hash;
if (uns_hashtable_used(gc, htbl) >= uns_hashtable_len(gc, htbl)*7/10)
uns_hashtable_resize(gc, htbl, uns_hashtable_len(gc, htbl)*2);
if (search_for_container(gc, htbl, &container, (unsigned char *)string->p, string_len, &hash)) {
old_value->p = uns_get(gc, container.p, VALUE, NULL);
uns_set(gc, container.p, VALUE, UNS_POINTER, value->p);
2024-06-13 20:57:07 -04:00
return;
}
set_size_t(gc, htbl, HTBL_USED_PTR,
uns_hashtable_used(gc, htbl) + 1);
old_value->p = NULL;
uns_root_add(gc, &container);
uns_root_add(gc, &next_container);
alloc_holder(gc, &next_container, string, value, string_len);
if (container.p) {
uns_set(gc, container.p, NEXT, UNS_POINTER, next_container.p);
2024-06-13 20:57:07 -04:00
} else {
uns_set(gc, htbl->p, hash + HTBL_HDR_LEN, UNS_POINTER, next_container.p);
2024-06-13 20:57:07 -04:00
}
uns_root_remove(gc, &next_container);
uns_root_remove(gc, &container);
}
static void add_for_ent(Uns_GC gc, struct uns_ctr *htbl, void *ctrp)
2024-06-13 20:57:07 -04:00
{
struct uns_ctr ctr = {0};
struct uns_ctr str = {0};
size_t str_len;
struct uns_ctr value = {0};
struct uns_ctr old_value_should_never_happen = {0};
if (!ctrp)
return;
ctr.p = ctrp;
uns_root_add(gc, &ctr);
uns_root_add(gc, &str);
uns_root_add(gc, &value);
uns_root_add(gc, &old_value_should_never_happen);
str.p = uns_get(gc, ctr.p, STRING, NULL);
value.p = uns_get(gc, ctr.p, VALUE, NULL);
2024-06-13 20:57:07 -04:00
str_len = get_size_t(gc, &ctr, LENGTH);
uns_hashtable_add(gc, htbl, &str, str_len, &value,
&old_value_should_never_happen);
assert(!old_value_should_never_happen.p);
uns_root_remove(gc, &old_value_should_never_happen);
uns_root_remove(gc, &value);
uns_root_remove(gc, &str);
uns_root_remove(gc, &ctr);
ctrp = uns_get(gc, ctr.p, NEXT, NULL);
2024-06-13 20:57:07 -04:00
add_for_ent(gc, htbl, ctrp);
}
void uns_hashtable_resize(Uns_GC gc, struct uns_ctr *htbl, size_t newsize)
2024-06-13 20:57:07 -04:00
{
struct uns_ctr newtbl = {0};
size_t i;
size_t oldlen = uns_hashtable_len(gc, htbl);
uns_root_add(gc, &newtbl);
uns_hashtable_alloc_into(gc, &newtbl, newsize);
set_size_t(gc, &newtbl, HTBL_USED_PTR,
uns_hashtable_used(gc, &newtbl));
for (i = 0; i < oldlen; i++)
2024-06-21 16:49:55 -04:00
add_for_ent(gc, &newtbl, index(gc, htbl, i));
2024-06-13 20:57:07 -04:00
htbl->p = newtbl.p;
uns_root_remove(gc, &newtbl);
}