/* Copyright (c) 2024, Peter McGoron * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1) Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2) Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include #include #include #include "uns.h" #include "uns_hashtable.h" enum { HTBL_LEN_PTR, HTBL_USED_PTR, HTBL_HDR_LEN }; static size_t get_size_t(struct uns_gc *gc, struct uns_ctr *root, size_t i) { size_t r; void *p = gc->record_get_ptr(root->p, i); memcpy(&r, p, sizeof(r)); return r; } static void set_size_t(struct uns_gc *gc, struct uns_ctr *root, size_t i, size_t val) { void *p = gc->record_get_ptr(root->p, i); memcpy(p, &val, sizeof(val)); } static void *index(struct uns_gc *gc, struct uns_ctr *htbl, size_t i) { return gc->record_get_ptr(htbl->p, i + HTBL_HDR_LEN); } void uns_hashtable_alloc_into(struct uns_gc *gc, struct uns_ctr *r, size_t size) { unsigned char *p; size_t i; r->p = gc->alloc_record(gc, size + HTBL_HDR_LEN); p = gc->alloc(gc, sizeof(size_t)); gc->record_set_ptr(r->p, HTBL_LEN_PTR, p); set_size_t(gc, r, HTBL_LEN_PTR, size); p = gc->alloc(gc, sizeof(size_t)); gc->record_set_ptr(r->p, HTBL_USED_PTR, p); set_size_t(gc, r, HTBL_USED_PTR, 0); /* Set entries to zero. */ for (i = HTBL_HDR_LEN; i <= size; i++) gc->record_set_ptr(r->p, i, NULL); } size_t uns_hashtable_len(struct uns_gc *gc, struct uns_ctr *htbl) { return get_size_t(gc, htbl, HTBL_LEN_PTR); } size_t uns_hashtable_used(struct uns_gc *gc, struct uns_ctr *htbl) { return get_size_t(gc, htbl, HTBL_USED_PTR); } enum { LENGTH, STRING, VALUE, NEXT, HTBL_ENTRY_LEN }; static void alloc_holder(struct uns_gc *gc, struct uns_ctr *r, struct uns_ctr *string, struct uns_ctr *value, size_t len) { unsigned char *alloc_ptr; r->p = gc->alloc_record(gc, HTBL_ENTRY_LEN); alloc_ptr = gc->alloc(gc, sizeof(size_t)); memcpy(alloc_ptr, &len, sizeof(size_t)); gc->record_set_ptr(r->p, LENGTH, alloc_ptr); gc->record_set_ptr(r->p, STRING, string->p); gc->record_set_ptr(r->p, VALUE, value->p); gc->record_set_ptr(r->p, NEXT, NULL); } static unsigned long fnv(unsigned char *p, size_t len) { unsigned long hash = 0x811c9dc5; size_t i; for (i = 0; i < len; i++) { hash ^= p[i]; hash *= 16777619; } return hash; } static int search_container(struct uns_gc *gc, struct uns_ctr *cur_ctr, unsigned char *str, size_t len) { unsigned char *ctr_str = gc->record_get_ptr(cur_ctr->p, STRING); size_t ctr_str_len = get_size_t(gc, cur_ctr, LENGTH); void *next; if (ctr_str_len != len || memcmp(ctr_str, str, len) != 0) { /* strings do not match */ next = gc->record_get_ptr(cur_ctr->p, NEXT); if (!next) return 0; cur_ctr->p = next; return search_container(gc, cur_ctr, str, len); } else { return 1; } } static int search_for_container(struct uns_gc *gc, struct uns_ctr *htbl, struct uns_ctr *container, unsigned char *str, size_t len, size_t *hash_out) { size_t htbl_len = uns_hashtable_len(gc, htbl); size_t hash = fnv(str, len) % htbl_len; if (hash_out) *hash_out = hash; container->p = index(gc, htbl, hash); if (!container->p) return 0; return search_container(gc, container, str, len); } /* This could be made better with internal pointers, but that is not * portable. */ void uns_hashtable_del(struct uns_gc *gc, struct uns_ctr *htbl, unsigned char *str, size_t len, struct uns_ctr *old_value) { size_t hash = fnv(str, len) % uns_hashtable_len(gc, htbl); struct uns_ctr rec = {0}; struct uns_ctr prevptr = {0}; old_value->p = NULL; rec.p = index(gc, htbl, hash); if (!rec.p) { return; } /* Special case: first pointer */ if (len == get_size_t(gc, &rec, LENGTH) && memcmp(str, gc->record_get_ptr(rec.p, STRING), len) == 0) { old_value->p = gc->record_get_ptr(rec.p, VALUE); gc->record_set_ptr(htbl->p, HTBL_HDR_LEN + hash, gc->record_get_ptr(rec.p, NEXT)); } prevptr = rec; for (rec.p = gc->record_get_ptr(rec.p, NEXT); rec.p; rec.p = gc->record_get_ptr(rec.p, NEXT)) { if (len == get_size_t(gc, &rec, LENGTH) && memcmp(str, gc->record_get_ptr(rec.p, VALUE), len) == 0) { old_value->p = gc->record_get_ptr(rec.p, VALUE); gc->record_set_ptr(prevptr.p, NEXT, gc->record_get_ptr(rec.p, NEXT)); return; } } } void uns_hashtable_search(struct uns_gc *gc, struct uns_ctr *htbl, unsigned char *str, size_t string_len, struct uns_ctr *found) { int r; r = search_for_container(gc, htbl, found, str, string_len, NULL); if (r) found->p = gc->record_get_ptr(found->p, VALUE); else found->p = NULL; } void uns_hashtable_add(struct uns_gc *gc, struct uns_ctr *htbl, struct uns_ctr *string, size_t string_len, struct uns_ctr *value, struct uns_ctr *old_value) { struct uns_ctr container = {0}; struct uns_ctr next_container = {0}; size_t hash; if (uns_hashtable_used(gc, htbl) >= uns_hashtable_len(gc, htbl)*7/10) uns_hashtable_resize(gc, htbl, uns_hashtable_len(gc, htbl)*2); if (search_for_container(gc, htbl, &container, (unsigned char *)string->p, string_len, &hash)) { old_value->p = gc->record_get_ptr(container.p, VALUE); gc->record_set_ptr(container.p, VALUE, value->p); return; } set_size_t(gc, htbl, HTBL_USED_PTR, uns_hashtable_used(gc, htbl) + 1); old_value->p = NULL; uns_root_add(gc, &container); uns_root_add(gc, &next_container); alloc_holder(gc, &next_container, string, value, string_len); if (container.p) { gc->record_set_ptr(container.p, NEXT, next_container.p); } else { gc->record_set_ptr(htbl->p, hash + HTBL_HDR_LEN, next_container.p); } uns_root_remove(gc, &next_container); uns_root_remove(gc, &container); } /* TODO: add delete */ static void add_for_ent(struct uns_gc *gc, struct uns_ctr *htbl, void *ctrp) { struct uns_ctr ctr = {0}; struct uns_ctr str = {0}; size_t str_len; struct uns_ctr value = {0}; struct uns_ctr old_value_should_never_happen = {0}; if (!ctrp) return; ctr.p = ctrp; uns_root_add(gc, &ctr); uns_root_add(gc, &str); uns_root_add(gc, &value); uns_root_add(gc, &old_value_should_never_happen); str.p = gc->record_get_ptr(ctr.p, STRING); value.p = gc->record_get_ptr(ctr.p, VALUE); str_len = get_size_t(gc, &ctr, LENGTH); uns_hashtable_add(gc, htbl, &str, str_len, &value, &old_value_should_never_happen); assert(!old_value_should_never_happen.p); uns_root_remove(gc, &old_value_should_never_happen); uns_root_remove(gc, &value); uns_root_remove(gc, &str); uns_root_remove(gc, &ctr); ctrp = gc->record_get_ptr(ctr.p, NEXT); add_for_ent(gc, htbl, ctrp); } void uns_hashtable_resize(struct uns_gc *gc, struct uns_ctr *htbl, size_t newsize) { struct uns_ctr newtbl = {0}; size_t i; size_t oldlen = uns_hashtable_len(gc, htbl); uns_root_add(gc, &newtbl); uns_hashtable_alloc_into(gc, &newtbl, newsize); set_size_t(gc, &newtbl, HTBL_USED_PTR, uns_hashtable_used(gc, &newtbl)); for (i = 0; i < oldlen; i++) add_for_ent(gc, &newtbl, index(gc, htbl, i)); htbl->p = newtbl.p; uns_root_remove(gc, &newtbl); }