hashtable test
This commit is contained in:
parent
503b08caad
commit
3d7e39cfd2
|
@ -0,0 +1,20 @@
|
|||
.PHONY: test clean
|
||||
TESTS=test_insert.test
|
||||
COMMON_OBJS=../test_common.o uns_hashtable.o
|
||||
.SUFFIXES: .test
|
||||
CFLAGS=-Wall -std=c89 -Werror -pedantic -fPIC -g -Iinclude
|
||||
|
||||
test: $(TESTS) $(COMMON_OBJS)
|
||||
for i in $(TESTS); do \
|
||||
LD_LIBRARY_PATH=$$(pwd)/../../ valgrind ./$$i || exit 1; \
|
||||
done
|
||||
|
||||
test_insert.test: $(COMMON_OBJS)
|
||||
|
||||
.c.test:
|
||||
$(CC) -I../../include $(CFLAGS) $< $(COMMON_OBJS) -L../../ -luniversalservice -o $@
|
||||
.c.o:
|
||||
$(CC) -I../../include $(CFLAGS) $< -c -o $@
|
||||
|
||||
clean:
|
||||
rm -f $(TESTS) $(COMMON_OBJS)
|
|
@ -0,0 +1,87 @@
|
|||
/* Copyright (c) 2024, Peter McGoron
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* 1) Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2) Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
|
||||
* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <assert.h>
|
||||
#include "uns.h"
|
||||
#include "uns_hashtable.h"
|
||||
#include "testnames.h"
|
||||
|
||||
#define ARRLEN(a) (sizeof(a) /sizeof((a)[0]))
|
||||
|
||||
int test(struct uns_gc *gc)
|
||||
{
|
||||
/* Keys and values will be stored as NUL terminated for simplicity. */
|
||||
struct uns_ctr htbl = {0};
|
||||
struct uns_ctr str = {0};
|
||||
struct uns_ctr val = {0};
|
||||
struct uns_ctr old_value = {0};
|
||||
int i, j;
|
||||
size_t slen;
|
||||
|
||||
uns_root_add(gc, &htbl);
|
||||
uns_root_add(gc, &str);
|
||||
uns_root_add(gc, &val);
|
||||
uns_root_add(gc, &old_value);
|
||||
|
||||
uns_hashtable_alloc_into(gc, &htbl, 32);
|
||||
|
||||
for (i = 0; i < ARRLEN(names); i++) {
|
||||
printf("%d\r", i);
|
||||
fflush(stdout);
|
||||
|
||||
slen = strlen(names[i]);
|
||||
str.p = gc->alloc(gc, slen);
|
||||
val.p = gc->alloc(gc, strlen(values[i]));
|
||||
strcpy(str.p, names[i]);
|
||||
strcpy(val.p, values[i]);
|
||||
|
||||
uns_hashtable_add(gc, &htbl, &str, slen, &val, &old_value);
|
||||
assert(!old_value.p);
|
||||
|
||||
for (j = 0; j < ARRLEN(names); j++) {
|
||||
uns_hashtable_search(gc, &htbl, (unsigned char*)names[j], strlen(names[j]),
|
||||
&val);
|
||||
if (j <= i && (!val.p || strcmp(val.p, values[j]) != 0)) {
|
||||
printf("\n%s, %s\n", (char *)val.p, values[j]);
|
||||
printf("\n%d-%d failed\n", i, j);
|
||||
exit(1);
|
||||
} else if (j > i && val.p) {
|
||||
printf("\n%d: %d should not exist\n", i, j);
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
}
|
||||
printf("\ndone\n");
|
||||
|
||||
uns_root_remove(gc, &old_value);
|
||||
uns_root_remove(gc, &val);
|
||||
uns_root_remove(gc, &str);
|
||||
uns_root_remove(gc, &htbl);
|
||||
gc->collect(gc);
|
||||
return 0;
|
||||
}
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,254 @@
|
|||
/* Copyright (c) 2024, Peter McGoron
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* 1) Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2) Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
|
||||
* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <assert.h>
|
||||
#include <string.h>
|
||||
#include "uns.h"
|
||||
#include "uns_hashtable.h"
|
||||
|
||||
enum {
|
||||
HTBL_LEN_PTR,
|
||||
HTBL_USED_PTR,
|
||||
HTBL_HDR_LEN
|
||||
};
|
||||
|
||||
static size_t get_size_t(struct uns_gc *gc, struct uns_ctr *root, size_t i)
|
||||
{
|
||||
size_t r;
|
||||
|
||||
void *p = gc->record_get_ptr(root->p, i);
|
||||
memcpy(&r, p, sizeof(r));
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
static void set_size_t(struct uns_gc *gc, struct uns_ctr *root, size_t i, size_t val)
|
||||
{
|
||||
void *p = gc->record_get_ptr(root->p, i);
|
||||
memcpy(p, &val, sizeof(val));
|
||||
}
|
||||
|
||||
void uns_hashtable_alloc_into(struct uns_gc *gc, struct uns_ctr *r, size_t size)
|
||||
{
|
||||
unsigned char *p;
|
||||
size_t i;
|
||||
r->p = gc->alloc_record(gc, size + HTBL_HDR_LEN);
|
||||
|
||||
p = gc->alloc(gc, sizeof(size_t));
|
||||
gc->record_set_ptr(r->p, HTBL_LEN_PTR, p);
|
||||
set_size_t(gc, r, HTBL_LEN_PTR, size);
|
||||
|
||||
p = gc->alloc(gc, sizeof(size_t));
|
||||
gc->record_set_ptr(r->p, HTBL_USED_PTR, p);
|
||||
set_size_t(gc, r, HTBL_USED_PTR, 0);
|
||||
|
||||
/* Set entries to zero. */
|
||||
for (i = HTBL_HDR_LEN; i <= size; i++)
|
||||
gc->record_set_ptr(r->p, i, NULL);
|
||||
}
|
||||
|
||||
size_t uns_hashtable_len(struct uns_gc *gc, struct uns_ctr *htbl)
|
||||
{
|
||||
return get_size_t(gc, htbl, HTBL_LEN_PTR);
|
||||
}
|
||||
|
||||
size_t uns_hashtable_used(struct uns_gc *gc, struct uns_ctr *htbl)
|
||||
{
|
||||
return get_size_t(gc, htbl, HTBL_USED_PTR);
|
||||
}
|
||||
|
||||
enum {
|
||||
LENGTH,
|
||||
STRING,
|
||||
VALUE,
|
||||
NEXT,
|
||||
HTBL_ENTRY_LEN
|
||||
};
|
||||
|
||||
static void alloc_holder(struct uns_gc *gc, struct uns_ctr *r, struct uns_ctr *string,
|
||||
struct uns_ctr *value, size_t len)
|
||||
{
|
||||
unsigned char *alloc_ptr;
|
||||
r->p = gc->alloc_record(gc, HTBL_ENTRY_LEN);
|
||||
|
||||
alloc_ptr = gc->alloc(gc, sizeof(size_t));
|
||||
memcpy(alloc_ptr, &len, sizeof(size_t));
|
||||
|
||||
gc->record_set_ptr(r->p, LENGTH, alloc_ptr);
|
||||
gc->record_set_ptr(r->p, STRING, string->p);
|
||||
gc->record_set_ptr(r->p, VALUE, value->p);
|
||||
gc->record_set_ptr(r->p, NEXT, NULL);
|
||||
}
|
||||
|
||||
static unsigned long fnv(unsigned char *p, size_t len)
|
||||
{
|
||||
unsigned long hash = 0x811c9dc5;
|
||||
size_t i;
|
||||
|
||||
for (i = 0; i < len; i++) {
|
||||
hash ^= p[i];
|
||||
hash *= 16777619;
|
||||
}
|
||||
|
||||
return hash;
|
||||
}
|
||||
|
||||
static int search_container(struct uns_gc *gc, struct uns_ctr *cur_ctr,
|
||||
unsigned char *str, size_t len)
|
||||
{
|
||||
unsigned char *ctr_str = gc->record_get_ptr(cur_ctr->p, STRING);
|
||||
size_t ctr_str_len = get_size_t(gc, cur_ctr, LENGTH);
|
||||
void *next;
|
||||
|
||||
if (ctr_str_len != len || memcmp(ctr_str, str, len) != 0) {
|
||||
/* strings do not match */
|
||||
next = gc->record_get_ptr(cur_ctr->p, NEXT);
|
||||
if (!next)
|
||||
return 0;
|
||||
cur_ctr->p = next;
|
||||
return search_container(gc, cur_ctr, str, len);
|
||||
} else {
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
static int search_for_container(struct uns_gc *gc, struct uns_ctr *htbl,
|
||||
struct uns_ctr *container,
|
||||
unsigned char *str, size_t len, size_t *hash_out)
|
||||
{
|
||||
size_t htbl_len = uns_hashtable_len(gc, htbl);
|
||||
size_t hash = fnv(str, len) % htbl_len;
|
||||
if (hash_out)
|
||||
*hash_out = hash;
|
||||
|
||||
container->p = gc->record_get_ptr(htbl->p, hash + HTBL_HDR_LEN);
|
||||
if (!container->p)
|
||||
return 0;
|
||||
return search_container(gc, container, str, len);
|
||||
}
|
||||
|
||||
void uns_hashtable_search(struct uns_gc *gc, struct uns_ctr *htbl,
|
||||
unsigned char *str, size_t string_len,
|
||||
struct uns_ctr *found)
|
||||
{
|
||||
int r;
|
||||
|
||||
r = search_for_container(gc, htbl, found, str, string_len, NULL);
|
||||
if (r)
|
||||
found->p = gc->record_get_ptr(found->p, VALUE);
|
||||
else
|
||||
found->p = NULL;
|
||||
}
|
||||
|
||||
void uns_hashtable_add(struct uns_gc *gc, struct uns_ctr *htbl,
|
||||
struct uns_ctr *string, size_t string_len,
|
||||
struct uns_ctr *value, struct uns_ctr *old_value)
|
||||
{
|
||||
struct uns_ctr container = {0};
|
||||
struct uns_ctr next_container = {0};
|
||||
size_t hash;
|
||||
|
||||
if (uns_hashtable_used(gc, htbl) >= uns_hashtable_len(gc, htbl)*7/10)
|
||||
uns_hashtable_resize(gc, htbl, uns_hashtable_len(gc, htbl)*2);
|
||||
|
||||
if (search_for_container(gc, htbl, &container, string->p, string_len, &hash)) {
|
||||
old_value->p = gc->record_get_ptr(container.p, VALUE);
|
||||
gc->record_set_ptr(container.p, VALUE, value->p);
|
||||
return;
|
||||
}
|
||||
|
||||
set_size_t(gc, htbl, HTBL_USED_PTR,
|
||||
uns_hashtable_used(gc, htbl) + 1);
|
||||
old_value->p = NULL;
|
||||
uns_root_add(gc, &container);
|
||||
uns_root_add(gc, &next_container);
|
||||
|
||||
alloc_holder(gc, &next_container, string, value, string_len);
|
||||
|
||||
if (container.p) {
|
||||
gc->record_set_ptr(container.p, NEXT, next_container.p);
|
||||
} else {
|
||||
gc->record_set_ptr(htbl->p, hash + HTBL_HDR_LEN, next_container.p);
|
||||
}
|
||||
|
||||
uns_root_remove(gc, &next_container);
|
||||
uns_root_remove(gc, &container);
|
||||
}
|
||||
|
||||
/* TODO: add delete */
|
||||
|
||||
static void add_for_ent(struct uns_gc *gc, struct uns_ctr *htbl, void *ctrp)
|
||||
{
|
||||
struct uns_ctr ctr = {0};
|
||||
struct uns_ctr str = {0};
|
||||
size_t str_len;
|
||||
struct uns_ctr value = {0};
|
||||
struct uns_ctr old_value_should_never_happen = {0};
|
||||
|
||||
if (!ctrp)
|
||||
return;
|
||||
|
||||
ctr.p = ctrp;
|
||||
uns_root_add(gc, &ctr);
|
||||
uns_root_add(gc, &str);
|
||||
uns_root_add(gc, &value);
|
||||
uns_root_add(gc, &old_value_should_never_happen);
|
||||
|
||||
str.p = gc->record_get_ptr(ctr.p, STRING);
|
||||
value.p = gc->record_get_ptr(ctr.p, VALUE);
|
||||
str_len = get_size_t(gc, &ctr, LENGTH);
|
||||
|
||||
uns_hashtable_add(gc, htbl, &str, str_len, &value,
|
||||
&old_value_should_never_happen);
|
||||
assert(!old_value_should_never_happen.p);
|
||||
|
||||
uns_root_remove(gc, &old_value_should_never_happen);
|
||||
uns_root_remove(gc, &value);
|
||||
uns_root_remove(gc, &str);
|
||||
uns_root_remove(gc, &ctr);
|
||||
|
||||
ctrp = gc->record_get_ptr(ctr.p, NEXT);
|
||||
add_for_ent(gc, htbl, ctrp);
|
||||
}
|
||||
|
||||
void uns_hashtable_resize(struct uns_gc *gc, struct uns_ctr *htbl, size_t newsize)
|
||||
{
|
||||
struct uns_ctr newtbl = {0};
|
||||
size_t i;
|
||||
size_t oldlen = uns_hashtable_len(gc, htbl);
|
||||
|
||||
uns_root_add(gc, &newtbl);
|
||||
|
||||
uns_hashtable_alloc_into(gc, &newtbl, newsize);
|
||||
set_size_t(gc, &newtbl, HTBL_USED_PTR,
|
||||
uns_hashtable_used(gc, &newtbl));
|
||||
|
||||
for (i = 0; i < oldlen; i++)
|
||||
add_for_ent(gc, &newtbl, gc->record_get_ptr(htbl->p, i + HTBL_HDR_LEN));
|
||||
|
||||
htbl->p = newtbl.p;
|
||||
uns_root_remove(gc, &newtbl);
|
||||
}
|
|
@ -0,0 +1,62 @@
|
|||
#ifndef UNS_STRING_H
|
||||
#define UNS_STRING_H
|
||||
|
||||
/* Copyright (c) 2024, Peter McGoron
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* 1) Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2) Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
|
||||
* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "uns.h"
|
||||
|
||||
/* Imperative hash tables with internal chaining.
|
||||
*
|
||||
* Internal chaining is inefficient, but it is a good showcase of garbage
|
||||
* collection.
|
||||
*
|
||||
* Hashtable:
|
||||
* [size][used][pointers to entries]
|
||||
* NULL or |/---------------------------------------------\
|
||||
* [pointer to length][pointer to string][pointer to next] /
|
||||
* | NULL or /
|
||||
* \-----------/
|
||||
*/
|
||||
|
||||
void uns_hashtable_search(struct uns_gc *gc, struct uns_ctr *htbl,
|
||||
unsigned char *str, size_t string_len,
|
||||
struct uns_ctr *found);
|
||||
size_t uns_hashtable_used(struct uns_gc *gc, struct uns_ctr *htbl);
|
||||
size_t uns_hashtable_len(struct uns_gc *gc, struct uns_ctr *htbl);
|
||||
/*
|
||||
void uns_hashtable_del(struct uns_gc *gc, struct uns_ctr *htbl,
|
||||
unsigned char *str, size_t string_len,
|
||||
struct uns_ctr *old_value);
|
||||
*/
|
||||
|
||||
/* The following functions may collect. */
|
||||
void uns_hashtable_alloc_into(struct uns_gc *gc, struct uns_ctr *r, size_t size);
|
||||
void uns_hashtable_add(struct uns_gc *gc, struct uns_ctr *htbl,
|
||||
struct uns_ctr *string, size_t string_len,
|
||||
struct uns_ctr *value, struct uns_ctr *old_value);
|
||||
void uns_hashtable_resize(struct uns_gc *gc, struct uns_ctr *htbl, size_t newsize);
|
||||
|
||||
#endif
|
|
@ -1,6 +1,6 @@
|
|||
.PHONY: test clean
|
||||
TESTS=test_small.test test_inefficient.test test_large.test
|
||||
COMMON_OBJS=test_common.o uns_string.o
|
||||
COMMON_OBJS=../test_common.o uns_string.o
|
||||
.SUFFIXES: .test
|
||||
CFLAGS=-Wall -Wno-overlength-strings -std=c89 -Werror -pedantic -fPIC -g -Iinclude
|
||||
|
||||
|
|
Loading…
Reference in New Issue