diff --git a/Makefile b/Makefile index 7935619..7440a9d 100644 --- a/Makefile +++ b/Makefile @@ -4,7 +4,7 @@ .SUFFIXES: .c .o .test CC=cc -CFLAGS=-Wall -std=c89 -Werror -pedantic -fPIC -g -Iinclude +CFLAGS=-Wall -std=c89 -Werror -pedantic -fPIC -g -Iinclude -I. tests: string_tests htable_tests @@ -36,6 +36,12 @@ examples/hashtable/uns_hashtable.o: examples/hashtable/uns_hashtable.c \ include/uns.h UNS_HASHTABLE_OBJS=examples/hashtable/uns_hashtable.o +examples/lisp/uns_lisp.o: include/uns.h \ + examples/lisp/uns_lisp.c \ + examples/string/uns_string.h + +UNS_LISP_OBJS=examples/lisp/uns_lisp.o examples/string/uns_string.o + EXAMPLE_OBJS=${UNS_STRING_OBJS} ${UNS_HASHTABLE_OBJS} ## Clean diff --git a/README.rst b/README.rst index ba606a6..0944439 100644 --- a/README.rst +++ b/README.rst @@ -12,12 +12,12 @@ Terminology ----------- Generally speaking, ``UNS_WORD`` must be an integer that can be converted -to and from a pointer to data. ``UNS_SWORD`` is the signed version of -``UNS_WORD``. Both must be integer types. +to and from a pointer to data (such as ``uintptr_t``). ``UNS_SWORD`` is the +signed version of ``UNS_WORD``. Both must be integer types. In collectors where this conversion cannot be assumed (like C89 collectors) -or not possible, then ``UNS_WORD`` should be a type that can be used to -index any arrays (like ``size_t``) and ``UNS_SWORD`` is like ``ssize_t``. +or is not possible, then ``UNS_WORD`` should be a type that can be used to +index any arrays (like ``size_t``). A "region" denotes a block of memory in the heap. The "header" of a region is a hidden area of the region that holds information about @@ -86,5 +86,4 @@ Todo ---- * call before gc and after gc -* Make makefiles simpler and POSIX compliant * Address sanitizer, ub sanitizer if available diff --git a/examples/lisp/README.rst b/examples/lisp/README.rst new file mode 100644 index 0000000..76d6a21 --- /dev/null +++ b/examples/lisp/README.rst @@ -0,0 +1,8 @@ +====================== +Universal Service LISP +====================== + +A Scheme R5RS interpreter written C89 using Universal Service GCs. + +You shouldn't use it for real programming, but it will work on any C89 +compiler. diff --git a/examples/lisp/prelude.scm b/examples/lisp/prelude.scm new file mode 100644 index 0000000..b181e34 --- /dev/null +++ b/examples/lisp/prelude.scm @@ -0,0 +1,264 @@ +; Copyright (c) 2024, Peter McGoron +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; +; 1) Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; 2) Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in the +; documentation and/or other materials provided with the distribution. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +; TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +; Primitives are special syntatic forms. They differ from built-in +; functions. +; +; primitives: +; __lambda: lambda of one argument +; __define: define in the environment +; __define-macro: Like __define but for macros +; set! +; if +; +; Others: +; __uniapply: Apply function to single argument +; : A value that can only be passed to __lambda. It cannot +; be bound to a top-level, or "set!", or added to a list. +; It can also be the parameter of a __lambda, although +; it still cannot be used. +; +; Macros are functions that return an sexpr. + +(__define-macro let + (__lambda l ; car = args, cdr = body + (if (null? (car args)) + (cdr args) + ; (car args) = ((A a) (B b) ...) + ; (car (car args)) = (A a) + ; (cdr (car (car args))) = (a) + `((__lambda ,(car (car (car args))) + (let ,(cdr (car args)) ,@body)) + ,@(car (cdr (car (car args)))) + ) + ) + ) +) + +(__define-macro let* let) + +; if and __lambda only execute one statement. +(__define-macro begin body + (let ((first (car body)) + (rest (cdr body)) + ) + (if (null? rest) + first + `((__lambda (begin ,@rest)) ,first) + ) + ) +) + +;;; (cond (case1 expr1) (case2 expr2) ... (else exprelse)) +(__define-macro cond body + (let ((cases (car body))) + (if (null? body) + + (let* ((branch (car cases)) + (rest (cdr cases)) + (test (car branch)) + (to-exec (cdr branch)) + ) + (if (null? to-exec) + (let ((tmp (gensym))) + `(let ((,tmp ,test)) + (if ,tmp ,tmp (cond ,@rest)) + ) + ) + (if (eqv? test 'else) + (if (null? rest) + `(begin ,@rest) + (error "invalid else clause") + ) + (if (eqv? (car to-exec) '=>) + (let ((tmp (gensym)) + (fun (cdr to-exec)) + ) + `(let ((,tmp ,test)) + (if ,tmp (,fun ,tmp) (cond ,@rest)) + ) + ) + `(if ,tmp (begin ,@rest) (cond ,@rest)) + ) + ) + ) + ) + ) + ) +) + +(__define-macro __bindlambda + (__lambda l + (let ((larg (car l)) + (args (car (cdr l))) + (body (cdr (cdr l))) + ) + (if (null? args) + `(__lambda larg + (if (not (null? larg)) + (raise "incorrect number of arguments") + (begin ,@body) + ) + ) + (let* ((argval (cons args)) + (rest (cdr args)) + (arg (cons argval)) + (val (cons (cdr argval))) + ) + `(__lambda ,larg + (let ((,arg ,val)) __bindlambda ,larg ,rest ,@body) + ) + ) + ) + ) + ) +) + +(__define-macro lambda + (__lambda l + (let ((args (car l)) + (body (cdr l)) + ) + (if (symbol? args) + `(__lambda ,args (begin ,@body)) + (if (null? args) + `(__lambda (begin ,@body)) + (let ((larg (gensym))) + `(__bindlambda ,larg ,@body) + ) + ) + ) + ) + ) +) + +(__define-macro define-macro + (__lambda l + (let* ((name-and-args (car l)) + (name (car name-and-args)) + (args (cdr name-and-args)) + (body (cdr l)) + (tmpname (gensym)) + ) + `(__define-macro ,name + (lambda ,args ,@body) + ) + ) + ) +) + +(define-macro (and . body) + (if (null? body) + 1 + (let ((first (car body)) + (rest (cdr body)) + ) + `(if ,first (and ,@rest) 0) + ) + ) +) + +(define-macro (or . body) + (if (null? body) + 0 + (let ((first (car body)) + (rest (cdr body)) + ) + `(if ,first 1 (or ,@rest)) + ) + ) +) + +(define-macro (letrec args . body) + (if (null? args) + body + (let* ((argval (car args)) + (arg (car argval)) + (val (car (cdr argval))) + (rest (cdr args)) + ) + `(let ((,arg )) + (letrec ,rest (begin (set! ,arg ,val) ,@body)) + ) + ) + ) +) + +(define-macro (define name . body) + (if (symbol? name) + `(__define ,name ,@body) + (let ((fname (car name)) + (args (cdr name)) + (tmparg (gensym)) + ) + `(__define ,fname + (lambda ,tmparg + (letrec ((,fname (lambda ,args ,@body))) + (,fname . ,tmparg) + ) + ) + ) + ) + ) +) + +;;;;;;;;;;;;;; Standard Library ;;;;;;;;;;;;;;;;;;; + +;; for a list of (v1 v2 ... vn) +;; runs (f vn (... (f v2 (f v1 start)))) +(define (foldl f start l) + (letrec + ((loop + (lambda (ret-list cur-list) + (if (eqv? value '()) + value + (loop (f (car cur-list) value) + (cdr cur-list)) + ) + ) + )) + (loop start l) + ) +) + +(define (reverse l) (foldl cons '() l)) + +;; for a list of (v1 v2 ... vn) +;; runs (f v1 (f v2 (... (f vn start)))) +(define (foldr f start l) (foldl f start (reverse l))) + +(define (append . l) + (foldr (lambda (to-prepend collected) + (foldr cons collected to-prepend) + ) + '() + l + ) +) + +(define (apply f . l) (__uniapply f (__uniapply append l))) +(define (list . l) l) + +;; (define (+ . l) (foldl __bin+ 0 l)) +;; (define (* . l) (foldl __bin* 1 l)) diff --git a/examples/lisp/uns_lisp.c b/examples/lisp/uns_lisp.c new file mode 100644 index 0000000..5e89716 --- /dev/null +++ b/examples/lisp/uns_lisp.c @@ -0,0 +1,921 @@ +/* Copyright (c) 2024, Peter McGoron + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1) Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2) Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include "uns.h" +#include "examples/string/uns_string.h" + +static Uns_GC gc; +static struct uns_ctr empty_list; + +enum token_type { + T_EOF, + LPAREN, + RPAREN, + QUOTE, + QUASIQUOTE, + UNQUOTE, + UNQUOTE_LIST, + T_IDENT, + T_INT, + T_FLOAT, + T_STRING, + T_DOT, + TOKEN_NUM +}; + +static const char *token2string_repr[TOKEN_NUM] = { + "'EOF", + "(", + ")", + "quote", + "quasiquote", + "unquote", + "unquote-list", + "'ident", + "'number", + "'float", + "'string-tok", + "'dot" +}; + +struct location { + unsigned long line; + unsigned long offset; +}; + +struct file { + FILE *f; + struct location loc; +}; + +struct token { + enum token_type typ; + struct uns_ctr dat; + + double f; + long i; +}; + +static int is_ws(int c) +{ + return c == '\n' || c == '\t' || c == '\r' || c == '\v' || c == ' '; +} + +static int get(struct file *f) +{ + int c = getc(f->f); + + if (c == '\n') { + f->loc.line++; + f->loc.offset = 0; + } else { + f->loc.offset += 1; + } + + return c; +} + +static void unget(int c, struct file *f) +{ + ungetc(c, f->f); +} + +static int get_skipws(struct file *input) +{ + int c; + + for (;;) { + c = get(input); + if (c == ';') { + do { c = get(input); } while (c != '\n'); + unget(c, input); + continue; + } + + if (is_ws(c)) + continue; + return c; + } +} + +static void tok_string(struct file *input, struct token *tok) +{ + int c; + + tok->typ = T_STRING; + uns_string_alloc(gc, &tok->dat, 32); + for (;;) { + c = get(input); + switch (c) { + case '\\': + c = get(input); + switch (c) { + case ' ': case '\t': case '\r': case '\n': case '\v': + do { + c = get(input); + } while (is_ws(c)); + unget(c, input); + continue; + case 'r': + uns_string_append_char(gc, &tok->dat, '\r'); + continue; + case 'n': + uns_string_append_char(gc, &tok->dat, '\n'); + continue; + case 'v': + uns_string_append_char(gc, &tok->dat, '\v'); + continue; + case 't': + uns_string_append_char(gc, &tok->dat, '\t'); + continue; + case '"': + uns_string_append_char(gc, &tok->dat, '"'); + continue; + default: + uns_string_append_char(gc, &tok->dat, '\\'); + uns_string_append_char(gc, &tok->dat, c); + } + case '"': /* " */ + return; + } + } +} + +static int tonum(int c) +{ + switch (c) { + case '0': case '1': case '2': case '3': case '4': case '5': + case '6': case '7': case '8': case '9': + return c - '0'; + default: + return -1; + } +} + +static void tok_num(struct file *input, struct token *tok, int c) +{ + int is_float = 0; + + uns_string_alloc(gc, &tok->dat, 32); + do { + if (c == '.' || c == 'e' || c == 'E' || c == '-' || c == '+') + is_float = 1; + uns_string_append_char(gc, &tok->dat, c); + c = get(input); + } while (tonum(c) >= 0 || c == '.' || c == 'e' || c == 'E' || c == '-' || c == '+'); + unget(c, input); + + if (is_float) { + tok->typ = T_FLOAT; + tok->f = strtod(uns_string_cstring(gc, &tok->dat), NULL); + } else { + tok->typ = T_INT; + tok->i = strtol(uns_string_cstring(gc, &tok->dat), NULL, 10); + } + + tok->dat.p = NULL; +} + +static int part_of_ident(int c) +{ + return !is_ws(c) && c != '(' && c != ')' && c != '`' + && c != '\'' && c != '\\' && c != '\"' && c != ',' + && c != ';'; +} + +static void tok_ident(struct file *input, struct token *tok, int c) +{ + tok->typ = T_IDENT; + + uns_string_alloc(gc, &tok->dat, 32); + do { + uns_string_append_char(gc, &tok->dat, c); + c = get(input); + } while (part_of_ident(c)); + unget(c, input); +} + +static void tokenize(struct file *input, struct token *tok) +{ + int c = get_skipws(input); + int c2; + + tok->dat.p = NULL; + + switch (c) { + case EOF: + tok->typ = T_EOF; + return; + case '(': + tok->typ = LPAREN; + return; + case ')': + tok->typ = RPAREN; + return; + case '\'': + tok->typ = QUOTE; + return; + case '`': + tok->typ = QUASIQUOTE; + return; + case ',': + c = get(input); + if (c == '@') { + tok->typ = UNQUOTE_LIST; + } else { + unget(c, input); + tok->typ = UNQUOTE; + } + return; + case '"': + tok_string(input, tok); + return; + tok->typ = T_STRING; + return; + case '+': case '-': + c2 = get(input); + unget(c2, input); + if (tonum(c2) >= 0) { + tok_num(input, tok, c); + return; + } + /* FALLTHROUGH */ + default: + if (c == '.') { + c2 = get(input); + if (tonum(c2) >= 0) { + unget(c2, input); + tok_num(input, tok, c); + } else if (part_of_ident(c2)) { + unget(c2, input); + tok_ident(input, tok, c); + } else { + tok->typ = T_DOT; + } + } else if (tonum(c) >= 0) { + tok_num(input, tok, c); + } else { + tok_ident(input, tok, c); + } + return; + } +} + +enum item_type { + CELL, + LAMBDA, + INTEGER, + FLOAT, + STRING, + SYMBOL, + EMPTY_LIST, + LISP_NULL +}; + +/* TODO: if the allocator allows for weak references, do a lookup of + * the symbol in the tree, and if it exists, use that instead of the + * token. + * + * This check should always be ignored on collectors without weak + * references. + */ +static void alloc_of_type(struct uns_ctr *ctr, int typ) +{ + void *p; + int fields; + int i; + + switch (typ) { + case CELL: fields = 2; break; + case LAMBDA: fields = 2; break; + case INTEGER: fields = 1; break; + case STRING: fields = 1; break; + case SYMBOL: fields = 1; break; + case FLOAT: fields = 1; break; + case EMPTY_LIST: fields = 0; break; + case LISP_NULL: fields = 0; break; + default: abort(); + } + + ctr->p = uns_alloc_rec(gc, fields + 1, 0); + p = uns_alloc(gc, sizeof(int), 0); + memcpy(p, &typ, sizeof(int)); + uns_set(gc, ctr->p, 0, UNS_POINTER, p); + + for (i = 0; i < fields; i++) + uns_set(gc, ctr->p, i + 1, UNS_POINTER, NULL); +} + +static int get_type(Uns_ptr p) +{ + int typ; + void *innerp; + + if (!p) + return LISP_NULL; + innerp = uns_get(gc, p, 0, NULL); + + memcpy(&typ, innerp, sizeof(int)); + return typ; +} + +static void alloc_symbol_from_cstring(struct uns_ctr *ctr, const char *s, size_t slen) +{ + struct uns_ctr str = {0}; + + uns_root_add(gc, &str); + uns_string_alloc(gc, &str, slen); + uns_string_append_bytes(gc, &str, s, slen); + + alloc_of_type(ctr, SYMBOL); + uns_set(gc, ctr->p, 1, UNS_POINTER, str.p); + + uns_root_remove(gc, &str); +} + +enum expr_stack_state { + EXPR_STACK_INITIAL, + EXPR_STACK_QUOTELIKE, + EXPR_STACK_SURROUND_OTHER, + EXPR_STACK_START_LIST, + EXPR_STACK_IN_LIST, + EXPR_STACK_IMPROPER_LIST, + EXPR_STACK_IMPROPER_LIST_END, + EXPR_STACK_INVALID +}; + +enum expr_stack_fields { + EXPR_FIELD_NEXT, + EXPR_FIELD_PTR, + EXPR_FIELD_STATE, + EXPR_FIELD_NUM +}; + +static void expr_stack_push(struct uns_ctr *stack, struct uns_ctr *loc, enum expr_stack_state state) +{ + struct uns_ctr tmp = {0}; + + uns_root_add(gc, &tmp); + tmp.p = stack->p; + stack->p = uns_alloc_rec(gc, EXPR_FIELD_NUM, 0); + uns_set(gc, stack->p, EXPR_FIELD_NEXT, UNS_POINTER, tmp.p); + uns_root_remove(gc, &tmp); + + uns_set(gc, stack->p, EXPR_FIELD_PTR, UNS_POINTER, loc->p); + + tmp.p = uns_alloc(gc, sizeof(state), 0); + memcpy(tmp.p, &state, sizeof(state)); + uns_set(gc, stack->p, EXPR_FIELD_STATE, UNS_POINTER, tmp.p); +} + +static enum expr_stack_state expr_stack_state(struct uns_ctr *stack) +{ + enum expr_stack_state r; + if (!stack->p) + return EXPR_STACK_INVALID; + memcpy(&r, uns_get(gc, stack->p, EXPR_FIELD_STATE, NULL), + sizeof(r)); + return r; +} + +static void expr_stack_change_state(struct uns_ctr *stack, enum expr_stack_state newst) +{ + memcpy(uns_get(gc, stack->p, EXPR_FIELD_STATE, NULL), + &newst, + sizeof(newst)); +} + +static void expr_stack_ctr(struct uns_ctr *stack, struct uns_ctr *loc) +{ + loc->p = uns_get(gc, stack->p, EXPR_FIELD_PTR, NULL); +} + +static int expr_stack_pop(struct uns_ctr *stack) +{ + if (!stack->p) + return 0; + stack->p = uns_get(gc, stack->p, EXPR_FIELD_NEXT, NULL); + return 1; +} + +enum parser_return { + EXPR_PARSE_OK, + EXPR_PARSE_EOF, + EXPR_PARSE_INCOMPLETE, + EXPR_PARSE_EXCESS_RPAREN, + EXPR_PARSE_INVALID_EMPTY_LIST, + EXPR_PARSE_IMPROPER_LIST_OVERFLOW, + EXPR_PARSE_BAD_IMPROPER_LIST, + EXPR_PARSE_BAD_STACK_START, + EXPR_PARSE_BAD_QUOTE, + EXPR_PARSE_INTERNAL_ERROR +}; + +static int tok_to_type(enum token_type tokt) +{ + switch (tokt) { + case T_IDENT: return SYMBOL; + case T_STRING: return STRING; + default: return -1; + } +} + +/* Parse expressions using an explicit stack. + * This is feasible to to hand-written and has the advantage that error + * recovery is much easier than recursion (implicit stack). + * + * The stack consists of a LIFO: + * [previous value][container][parser state] + * + * The implementation stores containers on the stack and when a container + * is complete or an atom is read, it is stored in "expr" and the stack + * is modified. + */ +static enum parser_return expr_parse(struct file *input, struct uns_ctr *expr) +{ + struct uns_ctr stack = {0}; + struct uns_ctr loc = {0}; + struct uns_ctr new_cell = {0}; + struct token tok = {0}; + int store = 0; + enum parser_return r = EXPR_PARSE_OK; + + uns_root_add(gc, &stack); + uns_root_add(gc, &loc); + uns_root_add(gc, &tok.dat); + uns_root_add(gc, &new_cell); + + /* Allocate expr->p to be a single pointer. */ + expr->p = uns_alloc_rec(gc, 1, 0); + + /* Initialize the stack with the final result. */ + expr_stack_push(&stack, expr, EXPR_STACK_INITIAL); + + while (stack.p) { + tokenize(input, &tok); + + switch (tok.typ) { + case LPAREN: + /* Push a new cell onto the stack and parse it later. */ + alloc_of_type(expr, CELL); + expr_stack_push(&stack, expr, EXPR_STACK_START_LIST); + break; + case QUOTE: case QUASIQUOTE: case UNQUOTE: case UNQUOTE_LIST: + /* push (token-name ) onto the stack, and have + * it point to the first cell. + */ + alloc_of_type(&loc, CELL); + alloc_symbol_from_cstring(expr, token2string_repr[tok.typ], + strlen(token2string_repr[tok.typ])); + uns_set(gc, loc.p, 1, UNS_POINTER, expr->p); + + alloc_of_type(expr, CELL); + uns_set(gc, loc.p, 2, UNS_POINTER, expr->p); + uns_set(gc, expr->p, 2, UNS_POINTER, empty_list.p); + + if (tok.typ == QUOTE || tok.typ == QUASIQUOTE) + expr_stack_push(&stack, &loc, EXPR_STACK_QUOTELIKE); + else + expr_stack_push(&stack, &loc, EXPR_STACK_SURROUND_OTHER); + break; + + /* All atoms are allocated into "expr" and will be stored in the + * container after the switch. + */ + case T_IDENT: case T_STRING: + alloc_of_type(expr, tok_to_type(tok.typ)); + uns_set(gc, expr->p, 1, UNS_POINTER, tok.dat.p); + store = 1; + break; + case T_INT: + alloc_of_type(expr, INTEGER); + loc.p = uns_alloc(gc, sizeof(tok.i), 0); + memcpy(loc.p, &tok.i, sizeof(tok.i)); + uns_set(gc, expr->p, 1, UNS_POINTER, loc.p); + store = 1; + break; + case T_FLOAT: + alloc_of_type(expr, FLOAT); + loc.p = uns_alloc(gc, sizeof(tok.f), 0); + memcpy(loc.p, &tok.f, sizeof(tok.f)); + uns_set(gc, expr->p, 1, UNS_POINTER, loc.p); + store = 1; + break; + + case RPAREN: + switch (expr_stack_state(&stack)) { + case EXPR_STACK_START_LIST: + /* Empty list found. Pop the cell and discard it. + * If we are not in a "surround" state, report error. + * Replace the surround state with an empty list. + */ + expr_stack_pop(&stack); + if (!stack.p) { + r = EXPR_PARSE_INTERNAL_ERROR; + goto end; + } + + if (expr_stack_state(&stack) != EXPR_STACK_QUOTELIKE) { + r = EXPR_PARSE_INVALID_EMPTY_LIST; + goto end; + } + + /* Discard quote/quasiquote and store the empty list instead. */ + if (!expr_stack_pop(&stack)) { + r = EXPR_PARSE_INTERNAL_ERROR; + goto end; + } + + expr->p = empty_list.p; + store = 1; + break; + case EXPR_STACK_IN_LIST: + /* In a list, there are two things on the stack: + * first cell and last cell. + * Place the empty list in the cdr of the last cell, + * and then discard the last cell. Put the first cell into expr + * and store it in the container in the stack before it. + */ + expr_stack_ctr(&stack, &loc); + if (get_type(loc.p) != CELL) { + r = EXPR_PARSE_INTERNAL_ERROR; + goto end; + } + + uns_set(gc, loc.p, 2, UNS_POINTER, empty_list.p); + /* FALLTHROUGH */ + case EXPR_STACK_IMPROPER_LIST_END: + /* CDR of the list has been filled. Pop the last cell. */ + expr_stack_pop(&stack); + if (!stack.p) { + r = EXPR_PARSE_INTERNAL_ERROR; + goto end; + } + + /* Put the first cell in expr, and read it into the next item + * of the stack. + */ + expr_stack_ctr(&stack, expr); + expr_stack_pop(&stack); + store = 1; + break; + case EXPR_STACK_INITIAL: + /* Expression is not balanced. */ + r = EXPR_PARSE_EXCESS_RPAREN; + goto end; + case EXPR_STACK_IMPROPER_LIST: + /* (values .) is an invalid expression. */ + r = EXPR_PARSE_BAD_IMPROPER_LIST; + goto end; + case EXPR_STACK_QUOTELIKE: case EXPR_STACK_SURROUND_OTHER: + /* ,) ') etc are invalid */ + r = EXPR_PARSE_BAD_QUOTE; + goto end; + case EXPR_STACK_INVALID: + r = EXPR_PARSE_INTERNAL_ERROR; + goto end; + } + break; + case T_DOT: + if (expr_stack_state(&stack) != EXPR_STACK_IN_LIST) { + r = EXPR_PARSE_BAD_IMPROPER_LIST; + goto end; + } + + expr_stack_change_state(&stack, EXPR_STACK_IMPROPER_LIST); + break; + case T_EOF: + /* If EOF is encountered in the middle of an expression, repor + * that. Otherwise, the parser reads nothing. + */ + if (expr_stack_state(&stack) != EXPR_STACK_INITIAL) { + r = EXPR_PARSE_INCOMPLETE; + goto end; + } + r = EXPR_PARSE_EOF; + goto end; + case TOKEN_NUM: + r = EXPR_PARSE_INTERNAL_ERROR; + goto end; + } + + /* If store is true, that means "expr" contains a value to + * be placed into the container at the top of the stack. + * + * This is a loop because some cases need to cascade values up the + * stack without acting on a token. When the loop stops, further + * action requires a new token to be read. + */ + while (store) switch (expr_stack_state(&stack)) { + case EXPR_STACK_INVALID: + r = EXPR_PARSE_INTERNAL_ERROR; + goto end; + case EXPR_STACK_START_LIST: + /* The current thing on the stack is the first part of the + * list. Add the expression to car of the cell, and add + * push a pointer to the cell. This is the last cell in + * the list. + */ + expr_stack_ctr(&stack, &loc); + if (get_type(loc.p) != CELL) { + r = EXPR_PARSE_INTERNAL_ERROR; + goto end; + } + + uns_set(gc, loc.p, 1, UNS_POINTER, expr->p); + expr_stack_push(&stack, &loc, EXPR_STACK_IN_LIST); + store = 0; + break; + case EXPR_STACK_IN_LIST: + /* The pointer on the top of the stack is the end of the + * list. Add a new list to it's cdr, and put expr into + * the car of the new end of the list. + */ + expr_stack_ctr(&stack, &loc); + if (get_type(loc.p) != CELL) { + r = EXPR_PARSE_INTERNAL_ERROR; + goto end; + } + + alloc_of_type(&new_cell, CELL); + uns_set(gc, loc.p, 2, UNS_POINTER, new_cell.p); + uns_set(gc, new_cell.p, 1, UNS_POINTER, expr->p); + expr_stack_pop(&stack); + expr_stack_push(&stack, &new_cell, EXPR_STACK_IN_LIST); + store = 0; + break; + case EXPR_STACK_IMPROPER_LIST: + /* Add the expression to the cdr of the last element. + * Set the state to expect the end of an improper list. + */ + expr_stack_ctr(&stack, &loc); + uns_set(gc, loc.p, 2, UNS_POINTER, expr->p); + if (get_type(loc.p) != CELL) { + r = EXPR_PARSE_INTERNAL_ERROR; + goto end; + } + + expr_stack_change_state(&stack, EXPR_STACK_IMPROPER_LIST_END); + store = 0; + break; + case EXPR_STACK_IMPROPER_LIST_END: + r = EXPR_PARSE_IMPROPER_LIST_OVERFLOW; + goto end; + case EXPR_STACK_QUOTELIKE: case EXPR_STACK_SURROUND_OTHER: + /* Expression at the top of the stack is + * (surrouding-thing ) + * Replace (car (cdr (top-of-stack)) with expr. + */ + expr_stack_ctr(&stack, &loc); + if (get_type(loc.p) != CELL) { + r = EXPR_PARSE_INTERNAL_ERROR; + goto end; + } + + loc.p = uns_get(gc, loc.p, 2, NULL); + if (get_type(loc.p) != CELL) { + r = EXPR_PARSE_INTERNAL_ERROR; + goto end; + } + + uns_set(gc, loc.p, 1, UNS_POINTER, expr->p); + + /* Get the head of the quoted value again. */ + expr_stack_ctr(&stack, expr); + expr_stack_pop(&stack); + + /* This value needs to be stored somewhere. Continue looping. */ + store = 1; + break; + case EXPR_STACK_INITIAL: + /* Finished parsing an expression at toplevel. */ + expr_stack_ctr(&stack, &loc); + uns_set(gc, loc.p, 0, UNS_POINTER, expr->p); + + /* Prepare for return. */ + expr->p = loc.p; + expr_stack_pop(&stack); + if (stack.p) { + r = EXPR_PARSE_BAD_STACK_START; + goto end; + } + store = 0; + break; + } + } + +end: + uns_root_remove(gc, &stack); + uns_root_remove(gc, &loc); + uns_root_remove(gc, &tok.dat); + uns_root_remove(gc, &new_cell); + return r; +} + +static void oom(Uns_GC gc_) +{ + (void)gc_; + printf("oom\n"); + abort(); +} + +/* TODO: Make UNS_Lisp its own library and move this out. */ +extern Uns_GC uns_lisp_gc_init(void); +static void init(void) +{ + gc = uns_lisp_gc_init(); + + uns_set_oom(gc, oom); + uns_root_add(gc, &empty_list); + alloc_of_type(&empty_list, EMPTY_LIST); +} + +static void display(struct uns_ctr *ctr, long indent) +{ + struct uns_ctr tmp = {0}; + long l; + double f; + + if (!ctr->p) { + printf(""); + return; + } + + switch (get_type(ctr->p)) { + case CELL: + uns_root_add(gc, &tmp); + + printf("("); + tmp.p = uns_get(gc, ctr->p, 1, NULL); + display(&tmp, indent); + + ctr->p = uns_get(gc, ctr->p, 2, NULL); + while (get_type(ctr->p) == CELL) { + tmp.p = uns_get(gc, ctr->p, 1, NULL); + if (get_type(tmp.p) == CELL) { + printf("\n"); + for (l = 0; l < indent; l++) + printf(" "); + display(&tmp, indent + 1); + } else { + printf(" "); + display(&tmp, indent); + } + + ctr->p = uns_get(gc, ctr->p, 2, NULL); + } + + switch (get_type(ctr->p)) { + case EMPTY_LIST: + printf(")"); + break; + default: + printf(" . "); + display(ctr, indent); + printf(")"); + break; + } + + uns_root_remove(gc, &tmp); + return; + case INTEGER: + memcpy(&l, uns_get(gc, ctr->p, 1, NULL), sizeof(long)); + printf("%ld", l); + return; + case FLOAT: + memcpy(&f, uns_get(gc, ctr->p, 1, NULL), sizeof(double)); + printf("%f", f); + return; + case STRING: + tmp.p = uns_get(gc, ctr->p, 1, NULL); + uns_root_add(gc, &tmp); + printf("\"%s\"", uns_string_cstring(gc, &tmp)); + uns_root_remove(gc, &tmp); + return; + case SYMBOL: + tmp.p = uns_get(gc, ctr->p, 1, NULL); + uns_root_add(gc, &tmp); + printf("%s", uns_string_cstring(gc, &tmp)); + uns_root_remove(gc, &tmp); + return; + case EMPTY_LIST: + printf("'()"); + return; + default: + abort(); + } +} + +/* Contination passing style. + * Continuation passing IR uses explicit statements instead of + * shorthand (i.e. (__pass K atom) instead of (K atom)). + * + * CPS primitives: + * (__pass k e): Pass e to continuation k. e must be an atom. + * (__apply f l k): Pass the argument list "l" to "f", and pass + * the result to "k". "f" must be a "__klambda". + * (__konstruct e body): Construct a continuation that takes a + * single argument. This is shortened to "\e body" below. + * (__klambda l k body): Constructs a function that takes a single + * value along with a continuation. + * (__primitive-> args...) The primitive, but the last argument is a + * continuation. + + * cps{atom, K} = (__pass K atom) + * cps{(__lambda l E), K} = (__pass K (__klambda l k cps{E,k})) + + * cps{(F A1 ... AN), K} = + * cps{F, \f cps_list{'(), (A1 ... AN), \l (__apply f l K)}} + * cps_list{L, '(), K} = (__pass K L) + * cps_list{L, (A . B), K} = + * cps{A, \a cps_list{append{L,a}, B, K}} + + * cps{(if E B1 B2), K} = + * cps{E, \e (__if* e (\K' cps{B1,K'}) (\K' cps{B1,K'}) K)} + * + * cps{(__define symb E}, K} = cps{E, \e (__define-> symb e K)} + * cps{(set! symb E), K} = cps{E, \e (__set!-> symb e K)} + */ + +static void error(struct location *loc, const char *emsg) +{ + fprintf(stderr, "%ld:%ld: ", loc->line, loc->offset); + fprintf(stderr, "%s\n", emsg); +} + +int main(void) +{ + struct uns_ctr expr = {0}; + struct file input = {0}; + input.loc.line = 1; + + init(); + uns_root_add(gc, &expr); + input.f = stdin; + + while (!feof(input.f)) { + expr.p = NULL; + switch (expr_parse(&input, &expr)) { + case EXPR_PARSE_OK: + expr.p = uns_get(gc, expr.p, 0, NULL); + display(&expr, 1); + printf("\n"); + break; + case EXPR_PARSE_INCOMPLETE: + error(&input.loc, "EOF before expression was finished"); + break; + case EXPR_PARSE_EXCESS_RPAREN: + error(&input.loc, "Unbalanced parentheses"); + break; + case EXPR_PARSE_INVALID_EMPTY_LIST: + error(&input.loc, "Invalid syntax for empty list (either '() or `'())"); + break; + case EXPR_PARSE_BAD_QUOTE: + error(&input.loc, "Invalid syntax for quotes/unquotes"); + break; + case EXPR_PARSE_IMPROPER_LIST_OVERFLOW: + error(&input.loc, "too many values at end of improper list (must be exactly one)"); + case EXPR_PARSE_BAD_IMPROPER_LIST: + error(&input.loc, "Bad syntax for improper list (must be (value . value))"); + break; + case EXPR_PARSE_INTERNAL_ERROR: + error(&input.loc, "Bug in implementation\n"); + break; + case EXPR_PARSE_BAD_STACK_START: + error(&input.loc, "Bug: bottom of stack not correct\n"); + break; + case EXPR_PARSE_EOF: + error(&input.loc, "EOF\n"); + break; + } + } + + uns_root_remove(gc, &expr); + uns_deinit(gc); + return 0; +} diff --git a/examples/lisp/uns_lisp_cheney_c89.c b/examples/lisp/uns_lisp_cheney_c89.c new file mode 100644 index 0000000..96b49a9 --- /dev/null +++ b/examples/lisp/uns_lisp_cheney_c89.c @@ -0,0 +1,60 @@ +/* Copyright (c) 2024, Peter McGoron + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1) Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2) Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include "uns.h" +#include "cheney_c89.h" + +static void after_gc(Uns_GC gc, struct uns_cheney_c89_statistics *stats) +{ + fprintf(stderr, + "cheney_c89 %ld: %lu -> %lu\n", + stats->collection_number, + stats->usage_before, + stats->usage_after + ); + + + if (stats->usage_after >= stats->usage_before * 7/10) { + uns_cheney_c89_set_new_heap_size(gc, + uns_cheney_c89_get_new_heap_size(gc) * 2); + } +} + + +Uns_GC uns_lisp_gc_init(void) +{ + Uns_GC gc = malloc(uns_gc_size); + if (!uns_cheney_c89_init(gc, 512)) { + fprintf(stderr, "Error initializing GC\n"); + exit(1); + } + + uns_cheney_c89_set_collect_callback(gc, after_gc); + uns_cheney_c89_set_new_heap_size(gc, 1024); + + return gc; +} diff --git a/gen_tests.sh b/gen_tests.sh index bccad2c..16f1419 100755 --- a/gen_tests.sh +++ b/gen_tests.sh @@ -1,29 +1,35 @@ #!/bin/sh # The road to hell is paved with Makefile generators. +# +# Each test is compiled statically linked with a single collector. +# +# OBJDEPS: Dependencies for an object file. +# DEPS: Dependencies for an executable file. TEST_TARGETS="" GENERATED_OBJS="" -gen_test() { - local NAME COLLECTOR TEST_IMPL DEPS OBJDEPS +gen_test() { # test_name, collector_name, exec_deps, obj_file_deps + local NAME COLLECTOR TARGET TEST_IMPL DEPS OBJDEPS NAME=$1 COLLECTOR=$2 + TARGET="$NAME"_"$COLLECTOR".test TEST_IMPL="$NAME.c" DEPS="$1.o uns.o $3" OBJDEPS="include/uns.h $1.c $4" - printf '%s.o: %s\n' "$NAME" "$OBJDEPS" - printf '%s_%s.test: %s\n' "$NAME" "$COLLECTOR" "$DEPS" - printf '\t${CC} ${LDFLAGS} %s -o %s_%s.test\n' \ - "$DEPS" "$NAME" "$COLLECTOR" - printf '\tvalgrind ./%s_%s.test\n' "$NAME" "$COLLECTOR" + echo " +$TARGET: $DEPS + \${CC} \${LDFLAGS} $DEPS $TARGET + ./valgrind ./$TARGET +" - TEST_TARGETS=$(printf "%s %s_%s.test" "$TEST_TARGETS" "$NAME" "$COLLECTOR") + TEST_TARGETS="$TEST_TARGETS $TARGET" GENERATED_OBJS="$GENERATED_OBJS $NAME.o" } -gen_string_test() { +gen_string_test() { # collector_name, exec_deps local COLLECTOR DEPS OBJDEPS COLLECTOR=$1 @@ -40,7 +46,7 @@ gen_string_test() { "$DEPS" "$OBJDEPS" } -gen_hashtable_test() { +gen_hashtable_test() { # collector_name, exec_deps local COLLECTOR DEPS OBJDEPS COLLECTOR=$1 @@ -54,14 +60,36 @@ gen_hashtable_test() { "$DEPS" "$OBJDEPS" } +gen_lisp_test() { #collector_name, exec_deps + local COLLECTOR SHIM_OBJ TARGET DEPS OBJDEPS + COLLECTOR=$1 + TARGET="examples/lisp/uns_lisp_$COLLECTOR" + SHIM_OBJ="$TARGET.o" + OBJDEPS="examples/lisp/uns_lisp_$COLLECTOR.c include/uns.h include/$COLLECTOR.h" + DEPS="$2 $SHIM_OBJ uns.o "'${UNS_LISP_OBJS}' + + echo " +$SHIM_OBJ: $OBJDEPS + +$TARGET: $DEPS + \${CC} \${LDFLAGS} $DEPS -o $TARGET +" + + TEST_TARGETS="$TEST_TARGETS $TARGET" + GENERTATED_OBJS="$GENERATED_OBJS $SHIM_OBJ" +} + gen_tests() { echo "examples/test_$1.o: include/uns.h $2" GENERATED_OBJS="$GENERATED_OBJS examples/test_$1.o" + gen_string_test "$1" '${CHENEY_C89_OBJS}' gen_hashtable_test "$1" '${CHENEY_C89_OBJS}' + gen_lisp_test "$1" '${CHENEY_C89_OBJS}' } gen_tests cheney_c89 "include/cheney_c89.h" +echo printf "tests: %s\n" "$TEST_TARGETS" printf 'clean_tests:\n\t${RM} -f %s %s\n' "$TEST_TARGETS" "$GENERATED_OBJS"