import flatrate lisp, rename to Universal Service LISP

This commit is contained in:
Peter McGoron 2024-07-10 10:47:55 -04:00
parent a406b64975
commit b41e2100c2
7 changed files with 1302 additions and 16 deletions

View file

@ -4,7 +4,7 @@
.SUFFIXES: .c .o .test
CC=cc
CFLAGS=-Wall -std=c89 -Werror -pedantic -fPIC -g -Iinclude
CFLAGS=-Wall -std=c89 -Werror -pedantic -fPIC -g -Iinclude -I.
tests: string_tests htable_tests
@ -36,6 +36,12 @@ examples/hashtable/uns_hashtable.o: examples/hashtable/uns_hashtable.c \
include/uns.h
UNS_HASHTABLE_OBJS=examples/hashtable/uns_hashtable.o
examples/lisp/uns_lisp.o: include/uns.h \
examples/lisp/uns_lisp.c \
examples/string/uns_string.h
UNS_LISP_OBJS=examples/lisp/uns_lisp.o examples/string/uns_string.o
EXAMPLE_OBJS=${UNS_STRING_OBJS} ${UNS_HASHTABLE_OBJS}
## Clean

View file

@ -12,12 +12,12 @@ Terminology
-----------
Generally speaking, ``UNS_WORD`` must be an integer that can be converted
to and from a pointer to data. ``UNS_SWORD`` is the signed version of
``UNS_WORD``. Both must be integer types.
to and from a pointer to data (such as ``uintptr_t``). ``UNS_SWORD`` is the
signed version of ``UNS_WORD``. Both must be integer types.
In collectors where this conversion cannot be assumed (like C89 collectors)
or not possible, then ``UNS_WORD`` should be a type that can be used to
index any arrays (like ``size_t``) and ``UNS_SWORD`` is like ``ssize_t``.
or is not possible, then ``UNS_WORD`` should be a type that can be used to
index any arrays (like ``size_t``).
A "region" denotes a block of memory in the heap. The "header" of a
region is a hidden area of the region that holds information about
@ -86,5 +86,4 @@ Todo
----
* call before gc and after gc
* Make makefiles simpler and POSIX compliant
* Address sanitizer, ub sanitizer if available

8
examples/lisp/README.rst Normal file
View file

@ -0,0 +1,8 @@
======================
Universal Service LISP
======================
A Scheme R5RS interpreter written C89 using Universal Service GCs.
You shouldn't use it for real programming, but it will work on any C89
compiler.

264
examples/lisp/prelude.scm Normal file
View file

@ -0,0 +1,264 @@
; Copyright (c) 2024, Peter McGoron
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions
; are met:
;
; 1) Redistributions of source code must retain the above copyright
; notice, this list of conditions and the following disclaimer.
; 2) Redistributions in binary form must reproduce the above copyright
; notice, this list of conditions and the following disclaimer in the
; documentation and/or other materials provided with the distribution.
;
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
; TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
; Primitives are special syntatic forms. They differ from built-in
; functions.
;
; primitives:
; __lambda: lambda of one argument
; __define: define in the environment
; __define-macro: Like __define but for macros
; set!
; if
;
; Others:
; __uniapply: Apply function to single argument
; <undefined>: A value that can only be passed to __lambda. It cannot
; be bound to a top-level, or "set!", or added to a list.
; It can also be the parameter of a __lambda, although
; it still cannot be used.
;
; Macros are functions that return an sexpr.
(__define-macro let
(__lambda l ; car = args, cdr = body
(if (null? (car args))
(cdr args)
; (car args) = ((A a) (B b) ...)
; (car (car args)) = (A a)
; (cdr (car (car args))) = (a)
`((__lambda ,(car (car (car args)))
(let ,(cdr (car args)) ,@body))
,@(car (cdr (car (car args))))
)
)
)
)
(__define-macro let* let)
; if and __lambda only execute one statement.
(__define-macro begin body
(let ((first (car body))
(rest (cdr body))
)
(if (null? rest)
first
`((__lambda <undefined> (begin ,@rest)) ,first)
)
)
)
;;; (cond (case1 expr1) (case2 expr2) ... (else exprelse))
(__define-macro cond body
(let ((cases (car body)))
(if (null? body)
<undefined>
(let* ((branch (car cases))
(rest (cdr cases))
(test (car branch))
(to-exec (cdr branch))
)
(if (null? to-exec)
(let ((tmp (gensym)))
`(let ((,tmp ,test))
(if ,tmp ,tmp (cond ,@rest))
)
)
(if (eqv? test 'else)
(if (null? rest)
`(begin ,@rest)
(error "invalid else clause")
)
(if (eqv? (car to-exec) '=>)
(let ((tmp (gensym))
(fun (cdr to-exec))
)
`(let ((,tmp ,test))
(if ,tmp (,fun ,tmp) (cond ,@rest))
)
)
`(if ,tmp (begin ,@rest) (cond ,@rest))
)
)
)
)
)
)
)
(__define-macro __bindlambda
(__lambda l
(let ((larg (car l))
(args (car (cdr l)))
(body (cdr (cdr l)))
)
(if (null? args)
`(__lambda larg
(if (not (null? larg))
(raise "incorrect number of arguments")
(begin ,@body)
)
)
(let* ((argval (cons args))
(rest (cdr args))
(arg (cons argval))
(val (cons (cdr argval)))
)
`(__lambda ,larg
(let ((,arg ,val)) __bindlambda ,larg ,rest ,@body)
)
)
)
)
)
)
(__define-macro lambda
(__lambda l
(let ((args (car l))
(body (cdr l))
)
(if (symbol? args)
`(__lambda ,args (begin ,@body))
(if (null? args)
`(__lambda <undefined> (begin ,@body))
(let ((larg (gensym)))
`(__bindlambda ,larg ,@body)
)
)
)
)
)
)
(__define-macro define-macro
(__lambda l
(let* ((name-and-args (car l))
(name (car name-and-args))
(args (cdr name-and-args))
(body (cdr l))
(tmpname (gensym))
)
`(__define-macro ,name
(lambda ,args ,@body)
)
)
)
)
(define-macro (and . body)
(if (null? body)
1
(let ((first (car body))
(rest (cdr body))
)
`(if ,first (and ,@rest) 0)
)
)
)
(define-macro (or . body)
(if (null? body)
0
(let ((first (car body))
(rest (cdr body))
)
`(if ,first 1 (or ,@rest))
)
)
)
(define-macro (letrec args . body)
(if (null? args)
body
(let* ((argval (car args))
(arg (car argval))
(val (car (cdr argval)))
(rest (cdr args))
)
`(let ((,arg <undefined>))
(letrec ,rest (begin (set! ,arg ,val) ,@body))
)
)
)
)
(define-macro (define name . body)
(if (symbol? name)
`(__define ,name ,@body)
(let ((fname (car name))
(args (cdr name))
(tmparg (gensym))
)
`(__define ,fname
(lambda ,tmparg
(letrec ((,fname (lambda ,args ,@body)))
(,fname . ,tmparg)
)
)
)
)
)
)
;;;;;;;;;;;;;; Standard Library ;;;;;;;;;;;;;;;;;;;
;; for a list of (v1 v2 ... vn)
;; runs (f vn (... (f v2 (f v1 start))))
(define (foldl f start l)
(letrec
((loop
(lambda (ret-list cur-list)
(if (eqv? value '())
value
(loop (f (car cur-list) value)
(cdr cur-list))
)
)
))
(loop start l)
)
)
(define (reverse l) (foldl cons '() l))
;; for a list of (v1 v2 ... vn)
;; runs (f v1 (f v2 (... (f vn start))))
(define (foldr f start l) (foldl f start (reverse l)))
(define (append . l)
(foldr (lambda (to-prepend collected)
(foldr cons collected to-prepend)
)
'()
l
)
)
(define (apply f . l) (__uniapply f (__uniapply append l)))
(define (list . l) l)
;; (define (+ . l) (foldl __bin+ 0 l))
;; (define (* . l) (foldl __bin* 1 l))

921
examples/lisp/uns_lisp.c Normal file
View file

@ -0,0 +1,921 @@
/* Copyright (c) 2024, Peter McGoron
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1) Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2) Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include "uns.h"
#include "examples/string/uns_string.h"
static Uns_GC gc;
static struct uns_ctr empty_list;
enum token_type {
T_EOF,
LPAREN,
RPAREN,
QUOTE,
QUASIQUOTE,
UNQUOTE,
UNQUOTE_LIST,
T_IDENT,
T_INT,
T_FLOAT,
T_STRING,
T_DOT,
TOKEN_NUM
};
static const char *token2string_repr[TOKEN_NUM] = {
"'EOF",
"(",
")",
"quote",
"quasiquote",
"unquote",
"unquote-list",
"'ident",
"'number",
"'float",
"'string-tok",
"'dot"
};
struct location {
unsigned long line;
unsigned long offset;
};
struct file {
FILE *f;
struct location loc;
};
struct token {
enum token_type typ;
struct uns_ctr dat;
double f;
long i;
};
static int is_ws(int c)
{
return c == '\n' || c == '\t' || c == '\r' || c == '\v' || c == ' ';
}
static int get(struct file *f)
{
int c = getc(f->f);
if (c == '\n') {
f->loc.line++;
f->loc.offset = 0;
} else {
f->loc.offset += 1;
}
return c;
}
static void unget(int c, struct file *f)
{
ungetc(c, f->f);
}
static int get_skipws(struct file *input)
{
int c;
for (;;) {
c = get(input);
if (c == ';') {
do { c = get(input); } while (c != '\n');
unget(c, input);
continue;
}
if (is_ws(c))
continue;
return c;
}
}
static void tok_string(struct file *input, struct token *tok)
{
int c;
tok->typ = T_STRING;
uns_string_alloc(gc, &tok->dat, 32);
for (;;) {
c = get(input);
switch (c) {
case '\\':
c = get(input);
switch (c) {
case ' ': case '\t': case '\r': case '\n': case '\v':
do {
c = get(input);
} while (is_ws(c));
unget(c, input);
continue;
case 'r':
uns_string_append_char(gc, &tok->dat, '\r');
continue;
case 'n':
uns_string_append_char(gc, &tok->dat, '\n');
continue;
case 'v':
uns_string_append_char(gc, &tok->dat, '\v');
continue;
case 't':
uns_string_append_char(gc, &tok->dat, '\t');
continue;
case '"':
uns_string_append_char(gc, &tok->dat, '"');
continue;
default:
uns_string_append_char(gc, &tok->dat, '\\');
uns_string_append_char(gc, &tok->dat, c);
}
case '"': /* " */
return;
}
}
}
static int tonum(int c)
{
switch (c) {
case '0': case '1': case '2': case '3': case '4': case '5':
case '6': case '7': case '8': case '9':
return c - '0';
default:
return -1;
}
}
static void tok_num(struct file *input, struct token *tok, int c)
{
int is_float = 0;
uns_string_alloc(gc, &tok->dat, 32);
do {
if (c == '.' || c == 'e' || c == 'E' || c == '-' || c == '+')
is_float = 1;
uns_string_append_char(gc, &tok->dat, c);
c = get(input);
} while (tonum(c) >= 0 || c == '.' || c == 'e' || c == 'E' || c == '-' || c == '+');
unget(c, input);
if (is_float) {
tok->typ = T_FLOAT;
tok->f = strtod(uns_string_cstring(gc, &tok->dat), NULL);
} else {
tok->typ = T_INT;
tok->i = strtol(uns_string_cstring(gc, &tok->dat), NULL, 10);
}
tok->dat.p = NULL;
}
static int part_of_ident(int c)
{
return !is_ws(c) && c != '(' && c != ')' && c != '`'
&& c != '\'' && c != '\\' && c != '\"' && c != ','
&& c != ';';
}
static void tok_ident(struct file *input, struct token *tok, int c)
{
tok->typ = T_IDENT;
uns_string_alloc(gc, &tok->dat, 32);
do {
uns_string_append_char(gc, &tok->dat, c);
c = get(input);
} while (part_of_ident(c));
unget(c, input);
}
static void tokenize(struct file *input, struct token *tok)
{
int c = get_skipws(input);
int c2;
tok->dat.p = NULL;
switch (c) {
case EOF:
tok->typ = T_EOF;
return;
case '(':
tok->typ = LPAREN;
return;
case ')':
tok->typ = RPAREN;
return;
case '\'':
tok->typ = QUOTE;
return;
case '`':
tok->typ = QUASIQUOTE;
return;
case ',':
c = get(input);
if (c == '@') {
tok->typ = UNQUOTE_LIST;
} else {
unget(c, input);
tok->typ = UNQUOTE;
}
return;
case '"':
tok_string(input, tok);
return;
tok->typ = T_STRING;
return;
case '+': case '-':
c2 = get(input);
unget(c2, input);
if (tonum(c2) >= 0) {
tok_num(input, tok, c);
return;
}
/* FALLTHROUGH */
default:
if (c == '.') {
c2 = get(input);
if (tonum(c2) >= 0) {
unget(c2, input);
tok_num(input, tok, c);
} else if (part_of_ident(c2)) {
unget(c2, input);
tok_ident(input, tok, c);
} else {
tok->typ = T_DOT;
}
} else if (tonum(c) >= 0) {
tok_num(input, tok, c);
} else {
tok_ident(input, tok, c);
}
return;
}
}
enum item_type {
CELL,
LAMBDA,
INTEGER,
FLOAT,
STRING,
SYMBOL,
EMPTY_LIST,
LISP_NULL
};
/* TODO: if the allocator allows for weak references, do a lookup of
* the symbol in the tree, and if it exists, use that instead of the
* token.
*
* This check should always be ignored on collectors without weak
* references.
*/
static void alloc_of_type(struct uns_ctr *ctr, int typ)
{
void *p;
int fields;
int i;
switch (typ) {
case CELL: fields = 2; break;
case LAMBDA: fields = 2; break;
case INTEGER: fields = 1; break;
case STRING: fields = 1; break;
case SYMBOL: fields = 1; break;
case FLOAT: fields = 1; break;
case EMPTY_LIST: fields = 0; break;
case LISP_NULL: fields = 0; break;
default: abort();
}
ctr->p = uns_alloc_rec(gc, fields + 1, 0);
p = uns_alloc(gc, sizeof(int), 0);
memcpy(p, &typ, sizeof(int));
uns_set(gc, ctr->p, 0, UNS_POINTER, p);
for (i = 0; i < fields; i++)
uns_set(gc, ctr->p, i + 1, UNS_POINTER, NULL);
}
static int get_type(Uns_ptr p)
{
int typ;
void *innerp;
if (!p)
return LISP_NULL;
innerp = uns_get(gc, p, 0, NULL);
memcpy(&typ, innerp, sizeof(int));
return typ;
}
static void alloc_symbol_from_cstring(struct uns_ctr *ctr, const char *s, size_t slen)
{
struct uns_ctr str = {0};
uns_root_add(gc, &str);
uns_string_alloc(gc, &str, slen);
uns_string_append_bytes(gc, &str, s, slen);
alloc_of_type(ctr, SYMBOL);
uns_set(gc, ctr->p, 1, UNS_POINTER, str.p);
uns_root_remove(gc, &str);
}
enum expr_stack_state {
EXPR_STACK_INITIAL,
EXPR_STACK_QUOTELIKE,
EXPR_STACK_SURROUND_OTHER,
EXPR_STACK_START_LIST,
EXPR_STACK_IN_LIST,
EXPR_STACK_IMPROPER_LIST,
EXPR_STACK_IMPROPER_LIST_END,
EXPR_STACK_INVALID
};
enum expr_stack_fields {
EXPR_FIELD_NEXT,
EXPR_FIELD_PTR,
EXPR_FIELD_STATE,
EXPR_FIELD_NUM
};
static void expr_stack_push(struct uns_ctr *stack, struct uns_ctr *loc, enum expr_stack_state state)
{
struct uns_ctr tmp = {0};
uns_root_add(gc, &tmp);
tmp.p = stack->p;
stack->p = uns_alloc_rec(gc, EXPR_FIELD_NUM, 0);
uns_set(gc, stack->p, EXPR_FIELD_NEXT, UNS_POINTER, tmp.p);
uns_root_remove(gc, &tmp);
uns_set(gc, stack->p, EXPR_FIELD_PTR, UNS_POINTER, loc->p);
tmp.p = uns_alloc(gc, sizeof(state), 0);
memcpy(tmp.p, &state, sizeof(state));
uns_set(gc, stack->p, EXPR_FIELD_STATE, UNS_POINTER, tmp.p);
}
static enum expr_stack_state expr_stack_state(struct uns_ctr *stack)
{
enum expr_stack_state r;
if (!stack->p)
return EXPR_STACK_INVALID;
memcpy(&r, uns_get(gc, stack->p, EXPR_FIELD_STATE, NULL),
sizeof(r));
return r;
}
static void expr_stack_change_state(struct uns_ctr *stack, enum expr_stack_state newst)
{
memcpy(uns_get(gc, stack->p, EXPR_FIELD_STATE, NULL),
&newst,
sizeof(newst));
}
static void expr_stack_ctr(struct uns_ctr *stack, struct uns_ctr *loc)
{
loc->p = uns_get(gc, stack->p, EXPR_FIELD_PTR, NULL);
}
static int expr_stack_pop(struct uns_ctr *stack)
{
if (!stack->p)
return 0;
stack->p = uns_get(gc, stack->p, EXPR_FIELD_NEXT, NULL);
return 1;
}
enum parser_return {
EXPR_PARSE_OK,
EXPR_PARSE_EOF,
EXPR_PARSE_INCOMPLETE,
EXPR_PARSE_EXCESS_RPAREN,
EXPR_PARSE_INVALID_EMPTY_LIST,
EXPR_PARSE_IMPROPER_LIST_OVERFLOW,
EXPR_PARSE_BAD_IMPROPER_LIST,
EXPR_PARSE_BAD_STACK_START,
EXPR_PARSE_BAD_QUOTE,
EXPR_PARSE_INTERNAL_ERROR
};
static int tok_to_type(enum token_type tokt)
{
switch (tokt) {
case T_IDENT: return SYMBOL;
case T_STRING: return STRING;
default: return -1;
}
}
/* Parse expressions using an explicit stack.
* This is feasible to to hand-written and has the advantage that error
* recovery is much easier than recursion (implicit stack).
*
* The stack consists of a LIFO:
* [previous value][container][parser state]
*
* The implementation stores containers on the stack and when a container
* is complete or an atom is read, it is stored in "expr" and the stack
* is modified.
*/
static enum parser_return expr_parse(struct file *input, struct uns_ctr *expr)
{
struct uns_ctr stack = {0};
struct uns_ctr loc = {0};
struct uns_ctr new_cell = {0};
struct token tok = {0};
int store = 0;
enum parser_return r = EXPR_PARSE_OK;
uns_root_add(gc, &stack);
uns_root_add(gc, &loc);
uns_root_add(gc, &tok.dat);
uns_root_add(gc, &new_cell);
/* Allocate expr->p to be a single pointer. */
expr->p = uns_alloc_rec(gc, 1, 0);
/* Initialize the stack with the final result. */
expr_stack_push(&stack, expr, EXPR_STACK_INITIAL);
while (stack.p) {
tokenize(input, &tok);
switch (tok.typ) {
case LPAREN:
/* Push a new cell onto the stack and parse it later. */
alloc_of_type(expr, CELL);
expr_stack_push(&stack, expr, EXPR_STACK_START_LIST);
break;
case QUOTE: case QUASIQUOTE: case UNQUOTE: case UNQUOTE_LIST:
/* push (token-name <undefined>) onto the stack, and have
* it point to the first cell.
*/
alloc_of_type(&loc, CELL);
alloc_symbol_from_cstring(expr, token2string_repr[tok.typ],
strlen(token2string_repr[tok.typ]));
uns_set(gc, loc.p, 1, UNS_POINTER, expr->p);
alloc_of_type(expr, CELL);
uns_set(gc, loc.p, 2, UNS_POINTER, expr->p);
uns_set(gc, expr->p, 2, UNS_POINTER, empty_list.p);
if (tok.typ == QUOTE || tok.typ == QUASIQUOTE)
expr_stack_push(&stack, &loc, EXPR_STACK_QUOTELIKE);
else
expr_stack_push(&stack, &loc, EXPR_STACK_SURROUND_OTHER);
break;
/* All atoms are allocated into "expr" and will be stored in the
* container after the switch.
*/
case T_IDENT: case T_STRING:
alloc_of_type(expr, tok_to_type(tok.typ));
uns_set(gc, expr->p, 1, UNS_POINTER, tok.dat.p);
store = 1;
break;
case T_INT:
alloc_of_type(expr, INTEGER);
loc.p = uns_alloc(gc, sizeof(tok.i), 0);
memcpy(loc.p, &tok.i, sizeof(tok.i));
uns_set(gc, expr->p, 1, UNS_POINTER, loc.p);
store = 1;
break;
case T_FLOAT:
alloc_of_type(expr, FLOAT);
loc.p = uns_alloc(gc, sizeof(tok.f), 0);
memcpy(loc.p, &tok.f, sizeof(tok.f));
uns_set(gc, expr->p, 1, UNS_POINTER, loc.p);
store = 1;
break;
case RPAREN:
switch (expr_stack_state(&stack)) {
case EXPR_STACK_START_LIST:
/* Empty list found. Pop the cell and discard it.
* If we are not in a "surround" state, report error.
* Replace the surround state with an empty list.
*/
expr_stack_pop(&stack);
if (!stack.p) {
r = EXPR_PARSE_INTERNAL_ERROR;
goto end;
}
if (expr_stack_state(&stack) != EXPR_STACK_QUOTELIKE) {
r = EXPR_PARSE_INVALID_EMPTY_LIST;
goto end;
}
/* Discard quote/quasiquote and store the empty list instead. */
if (!expr_stack_pop(&stack)) {
r = EXPR_PARSE_INTERNAL_ERROR;
goto end;
}
expr->p = empty_list.p;
store = 1;
break;
case EXPR_STACK_IN_LIST:
/* In a list, there are two things on the stack:
* first cell and last cell.
* Place the empty list in the cdr of the last cell,
* and then discard the last cell. Put the first cell into expr
* and store it in the container in the stack before it.
*/
expr_stack_ctr(&stack, &loc);
if (get_type(loc.p) != CELL) {
r = EXPR_PARSE_INTERNAL_ERROR;
goto end;
}
uns_set(gc, loc.p, 2, UNS_POINTER, empty_list.p);
/* FALLTHROUGH */
case EXPR_STACK_IMPROPER_LIST_END:
/* CDR of the list has been filled. Pop the last cell. */
expr_stack_pop(&stack);
if (!stack.p) {
r = EXPR_PARSE_INTERNAL_ERROR;
goto end;
}
/* Put the first cell in expr, and read it into the next item
* of the stack.
*/
expr_stack_ctr(&stack, expr);
expr_stack_pop(&stack);
store = 1;
break;
case EXPR_STACK_INITIAL:
/* Expression is not balanced. */
r = EXPR_PARSE_EXCESS_RPAREN;
goto end;
case EXPR_STACK_IMPROPER_LIST:
/* (values .) is an invalid expression. */
r = EXPR_PARSE_BAD_IMPROPER_LIST;
goto end;
case EXPR_STACK_QUOTELIKE: case EXPR_STACK_SURROUND_OTHER:
/* ,) ') etc are invalid */
r = EXPR_PARSE_BAD_QUOTE;
goto end;
case EXPR_STACK_INVALID:
r = EXPR_PARSE_INTERNAL_ERROR;
goto end;
}
break;
case T_DOT:
if (expr_stack_state(&stack) != EXPR_STACK_IN_LIST) {
r = EXPR_PARSE_BAD_IMPROPER_LIST;
goto end;
}
expr_stack_change_state(&stack, EXPR_STACK_IMPROPER_LIST);
break;
case T_EOF:
/* If EOF is encountered in the middle of an expression, repor
* that. Otherwise, the parser reads nothing.
*/
if (expr_stack_state(&stack) != EXPR_STACK_INITIAL) {
r = EXPR_PARSE_INCOMPLETE;
goto end;
}
r = EXPR_PARSE_EOF;
goto end;
case TOKEN_NUM:
r = EXPR_PARSE_INTERNAL_ERROR;
goto end;
}
/* If store is true, that means "expr" contains a value to
* be placed into the container at the top of the stack.
*
* This is a loop because some cases need to cascade values up the
* stack without acting on a token. When the loop stops, further
* action requires a new token to be read.
*/
while (store) switch (expr_stack_state(&stack)) {
case EXPR_STACK_INVALID:
r = EXPR_PARSE_INTERNAL_ERROR;
goto end;
case EXPR_STACK_START_LIST:
/* The current thing on the stack is the first part of the
* list. Add the expression to car of the cell, and add
* push a pointer to the cell. This is the last cell in
* the list.
*/
expr_stack_ctr(&stack, &loc);
if (get_type(loc.p) != CELL) {
r = EXPR_PARSE_INTERNAL_ERROR;
goto end;
}
uns_set(gc, loc.p, 1, UNS_POINTER, expr->p);
expr_stack_push(&stack, &loc, EXPR_STACK_IN_LIST);
store = 0;
break;
case EXPR_STACK_IN_LIST:
/* The pointer on the top of the stack is the end of the
* list. Add a new list to it's cdr, and put expr into
* the car of the new end of the list.
*/
expr_stack_ctr(&stack, &loc);
if (get_type(loc.p) != CELL) {
r = EXPR_PARSE_INTERNAL_ERROR;
goto end;
}
alloc_of_type(&new_cell, CELL);
uns_set(gc, loc.p, 2, UNS_POINTER, new_cell.p);
uns_set(gc, new_cell.p, 1, UNS_POINTER, expr->p);
expr_stack_pop(&stack);
expr_stack_push(&stack, &new_cell, EXPR_STACK_IN_LIST);
store = 0;
break;
case EXPR_STACK_IMPROPER_LIST:
/* Add the expression to the cdr of the last element.
* Set the state to expect the end of an improper list.
*/
expr_stack_ctr(&stack, &loc);
uns_set(gc, loc.p, 2, UNS_POINTER, expr->p);
if (get_type(loc.p) != CELL) {
r = EXPR_PARSE_INTERNAL_ERROR;
goto end;
}
expr_stack_change_state(&stack, EXPR_STACK_IMPROPER_LIST_END);
store = 0;
break;
case EXPR_STACK_IMPROPER_LIST_END:
r = EXPR_PARSE_IMPROPER_LIST_OVERFLOW;
goto end;
case EXPR_STACK_QUOTELIKE: case EXPR_STACK_SURROUND_OTHER:
/* Expression at the top of the stack is
* (surrouding-thing <undefined>)
* Replace (car (cdr (top-of-stack)) with expr.
*/
expr_stack_ctr(&stack, &loc);
if (get_type(loc.p) != CELL) {
r = EXPR_PARSE_INTERNAL_ERROR;
goto end;
}
loc.p = uns_get(gc, loc.p, 2, NULL);
if (get_type(loc.p) != CELL) {
r = EXPR_PARSE_INTERNAL_ERROR;
goto end;
}
uns_set(gc, loc.p, 1, UNS_POINTER, expr->p);
/* Get the head of the quoted value again. */
expr_stack_ctr(&stack, expr);
expr_stack_pop(&stack);
/* This value needs to be stored somewhere. Continue looping. */
store = 1;
break;
case EXPR_STACK_INITIAL:
/* Finished parsing an expression at toplevel. */
expr_stack_ctr(&stack, &loc);
uns_set(gc, loc.p, 0, UNS_POINTER, expr->p);
/* Prepare for return. */
expr->p = loc.p;
expr_stack_pop(&stack);
if (stack.p) {
r = EXPR_PARSE_BAD_STACK_START;
goto end;
}
store = 0;
break;
}
}
end:
uns_root_remove(gc, &stack);
uns_root_remove(gc, &loc);
uns_root_remove(gc, &tok.dat);
uns_root_remove(gc, &new_cell);
return r;
}
static void oom(Uns_GC gc_)
{
(void)gc_;
printf("oom\n");
abort();
}
/* TODO: Make UNS_Lisp its own library and move this out. */
extern Uns_GC uns_lisp_gc_init(void);
static void init(void)
{
gc = uns_lisp_gc_init();
uns_set_oom(gc, oom);
uns_root_add(gc, &empty_list);
alloc_of_type(&empty_list, EMPTY_LIST);
}
static void display(struct uns_ctr *ctr, long indent)
{
struct uns_ctr tmp = {0};
long l;
double f;
if (!ctr->p) {
printf("<undefined>");
return;
}
switch (get_type(ctr->p)) {
case CELL:
uns_root_add(gc, &tmp);
printf("(");
tmp.p = uns_get(gc, ctr->p, 1, NULL);
display(&tmp, indent);
ctr->p = uns_get(gc, ctr->p, 2, NULL);
while (get_type(ctr->p) == CELL) {
tmp.p = uns_get(gc, ctr->p, 1, NULL);
if (get_type(tmp.p) == CELL) {
printf("\n");
for (l = 0; l < indent; l++)
printf(" ");
display(&tmp, indent + 1);
} else {
printf(" ");
display(&tmp, indent);
}
ctr->p = uns_get(gc, ctr->p, 2, NULL);
}
switch (get_type(ctr->p)) {
case EMPTY_LIST:
printf(")");
break;
default:
printf(" . ");
display(ctr, indent);
printf(")");
break;
}
uns_root_remove(gc, &tmp);
return;
case INTEGER:
memcpy(&l, uns_get(gc, ctr->p, 1, NULL), sizeof(long));
printf("%ld", l);
return;
case FLOAT:
memcpy(&f, uns_get(gc, ctr->p, 1, NULL), sizeof(double));
printf("%f", f);
return;
case STRING:
tmp.p = uns_get(gc, ctr->p, 1, NULL);
uns_root_add(gc, &tmp);
printf("\"%s\"", uns_string_cstring(gc, &tmp));
uns_root_remove(gc, &tmp);
return;
case SYMBOL:
tmp.p = uns_get(gc, ctr->p, 1, NULL);
uns_root_add(gc, &tmp);
printf("%s", uns_string_cstring(gc, &tmp));
uns_root_remove(gc, &tmp);
return;
case EMPTY_LIST:
printf("'()");
return;
default:
abort();
}
}
/* Contination passing style.
* Continuation passing IR uses explicit statements instead of
* shorthand (i.e. (__pass K atom) instead of (K atom)).
*
* CPS primitives:
* (__pass k e): Pass e to continuation k. e must be an atom.
* (__apply f l k): Pass the argument list "l" to "f", and pass
* the result to "k". "f" must be a "__klambda".
* (__konstruct e body): Construct a continuation that takes a
* single argument. This is shortened to "\e body" below.
* (__klambda l k body): Constructs a function that takes a single
* value along with a continuation.
* (__primitive-> args...) The primitive, but the last argument is a
* continuation.
* cps{atom, K} = (__pass K atom)
* cps{(__lambda l E), K} = (__pass K (__klambda l k cps{E,k}))
* cps{(F A1 ... AN), K} =
* cps{F, \f cps_list{'(), (A1 ... AN), \l (__apply f l K)}}
* cps_list{L, '(), K} = (__pass K L)
* cps_list{L, (A . B), K} =
* cps{A, \a cps_list{append{L,a}, B, K}}
* cps{(if E B1 B2), K} =
* cps{E, \e (__if* e (\K' cps{B1,K'}) (\K' cps{B1,K'}) K)}
*
* cps{(__define symb E}, K} = cps{E, \e (__define-> symb e K)}
* cps{(set! symb E), K} = cps{E, \e (__set!-> symb e K)}
*/
static void error(struct location *loc, const char *emsg)
{
fprintf(stderr, "%ld:%ld: ", loc->line, loc->offset);
fprintf(stderr, "%s\n", emsg);
}
int main(void)
{
struct uns_ctr expr = {0};
struct file input = {0};
input.loc.line = 1;
init();
uns_root_add(gc, &expr);
input.f = stdin;
while (!feof(input.f)) {
expr.p = NULL;
switch (expr_parse(&input, &expr)) {
case EXPR_PARSE_OK:
expr.p = uns_get(gc, expr.p, 0, NULL);
display(&expr, 1);
printf("\n");
break;
case EXPR_PARSE_INCOMPLETE:
error(&input.loc, "EOF before expression was finished");
break;
case EXPR_PARSE_EXCESS_RPAREN:
error(&input.loc, "Unbalanced parentheses");
break;
case EXPR_PARSE_INVALID_EMPTY_LIST:
error(&input.loc, "Invalid syntax for empty list (either '() or `'())");
break;
case EXPR_PARSE_BAD_QUOTE:
error(&input.loc, "Invalid syntax for quotes/unquotes");
break;
case EXPR_PARSE_IMPROPER_LIST_OVERFLOW:
error(&input.loc, "too many values at end of improper list (must be exactly one)");
case EXPR_PARSE_BAD_IMPROPER_LIST:
error(&input.loc, "Bad syntax for improper list (must be (value . value))");
break;
case EXPR_PARSE_INTERNAL_ERROR:
error(&input.loc, "Bug in implementation\n");
break;
case EXPR_PARSE_BAD_STACK_START:
error(&input.loc, "Bug: bottom of stack not correct\n");
break;
case EXPR_PARSE_EOF:
error(&input.loc, "EOF\n");
break;
}
}
uns_root_remove(gc, &expr);
uns_deinit(gc);
return 0;
}

View file

@ -0,0 +1,60 @@
/* Copyright (c) 2024, Peter McGoron
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1) Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2) Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <stdio.h>
#include <stdlib.h>
#include "uns.h"
#include "cheney_c89.h"
static void after_gc(Uns_GC gc, struct uns_cheney_c89_statistics *stats)
{
fprintf(stderr,
"cheney_c89 %ld: %lu -> %lu\n",
stats->collection_number,
stats->usage_before,
stats->usage_after
);
if (stats->usage_after >= stats->usage_before * 7/10) {
uns_cheney_c89_set_new_heap_size(gc,
uns_cheney_c89_get_new_heap_size(gc) * 2);
}
}
Uns_GC uns_lisp_gc_init(void)
{
Uns_GC gc = malloc(uns_gc_size);
if (!uns_cheney_c89_init(gc, 512)) {
fprintf(stderr, "Error initializing GC\n");
exit(1);
}
uns_cheney_c89_set_collect_callback(gc, after_gc);
uns_cheney_c89_set_new_heap_size(gc, 1024);
return gc;
}

View file

@ -1,29 +1,35 @@
#!/bin/sh
# The road to hell is paved with Makefile generators.
#
# Each test is compiled statically linked with a single collector.
#
# OBJDEPS: Dependencies for an object file.
# DEPS: Dependencies for an executable file.
TEST_TARGETS=""
GENERATED_OBJS=""
gen_test() {
local NAME COLLECTOR TEST_IMPL DEPS OBJDEPS
gen_test() { # test_name, collector_name, exec_deps, obj_file_deps
local NAME COLLECTOR TARGET TEST_IMPL DEPS OBJDEPS
NAME=$1
COLLECTOR=$2
TARGET="$NAME"_"$COLLECTOR".test
TEST_IMPL="$NAME.c"
DEPS="$1.o uns.o $3"
OBJDEPS="include/uns.h $1.c $4"
printf '%s.o: %s\n' "$NAME" "$OBJDEPS"
printf '%s_%s.test: %s\n' "$NAME" "$COLLECTOR" "$DEPS"
printf '\t${CC} ${LDFLAGS} %s -o %s_%s.test\n' \
"$DEPS" "$NAME" "$COLLECTOR"
printf '\tvalgrind ./%s_%s.test\n' "$NAME" "$COLLECTOR"
echo "
$TARGET: $DEPS
\${CC} \${LDFLAGS} $DEPS $TARGET
./valgrind ./$TARGET
"
TEST_TARGETS=$(printf "%s %s_%s.test" "$TEST_TARGETS" "$NAME" "$COLLECTOR")
TEST_TARGETS="$TEST_TARGETS $TARGET"
GENERATED_OBJS="$GENERATED_OBJS $NAME.o"
}
gen_string_test() {
gen_string_test() { # collector_name, exec_deps
local COLLECTOR DEPS OBJDEPS
COLLECTOR=$1
@ -40,7 +46,7 @@ gen_string_test() {
"$DEPS" "$OBJDEPS"
}
gen_hashtable_test() {
gen_hashtable_test() { # collector_name, exec_deps
local COLLECTOR DEPS OBJDEPS
COLLECTOR=$1
@ -54,14 +60,36 @@ gen_hashtable_test() {
"$DEPS" "$OBJDEPS"
}
gen_lisp_test() { #collector_name, exec_deps
local COLLECTOR SHIM_OBJ TARGET DEPS OBJDEPS
COLLECTOR=$1
TARGET="examples/lisp/uns_lisp_$COLLECTOR"
SHIM_OBJ="$TARGET.o"
OBJDEPS="examples/lisp/uns_lisp_$COLLECTOR.c include/uns.h include/$COLLECTOR.h"
DEPS="$2 $SHIM_OBJ uns.o "'${UNS_LISP_OBJS}'
echo "
$SHIM_OBJ: $OBJDEPS
$TARGET: $DEPS
\${CC} \${LDFLAGS} $DEPS -o $TARGET
"
TEST_TARGETS="$TEST_TARGETS $TARGET"
GENERTATED_OBJS="$GENERATED_OBJS $SHIM_OBJ"
}
gen_tests() {
echo "examples/test_$1.o: include/uns.h $2"
GENERATED_OBJS="$GENERATED_OBJS examples/test_$1.o"
gen_string_test "$1" '${CHENEY_C89_OBJS}'
gen_hashtable_test "$1" '${CHENEY_C89_OBJS}'
gen_lisp_test "$1" '${CHENEY_C89_OBJS}'
}
gen_tests cheney_c89 "include/cheney_c89.h"
echo
printf "tests: %s\n" "$TEST_TARGETS"
printf 'clean_tests:\n\t${RM} -f %s %s\n' "$TEST_TARGETS" "$GENERATED_OBJS"