import flatrate lisp, rename to Universal Service LISP

This commit is contained in:
Peter McGoron 2024-07-10 10:47:55 -04:00
parent a406b64975
commit b41e2100c2
7 changed files with 1302 additions and 16 deletions

View File

@ -4,7 +4,7 @@
.SUFFIXES: .c .o .test .SUFFIXES: .c .o .test
CC=cc CC=cc
CFLAGS=-Wall -std=c89 -Werror -pedantic -fPIC -g -Iinclude CFLAGS=-Wall -std=c89 -Werror -pedantic -fPIC -g -Iinclude -I.
tests: string_tests htable_tests tests: string_tests htable_tests
@ -36,6 +36,12 @@ examples/hashtable/uns_hashtable.o: examples/hashtable/uns_hashtable.c \
include/uns.h include/uns.h
UNS_HASHTABLE_OBJS=examples/hashtable/uns_hashtable.o UNS_HASHTABLE_OBJS=examples/hashtable/uns_hashtable.o
examples/lisp/uns_lisp.o: include/uns.h \
examples/lisp/uns_lisp.c \
examples/string/uns_string.h
UNS_LISP_OBJS=examples/lisp/uns_lisp.o examples/string/uns_string.o
EXAMPLE_OBJS=${UNS_STRING_OBJS} ${UNS_HASHTABLE_OBJS} EXAMPLE_OBJS=${UNS_STRING_OBJS} ${UNS_HASHTABLE_OBJS}
## Clean ## Clean

View File

@ -12,12 +12,12 @@ Terminology
----------- -----------
Generally speaking, ``UNS_WORD`` must be an integer that can be converted Generally speaking, ``UNS_WORD`` must be an integer that can be converted
to and from a pointer to data. ``UNS_SWORD`` is the signed version of to and from a pointer to data (such as ``uintptr_t``). ``UNS_SWORD`` is the
``UNS_WORD``. Both must be integer types. signed version of ``UNS_WORD``. Both must be integer types.
In collectors where this conversion cannot be assumed (like C89 collectors) In collectors where this conversion cannot be assumed (like C89 collectors)
or not possible, then ``UNS_WORD`` should be a type that can be used to or is not possible, then ``UNS_WORD`` should be a type that can be used to
index any arrays (like ``size_t``) and ``UNS_SWORD`` is like ``ssize_t``. index any arrays (like ``size_t``).
A "region" denotes a block of memory in the heap. The "header" of a A "region" denotes a block of memory in the heap. The "header" of a
region is a hidden area of the region that holds information about region is a hidden area of the region that holds information about
@ -86,5 +86,4 @@ Todo
---- ----
* call before gc and after gc * call before gc and after gc
* Make makefiles simpler and POSIX compliant
* Address sanitizer, ub sanitizer if available * Address sanitizer, ub sanitizer if available

8
examples/lisp/README.rst Normal file
View File

@ -0,0 +1,8 @@
======================
Universal Service LISP
======================
A Scheme R5RS interpreter written C89 using Universal Service GCs.
You shouldn't use it for real programming, but it will work on any C89
compiler.

264
examples/lisp/prelude.scm Normal file
View File

@ -0,0 +1,264 @@
; Copyright (c) 2024, Peter McGoron
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions
; are met:
;
; 1) Redistributions of source code must retain the above copyright
; notice, this list of conditions and the following disclaimer.
; 2) Redistributions in binary form must reproduce the above copyright
; notice, this list of conditions and the following disclaimer in the
; documentation and/or other materials provided with the distribution.
;
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
; TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
; Primitives are special syntatic forms. They differ from built-in
; functions.
;
; primitives:
; __lambda: lambda of one argument
; __define: define in the environment
; __define-macro: Like __define but for macros
; set!
; if
;
; Others:
; __uniapply: Apply function to single argument
; <undefined>: A value that can only be passed to __lambda. It cannot
; be bound to a top-level, or "set!", or added to a list.
; It can also be the parameter of a __lambda, although
; it still cannot be used.
;
; Macros are functions that return an sexpr.
(__define-macro let
(__lambda l ; car = args, cdr = body
(if (null? (car args))
(cdr args)
; (car args) = ((A a) (B b) ...)
; (car (car args)) = (A a)
; (cdr (car (car args))) = (a)
`((__lambda ,(car (car (car args)))
(let ,(cdr (car args)) ,@body))
,@(car (cdr (car (car args))))
)
)
)
)
(__define-macro let* let)
; if and __lambda only execute one statement.
(__define-macro begin body
(let ((first (car body))
(rest (cdr body))
)
(if (null? rest)
first
`((__lambda <undefined> (begin ,@rest)) ,first)
)
)
)
;;; (cond (case1 expr1) (case2 expr2) ... (else exprelse))
(__define-macro cond body
(let ((cases (car body)))
(if (null? body)
<undefined>
(let* ((branch (car cases))
(rest (cdr cases))
(test (car branch))
(to-exec (cdr branch))
)
(if (null? to-exec)
(let ((tmp (gensym)))
`(let ((,tmp ,test))
(if ,tmp ,tmp (cond ,@rest))
)
)
(if (eqv? test 'else)
(if (null? rest)
`(begin ,@rest)
(error "invalid else clause")
)
(if (eqv? (car to-exec) '=>)
(let ((tmp (gensym))
(fun (cdr to-exec))
)
`(let ((,tmp ,test))
(if ,tmp (,fun ,tmp) (cond ,@rest))
)
)
`(if ,tmp (begin ,@rest) (cond ,@rest))
)
)
)
)
)
)
)
(__define-macro __bindlambda
(__lambda l
(let ((larg (car l))
(args (car (cdr l)))
(body (cdr (cdr l)))
)
(if (null? args)
`(__lambda larg
(if (not (null? larg))
(raise "incorrect number of arguments")
(begin ,@body)
)
)
(let* ((argval (cons args))
(rest (cdr args))
(arg (cons argval))
(val (cons (cdr argval)))
)
`(__lambda ,larg
(let ((,arg ,val)) __bindlambda ,larg ,rest ,@body)
)
)
)
)
)
)
(__define-macro lambda
(__lambda l
(let ((args (car l))
(body (cdr l))
)
(if (symbol? args)
`(__lambda ,args (begin ,@body))
(if (null? args)
`(__lambda <undefined> (begin ,@body))
(let ((larg (gensym)))
`(__bindlambda ,larg ,@body)
)
)
)
)
)
)
(__define-macro define-macro
(__lambda l
(let* ((name-and-args (car l))
(name (car name-and-args))
(args (cdr name-and-args))
(body (cdr l))
(tmpname (gensym))
)
`(__define-macro ,name
(lambda ,args ,@body)
)
)
)
)
(define-macro (and . body)
(if (null? body)
1
(let ((first (car body))
(rest (cdr body))
)
`(if ,first (and ,@rest) 0)
)
)
)
(define-macro (or . body)
(if (null? body)
0
(let ((first (car body))
(rest (cdr body))
)
`(if ,first 1 (or ,@rest))
)
)
)
(define-macro (letrec args . body)
(if (null? args)
body
(let* ((argval (car args))
(arg (car argval))
(val (car (cdr argval)))
(rest (cdr args))
)
`(let ((,arg <undefined>))
(letrec ,rest (begin (set! ,arg ,val) ,@body))
)
)
)
)
(define-macro (define name . body)
(if (symbol? name)
`(__define ,name ,@body)
(let ((fname (car name))
(args (cdr name))
(tmparg (gensym))
)
`(__define ,fname
(lambda ,tmparg
(letrec ((,fname (lambda ,args ,@body)))
(,fname . ,tmparg)
)
)
)
)
)
)
;;;;;;;;;;;;;; Standard Library ;;;;;;;;;;;;;;;;;;;
;; for a list of (v1 v2 ... vn)
;; runs (f vn (... (f v2 (f v1 start))))
(define (foldl f start l)
(letrec
((loop
(lambda (ret-list cur-list)
(if (eqv? value '())
value
(loop (f (car cur-list) value)
(cdr cur-list))
)
)
))
(loop start l)
)
)
(define (reverse l) (foldl cons '() l))
;; for a list of (v1 v2 ... vn)
;; runs (f v1 (f v2 (... (f vn start))))
(define (foldr f start l) (foldl f start (reverse l)))
(define (append . l)
(foldr (lambda (to-prepend collected)
(foldr cons collected to-prepend)
)
'()
l
)
)
(define (apply f . l) (__uniapply f (__uniapply append l)))
(define (list . l) l)
;; (define (+ . l) (foldl __bin+ 0 l))
;; (define (* . l) (foldl __bin* 1 l))

921
examples/lisp/uns_lisp.c Normal file
View File

@ -0,0 +1,921 @@
/* Copyright (c) 2024, Peter McGoron
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1) Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2) Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include "uns.h"
#include "examples/string/uns_string.h"
static Uns_GC gc;
static struct uns_ctr empty_list;
enum token_type {
T_EOF,
LPAREN,
RPAREN,
QUOTE,
QUASIQUOTE,
UNQUOTE,
UNQUOTE_LIST,
T_IDENT,
T_INT,
T_FLOAT,
T_STRING,
T_DOT,
TOKEN_NUM
};
static const char *token2string_repr[TOKEN_NUM] = {
"'EOF",
"(",
")",
"quote",
"quasiquote",
"unquote",
"unquote-list",
"'ident",
"'number",
"'float",
"'string-tok",
"'dot"
};
struct location {
unsigned long line;
unsigned long offset;
};
struct file {
FILE *f;
struct location loc;
};
struct token {
enum token_type typ;
struct uns_ctr dat;
double f;
long i;
};
static int is_ws(int c)
{
return c == '\n' || c == '\t' || c == '\r' || c == '\v' || c == ' ';
}
static int get(struct file *f)
{
int c = getc(f->f);
if (c == '\n') {
f->loc.line++;
f->loc.offset = 0;
} else {
f->loc.offset += 1;
}
return c;
}
static void unget(int c, struct file *f)
{
ungetc(c, f->f);
}
static int get_skipws(struct file *input)
{
int c;
for (;;) {
c = get(input);
if (c == ';') {
do { c = get(input); } while (c != '\n');
unget(c, input);
continue;
}
if (is_ws(c))
continue;
return c;
}
}
static void tok_string(struct file *input, struct token *tok)
{
int c;
tok->typ = T_STRING;
uns_string_alloc(gc, &tok->dat, 32);
for (;;) {
c = get(input);
switch (c) {
case '\\':
c = get(input);
switch (c) {
case ' ': case '\t': case '\r': case '\n': case '\v':
do {
c = get(input);
} while (is_ws(c));
unget(c, input);
continue;
case 'r':
uns_string_append_char(gc, &tok->dat, '\r');
continue;
case 'n':
uns_string_append_char(gc, &tok->dat, '\n');
continue;
case 'v':
uns_string_append_char(gc, &tok->dat, '\v');
continue;
case 't':
uns_string_append_char(gc, &tok->dat, '\t');
continue;
case '"':
uns_string_append_char(gc, &tok->dat, '"');
continue;
default:
uns_string_append_char(gc, &tok->dat, '\\');
uns_string_append_char(gc, &tok->dat, c);
}
case '"': /* " */
return;
}
}
}
static int tonum(int c)
{
switch (c) {
case '0': case '1': case '2': case '3': case '4': case '5':
case '6': case '7': case '8': case '9':
return c - '0';
default:
return -1;
}
}
static void tok_num(struct file *input, struct token *tok, int c)
{
int is_float = 0;
uns_string_alloc(gc, &tok->dat, 32);
do {
if (c == '.' || c == 'e' || c == 'E' || c == '-' || c == '+')
is_float = 1;
uns_string_append_char(gc, &tok->dat, c);
c = get(input);
} while (tonum(c) >= 0 || c == '.' || c == 'e' || c == 'E' || c == '-' || c == '+');
unget(c, input);
if (is_float) {
tok->typ = T_FLOAT;
tok->f = strtod(uns_string_cstring(gc, &tok->dat), NULL);
} else {
tok->typ = T_INT;
tok->i = strtol(uns_string_cstring(gc, &tok->dat), NULL, 10);
}
tok->dat.p = NULL;
}
static int part_of_ident(int c)
{
return !is_ws(c) && c != '(' && c != ')' && c != '`'
&& c != '\'' && c != '\\' && c != '\"' && c != ','
&& c != ';';
}
static void tok_ident(struct file *input, struct token *tok, int c)
{
tok->typ = T_IDENT;
uns_string_alloc(gc, &tok->dat, 32);
do {
uns_string_append_char(gc, &tok->dat, c);
c = get(input);
} while (part_of_ident(c));
unget(c, input);
}
static void tokenize(struct file *input, struct token *tok)
{
int c = get_skipws(input);
int c2;
tok->dat.p = NULL;
switch (c) {
case EOF:
tok->typ = T_EOF;
return;
case '(':
tok->typ = LPAREN;
return;
case ')':
tok->typ = RPAREN;
return;
case '\'':
tok->typ = QUOTE;
return;
case '`':
tok->typ = QUASIQUOTE;
return;
case ',':
c = get(input);
if (c == '@') {
tok->typ = UNQUOTE_LIST;
} else {
unget(c, input);
tok->typ = UNQUOTE;
}
return;
case '"':
tok_string(input, tok);
return;
tok->typ = T_STRING;
return;
case '+': case '-':
c2 = get(input);
unget(c2, input);
if (tonum(c2) >= 0) {
tok_num(input, tok, c);
return;
}
/* FALLTHROUGH */
default:
if (c == '.') {
c2 = get(input);
if (tonum(c2) >= 0) {
unget(c2, input);
tok_num(input, tok, c);
} else if (part_of_ident(c2)) {
unget(c2, input);
tok_ident(input, tok, c);
} else {
tok->typ = T_DOT;
}
} else if (tonum(c) >= 0) {
tok_num(input, tok, c);
} else {
tok_ident(input, tok, c);
}
return;
}
}
enum item_type {
CELL,
LAMBDA,
INTEGER,
FLOAT,
STRING,
SYMBOL,
EMPTY_LIST,
LISP_NULL
};
/* TODO: if the allocator allows for weak references, do a lookup of
* the symbol in the tree, and if it exists, use that instead of the
* token.
*
* This check should always be ignored on collectors without weak
* references.
*/
static void alloc_of_type(struct uns_ctr *ctr, int typ)
{
void *p;
int fields;
int i;
switch (typ) {
case CELL: fields = 2; break;
case LAMBDA: fields = 2; break;
case INTEGER: fields = 1; break;
case STRING: fields = 1; break;
case SYMBOL: fields = 1; break;
case FLOAT: fields = 1; break;
case EMPTY_LIST: fields = 0; break;
case LISP_NULL: fields = 0; break;
default: abort();
}
ctr->p = uns_alloc_rec(gc, fields + 1, 0);
p = uns_alloc(gc, sizeof(int), 0);
memcpy(p, &typ, sizeof(int));
uns_set(gc, ctr->p, 0, UNS_POINTER, p);
for (i = 0; i < fields; i++)
uns_set(gc, ctr->p, i + 1, UNS_POINTER, NULL);
}
static int get_type(Uns_ptr p)
{
int typ;
void *innerp;
if (!p)
return LISP_NULL;
innerp = uns_get(gc, p, 0, NULL);
memcpy(&typ, innerp, sizeof(int));
return typ;
}
static void alloc_symbol_from_cstring(struct uns_ctr *ctr, const char *s, size_t slen)
{
struct uns_ctr str = {0};
uns_root_add(gc, &str);
uns_string_alloc(gc, &str, slen);
uns_string_append_bytes(gc, &str, s, slen);
alloc_of_type(ctr, SYMBOL);
uns_set(gc, ctr->p, 1, UNS_POINTER, str.p);
uns_root_remove(gc, &str);
}
enum expr_stack_state {
EXPR_STACK_INITIAL,
EXPR_STACK_QUOTELIKE,
EXPR_STACK_SURROUND_OTHER,
EXPR_STACK_START_LIST,
EXPR_STACK_IN_LIST,
EXPR_STACK_IMPROPER_LIST,
EXPR_STACK_IMPROPER_LIST_END,
EXPR_STACK_INVALID
};
enum expr_stack_fields {
EXPR_FIELD_NEXT,
EXPR_FIELD_PTR,
EXPR_FIELD_STATE,
EXPR_FIELD_NUM
};
static void expr_stack_push(struct uns_ctr *stack, struct uns_ctr *loc, enum expr_stack_state state)
{
struct uns_ctr tmp = {0};
uns_root_add(gc, &tmp);
tmp.p = stack->p;
stack->p = uns_alloc_rec(gc, EXPR_FIELD_NUM, 0);
uns_set(gc, stack->p, EXPR_FIELD_NEXT, UNS_POINTER, tmp.p);
uns_root_remove(gc, &tmp);
uns_set(gc, stack->p, EXPR_FIELD_PTR, UNS_POINTER, loc->p);
tmp.p = uns_alloc(gc, sizeof(state), 0);
memcpy(tmp.p, &state, sizeof(state));
uns_set(gc, stack->p, EXPR_FIELD_STATE, UNS_POINTER, tmp.p);
}
static enum expr_stack_state expr_stack_state(struct uns_ctr *stack)
{
enum expr_stack_state r;
if (!stack->p)
return EXPR_STACK_INVALID;
memcpy(&r, uns_get(gc, stack->p, EXPR_FIELD_STATE, NULL),
sizeof(r));
return r;
}
static void expr_stack_change_state(struct uns_ctr *stack, enum expr_stack_state newst)
{
memcpy(uns_get(gc, stack->p, EXPR_FIELD_STATE, NULL),
&newst,
sizeof(newst));
}
static void expr_stack_ctr(struct uns_ctr *stack, struct uns_ctr *loc)
{
loc->p = uns_get(gc, stack->p, EXPR_FIELD_PTR, NULL);
}
static int expr_stack_pop(struct uns_ctr *stack)
{
if (!stack->p)
return 0;
stack->p = uns_get(gc, stack->p, EXPR_FIELD_NEXT, NULL);
return 1;
}
enum parser_return {
EXPR_PARSE_OK,
EXPR_PARSE_EOF,
EXPR_PARSE_INCOMPLETE,
EXPR_PARSE_EXCESS_RPAREN,
EXPR_PARSE_INVALID_EMPTY_LIST,
EXPR_PARSE_IMPROPER_LIST_OVERFLOW,
EXPR_PARSE_BAD_IMPROPER_LIST,
EXPR_PARSE_BAD_STACK_START,
EXPR_PARSE_BAD_QUOTE,
EXPR_PARSE_INTERNAL_ERROR
};
static int tok_to_type(enum token_type tokt)
{
switch (tokt) {
case T_IDENT: return SYMBOL;
case T_STRING: return STRING;
default: return -1;
}
}
/* Parse expressions using an explicit stack.
* This is feasible to to hand-written and has the advantage that error
* recovery is much easier than recursion (implicit stack).
*
* The stack consists of a LIFO:
* [previous value][container][parser state]
*
* The implementation stores containers on the stack and when a container
* is complete or an atom is read, it is stored in "expr" and the stack
* is modified.
*/
static enum parser_return expr_parse(struct file *input, struct uns_ctr *expr)
{
struct uns_ctr stack = {0};
struct uns_ctr loc = {0};
struct uns_ctr new_cell = {0};
struct token tok = {0};
int store = 0;
enum parser_return r = EXPR_PARSE_OK;
uns_root_add(gc, &stack);
uns_root_add(gc, &loc);
uns_root_add(gc, &tok.dat);
uns_root_add(gc, &new_cell);
/* Allocate expr->p to be a single pointer. */
expr->p = uns_alloc_rec(gc, 1, 0);
/* Initialize the stack with the final result. */
expr_stack_push(&stack, expr, EXPR_STACK_INITIAL);
while (stack.p) {
tokenize(input, &tok);
switch (tok.typ) {
case LPAREN:
/* Push a new cell onto the stack and parse it later. */
alloc_of_type(expr, CELL);
expr_stack_push(&stack, expr, EXPR_STACK_START_LIST);
break;
case QUOTE: case QUASIQUOTE: case UNQUOTE: case UNQUOTE_LIST:
/* push (token-name <undefined>) onto the stack, and have
* it point to the first cell.
*/
alloc_of_type(&loc, CELL);
alloc_symbol_from_cstring(expr, token2string_repr[tok.typ],
strlen(token2string_repr[tok.typ]));
uns_set(gc, loc.p, 1, UNS_POINTER, expr->p);
alloc_of_type(expr, CELL);
uns_set(gc, loc.p, 2, UNS_POINTER, expr->p);
uns_set(gc, expr->p, 2, UNS_POINTER, empty_list.p);
if (tok.typ == QUOTE || tok.typ == QUASIQUOTE)
expr_stack_push(&stack, &loc, EXPR_STACK_QUOTELIKE);
else
expr_stack_push(&stack, &loc, EXPR_STACK_SURROUND_OTHER);
break;
/* All atoms are allocated into "expr" and will be stored in the
* container after the switch.
*/
case T_IDENT: case T_STRING:
alloc_of_type(expr, tok_to_type(tok.typ));
uns_set(gc, expr->p, 1, UNS_POINTER, tok.dat.p);
store = 1;
break;
case T_INT:
alloc_of_type(expr, INTEGER);
loc.p = uns_alloc(gc, sizeof(tok.i), 0);
memcpy(loc.p, &tok.i, sizeof(tok.i));
uns_set(gc, expr->p, 1, UNS_POINTER, loc.p);
store = 1;
break;
case T_FLOAT:
alloc_of_type(expr, FLOAT);
loc.p = uns_alloc(gc, sizeof(tok.f), 0);
memcpy(loc.p, &tok.f, sizeof(tok.f));
uns_set(gc, expr->p, 1, UNS_POINTER, loc.p);
store = 1;
break;
case RPAREN:
switch (expr_stack_state(&stack)) {
case EXPR_STACK_START_LIST:
/* Empty list found. Pop the cell and discard it.
* If we are not in a "surround" state, report error.
* Replace the surround state with an empty list.
*/
expr_stack_pop(&stack);
if (!stack.p) {
r = EXPR_PARSE_INTERNAL_ERROR;
goto end;
}
if (expr_stack_state(&stack) != EXPR_STACK_QUOTELIKE) {
r = EXPR_PARSE_INVALID_EMPTY_LIST;
goto end;
}
/* Discard quote/quasiquote and store the empty list instead. */
if (!expr_stack_pop(&stack)) {
r = EXPR_PARSE_INTERNAL_ERROR;
goto end;
}
expr->p = empty_list.p;
store = 1;
break;
case EXPR_STACK_IN_LIST:
/* In a list, there are two things on the stack:
* first cell and last cell.
* Place the empty list in the cdr of the last cell,
* and then discard the last cell. Put the first cell into expr
* and store it in the container in the stack before it.
*/
expr_stack_ctr(&stack, &loc);
if (get_type(loc.p) != CELL) {
r = EXPR_PARSE_INTERNAL_ERROR;
goto end;
}
uns_set(gc, loc.p, 2, UNS_POINTER, empty_list.p);
/* FALLTHROUGH */
case EXPR_STACK_IMPROPER_LIST_END:
/* CDR of the list has been filled. Pop the last cell. */
expr_stack_pop(&stack);
if (!stack.p) {
r = EXPR_PARSE_INTERNAL_ERROR;
goto end;
}
/* Put the first cell in expr, and read it into the next item
* of the stack.
*/
expr_stack_ctr(&stack, expr);
expr_stack_pop(&stack);
store = 1;
break;
case EXPR_STACK_INITIAL:
/* Expression is not balanced. */
r = EXPR_PARSE_EXCESS_RPAREN;
goto end;
case EXPR_STACK_IMPROPER_LIST:
/* (values .) is an invalid expression. */
r = EXPR_PARSE_BAD_IMPROPER_LIST;
goto end;
case EXPR_STACK_QUOTELIKE: case EXPR_STACK_SURROUND_OTHER:
/* ,) ') etc are invalid */
r = EXPR_PARSE_BAD_QUOTE;
goto end;
case EXPR_STACK_INVALID:
r = EXPR_PARSE_INTERNAL_ERROR;
goto end;
}
break;
case T_DOT:
if (expr_stack_state(&stack) != EXPR_STACK_IN_LIST) {
r = EXPR_PARSE_BAD_IMPROPER_LIST;
goto end;
}
expr_stack_change_state(&stack, EXPR_STACK_IMPROPER_LIST);
break;
case T_EOF:
/* If EOF is encountered in the middle of an expression, repor
* that. Otherwise, the parser reads nothing.
*/
if (expr_stack_state(&stack) != EXPR_STACK_INITIAL) {
r = EXPR_PARSE_INCOMPLETE;
goto end;
}
r = EXPR_PARSE_EOF;
goto end;
case TOKEN_NUM:
r = EXPR_PARSE_INTERNAL_ERROR;
goto end;
}
/* If store is true, that means "expr" contains a value to
* be placed into the container at the top of the stack.
*
* This is a loop because some cases need to cascade values up the
* stack without acting on a token. When the loop stops, further
* action requires a new token to be read.
*/
while (store) switch (expr_stack_state(&stack)) {
case EXPR_STACK_INVALID:
r = EXPR_PARSE_INTERNAL_ERROR;
goto end;
case EXPR_STACK_START_LIST:
/* The current thing on the stack is the first part of the
* list. Add the expression to car of the cell, and add
* push a pointer to the cell. This is the last cell in
* the list.
*/
expr_stack_ctr(&stack, &loc);
if (get_type(loc.p) != CELL) {
r = EXPR_PARSE_INTERNAL_ERROR;
goto end;
}
uns_set(gc, loc.p, 1, UNS_POINTER, expr->p);
expr_stack_push(&stack, &loc, EXPR_STACK_IN_LIST);
store = 0;
break;
case EXPR_STACK_IN_LIST:
/* The pointer on the top of the stack is the end of the
* list. Add a new list to it's cdr, and put expr into
* the car of the new end of the list.
*/
expr_stack_ctr(&stack, &loc);
if (get_type(loc.p) != CELL) {
r = EXPR_PARSE_INTERNAL_ERROR;
goto end;
}
alloc_of_type(&new_cell, CELL);
uns_set(gc, loc.p, 2, UNS_POINTER, new_cell.p);
uns_set(gc, new_cell.p, 1, UNS_POINTER, expr->p);
expr_stack_pop(&stack);
expr_stack_push(&stack, &new_cell, EXPR_STACK_IN_LIST);
store = 0;
break;
case EXPR_STACK_IMPROPER_LIST:
/* Add the expression to the cdr of the last element.
* Set the state to expect the end of an improper list.
*/
expr_stack_ctr(&stack, &loc);
uns_set(gc, loc.p, 2, UNS_POINTER, expr->p);
if (get_type(loc.p) != CELL) {
r = EXPR_PARSE_INTERNAL_ERROR;
goto end;
}
expr_stack_change_state(&stack, EXPR_STACK_IMPROPER_LIST_END);
store = 0;
break;
case EXPR_STACK_IMPROPER_LIST_END:
r = EXPR_PARSE_IMPROPER_LIST_OVERFLOW;
goto end;
case EXPR_STACK_QUOTELIKE: case EXPR_STACK_SURROUND_OTHER:
/* Expression at the top of the stack is
* (surrouding-thing <undefined>)
* Replace (car (cdr (top-of-stack)) with expr.
*/
expr_stack_ctr(&stack, &loc);
if (get_type(loc.p) != CELL) {
r = EXPR_PARSE_INTERNAL_ERROR;
goto end;
}
loc.p = uns_get(gc, loc.p, 2, NULL);
if (get_type(loc.p) != CELL) {
r = EXPR_PARSE_INTERNAL_ERROR;
goto end;
}
uns_set(gc, loc.p, 1, UNS_POINTER, expr->p);
/* Get the head of the quoted value again. */
expr_stack_ctr(&stack, expr);
expr_stack_pop(&stack);
/* This value needs to be stored somewhere. Continue looping. */
store = 1;
break;
case EXPR_STACK_INITIAL:
/* Finished parsing an expression at toplevel. */
expr_stack_ctr(&stack, &loc);
uns_set(gc, loc.p, 0, UNS_POINTER, expr->p);
/* Prepare for return. */
expr->p = loc.p;
expr_stack_pop(&stack);
if (stack.p) {
r = EXPR_PARSE_BAD_STACK_START;
goto end;
}
store = 0;
break;
}
}
end:
uns_root_remove(gc, &stack);
uns_root_remove(gc, &loc);
uns_root_remove(gc, &tok.dat);
uns_root_remove(gc, &new_cell);
return r;
}
static void oom(Uns_GC gc_)
{
(void)gc_;
printf("oom\n");
abort();
}
/* TODO: Make UNS_Lisp its own library and move this out. */
extern Uns_GC uns_lisp_gc_init(void);
static void init(void)
{
gc = uns_lisp_gc_init();
uns_set_oom(gc, oom);
uns_root_add(gc, &empty_list);
alloc_of_type(&empty_list, EMPTY_LIST);
}
static void display(struct uns_ctr *ctr, long indent)
{
struct uns_ctr tmp = {0};
long l;
double f;
if (!ctr->p) {
printf("<undefined>");
return;
}
switch (get_type(ctr->p)) {
case CELL:
uns_root_add(gc, &tmp);
printf("(");
tmp.p = uns_get(gc, ctr->p, 1, NULL);
display(&tmp, indent);
ctr->p = uns_get(gc, ctr->p, 2, NULL);
while (get_type(ctr->p) == CELL) {
tmp.p = uns_get(gc, ctr->p, 1, NULL);
if (get_type(tmp.p) == CELL) {
printf("\n");
for (l = 0; l < indent; l++)
printf(" ");
display(&tmp, indent + 1);
} else {
printf(" ");
display(&tmp, indent);
}
ctr->p = uns_get(gc, ctr->p, 2, NULL);
}
switch (get_type(ctr->p)) {
case EMPTY_LIST:
printf(")");
break;
default:
printf(" . ");
display(ctr, indent);
printf(")");
break;
}
uns_root_remove(gc, &tmp);
return;
case INTEGER:
memcpy(&l, uns_get(gc, ctr->p, 1, NULL), sizeof(long));
printf("%ld", l);
return;
case FLOAT:
memcpy(&f, uns_get(gc, ctr->p, 1, NULL), sizeof(double));
printf("%f", f);
return;
case STRING:
tmp.p = uns_get(gc, ctr->p, 1, NULL);
uns_root_add(gc, &tmp);
printf("\"%s\"", uns_string_cstring(gc, &tmp));
uns_root_remove(gc, &tmp);
return;
case SYMBOL:
tmp.p = uns_get(gc, ctr->p, 1, NULL);
uns_root_add(gc, &tmp);
printf("%s", uns_string_cstring(gc, &tmp));
uns_root_remove(gc, &tmp);
return;
case EMPTY_LIST:
printf("'()");
return;
default:
abort();
}
}
/* Contination passing style.
* Continuation passing IR uses explicit statements instead of
* shorthand (i.e. (__pass K atom) instead of (K atom)).
*
* CPS primitives:
* (__pass k e): Pass e to continuation k. e must be an atom.
* (__apply f l k): Pass the argument list "l" to "f", and pass
* the result to "k". "f" must be a "__klambda".
* (__konstruct e body): Construct a continuation that takes a
* single argument. This is shortened to "\e body" below.
* (__klambda l k body): Constructs a function that takes a single
* value along with a continuation.
* (__primitive-> args...) The primitive, but the last argument is a
* continuation.
* cps{atom, K} = (__pass K atom)
* cps{(__lambda l E), K} = (__pass K (__klambda l k cps{E,k}))
* cps{(F A1 ... AN), K} =
* cps{F, \f cps_list{'(), (A1 ... AN), \l (__apply f l K)}}
* cps_list{L, '(), K} = (__pass K L)
* cps_list{L, (A . B), K} =
* cps{A, \a cps_list{append{L,a}, B, K}}
* cps{(if E B1 B2), K} =
* cps{E, \e (__if* e (\K' cps{B1,K'}) (\K' cps{B1,K'}) K)}
*
* cps{(__define symb E}, K} = cps{E, \e (__define-> symb e K)}
* cps{(set! symb E), K} = cps{E, \e (__set!-> symb e K)}
*/
static void error(struct location *loc, const char *emsg)
{
fprintf(stderr, "%ld:%ld: ", loc->line, loc->offset);
fprintf(stderr, "%s\n", emsg);
}
int main(void)
{
struct uns_ctr expr = {0};
struct file input = {0};
input.loc.line = 1;
init();
uns_root_add(gc, &expr);
input.f = stdin;
while (!feof(input.f)) {
expr.p = NULL;
switch (expr_parse(&input, &expr)) {
case EXPR_PARSE_OK:
expr.p = uns_get(gc, expr.p, 0, NULL);
display(&expr, 1);
printf("\n");
break;
case EXPR_PARSE_INCOMPLETE:
error(&input.loc, "EOF before expression was finished");
break;
case EXPR_PARSE_EXCESS_RPAREN:
error(&input.loc, "Unbalanced parentheses");
break;
case EXPR_PARSE_INVALID_EMPTY_LIST:
error(&input.loc, "Invalid syntax for empty list (either '() or `'())");
break;
case EXPR_PARSE_BAD_QUOTE:
error(&input.loc, "Invalid syntax for quotes/unquotes");
break;
case EXPR_PARSE_IMPROPER_LIST_OVERFLOW:
error(&input.loc, "too many values at end of improper list (must be exactly one)");
case EXPR_PARSE_BAD_IMPROPER_LIST:
error(&input.loc, "Bad syntax for improper list (must be (value . value))");
break;
case EXPR_PARSE_INTERNAL_ERROR:
error(&input.loc, "Bug in implementation\n");
break;
case EXPR_PARSE_BAD_STACK_START:
error(&input.loc, "Bug: bottom of stack not correct\n");
break;
case EXPR_PARSE_EOF:
error(&input.loc, "EOF\n");
break;
}
}
uns_root_remove(gc, &expr);
uns_deinit(gc);
return 0;
}

View File

@ -0,0 +1,60 @@
/* Copyright (c) 2024, Peter McGoron
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1) Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2) Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <stdio.h>
#include <stdlib.h>
#include "uns.h"
#include "cheney_c89.h"
static void after_gc(Uns_GC gc, struct uns_cheney_c89_statistics *stats)
{
fprintf(stderr,
"cheney_c89 %ld: %lu -> %lu\n",
stats->collection_number,
stats->usage_before,
stats->usage_after
);
if (stats->usage_after >= stats->usage_before * 7/10) {
uns_cheney_c89_set_new_heap_size(gc,
uns_cheney_c89_get_new_heap_size(gc) * 2);
}
}
Uns_GC uns_lisp_gc_init(void)
{
Uns_GC gc = malloc(uns_gc_size);
if (!uns_cheney_c89_init(gc, 512)) {
fprintf(stderr, "Error initializing GC\n");
exit(1);
}
uns_cheney_c89_set_collect_callback(gc, after_gc);
uns_cheney_c89_set_new_heap_size(gc, 1024);
return gc;
}

View File

@ -1,29 +1,35 @@
#!/bin/sh #!/bin/sh
# The road to hell is paved with Makefile generators. # The road to hell is paved with Makefile generators.
#
# Each test is compiled statically linked with a single collector.
#
# OBJDEPS: Dependencies for an object file.
# DEPS: Dependencies for an executable file.
TEST_TARGETS="" TEST_TARGETS=""
GENERATED_OBJS="" GENERATED_OBJS=""
gen_test() { gen_test() { # test_name, collector_name, exec_deps, obj_file_deps
local NAME COLLECTOR TEST_IMPL DEPS OBJDEPS local NAME COLLECTOR TARGET TEST_IMPL DEPS OBJDEPS
NAME=$1 NAME=$1
COLLECTOR=$2 COLLECTOR=$2
TARGET="$NAME"_"$COLLECTOR".test
TEST_IMPL="$NAME.c" TEST_IMPL="$NAME.c"
DEPS="$1.o uns.o $3" DEPS="$1.o uns.o $3"
OBJDEPS="include/uns.h $1.c $4" OBJDEPS="include/uns.h $1.c $4"
printf '%s.o: %s\n' "$NAME" "$OBJDEPS" echo "
printf '%s_%s.test: %s\n' "$NAME" "$COLLECTOR" "$DEPS" $TARGET: $DEPS
printf '\t${CC} ${LDFLAGS} %s -o %s_%s.test\n' \ \${CC} \${LDFLAGS} $DEPS $TARGET
"$DEPS" "$NAME" "$COLLECTOR" ./valgrind ./$TARGET
printf '\tvalgrind ./%s_%s.test\n' "$NAME" "$COLLECTOR" "
TEST_TARGETS=$(printf "%s %s_%s.test" "$TEST_TARGETS" "$NAME" "$COLLECTOR") TEST_TARGETS="$TEST_TARGETS $TARGET"
GENERATED_OBJS="$GENERATED_OBJS $NAME.o" GENERATED_OBJS="$GENERATED_OBJS $NAME.o"
} }
gen_string_test() { gen_string_test() { # collector_name, exec_deps
local COLLECTOR DEPS OBJDEPS local COLLECTOR DEPS OBJDEPS
COLLECTOR=$1 COLLECTOR=$1
@ -40,7 +46,7 @@ gen_string_test() {
"$DEPS" "$OBJDEPS" "$DEPS" "$OBJDEPS"
} }
gen_hashtable_test() { gen_hashtable_test() { # collector_name, exec_deps
local COLLECTOR DEPS OBJDEPS local COLLECTOR DEPS OBJDEPS
COLLECTOR=$1 COLLECTOR=$1
@ -54,14 +60,36 @@ gen_hashtable_test() {
"$DEPS" "$OBJDEPS" "$DEPS" "$OBJDEPS"
} }
gen_lisp_test() { #collector_name, exec_deps
local COLLECTOR SHIM_OBJ TARGET DEPS OBJDEPS
COLLECTOR=$1
TARGET="examples/lisp/uns_lisp_$COLLECTOR"
SHIM_OBJ="$TARGET.o"
OBJDEPS="examples/lisp/uns_lisp_$COLLECTOR.c include/uns.h include/$COLLECTOR.h"
DEPS="$2 $SHIM_OBJ uns.o "'${UNS_LISP_OBJS}'
echo "
$SHIM_OBJ: $OBJDEPS
$TARGET: $DEPS
\${CC} \${LDFLAGS} $DEPS -o $TARGET
"
TEST_TARGETS="$TEST_TARGETS $TARGET"
GENERTATED_OBJS="$GENERATED_OBJS $SHIM_OBJ"
}
gen_tests() { gen_tests() {
echo "examples/test_$1.o: include/uns.h $2" echo "examples/test_$1.o: include/uns.h $2"
GENERATED_OBJS="$GENERATED_OBJS examples/test_$1.o" GENERATED_OBJS="$GENERATED_OBJS examples/test_$1.o"
gen_string_test "$1" '${CHENEY_C89_OBJS}' gen_string_test "$1" '${CHENEY_C89_OBJS}'
gen_hashtable_test "$1" '${CHENEY_C89_OBJS}' gen_hashtable_test "$1" '${CHENEY_C89_OBJS}'
gen_lisp_test "$1" '${CHENEY_C89_OBJS}'
} }
gen_tests cheney_c89 "include/cheney_c89.h" gen_tests cheney_c89 "include/cheney_c89.h"
echo
printf "tests: %s\n" "$TEST_TARGETS" printf "tests: %s\n" "$TEST_TARGETS"
printf 'clean_tests:\n\t${RM} -f %s %s\n' "$TEST_TARGETS" "$GENERATED_OBJS" printf 'clean_tests:\n\t${RM} -f %s %s\n' "$TEST_TARGETS" "$GENERATED_OBJS"