596 lines
12 KiB
C
596 lines
12 KiB
C
/* Copyright (c) 2024, Peter McGoron
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions
|
|
* are met:
|
|
*
|
|
* 1) Redistributions of source code must retain the above copyright
|
|
* notice, this list of conditions and the following disclaimer.
|
|
* 2) Redistributions in binary form must reproduce the above copyright
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
* documentation and/or other materials provided with the distribution.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
|
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
|
|
* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
|
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
|
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
|
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
|
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
*/
|
|
|
|
#include <stdio.h>
|
|
#include <string.h>
|
|
#include <stdlib.h>
|
|
#include "uns.h"
|
|
#include "uns_string.h"
|
|
#include "cheney_c89.h"
|
|
|
|
static struct uns_gc gc;
|
|
static struct uns_ctr empty_list;
|
|
|
|
enum token_type {
|
|
T_EOF,
|
|
LPAREN,
|
|
RPAREN,
|
|
QUOTE,
|
|
QUASIQUOTE,
|
|
UNQUOTE,
|
|
UNQUOTE_LIST,
|
|
T_IDENT,
|
|
T_INT,
|
|
T_FLOAT,
|
|
T_STRING,
|
|
T_DOT,
|
|
TOKEN_NUM
|
|
};
|
|
|
|
#if 0
|
|
static const char *token2string[TOKEN_NUM] = {
|
|
"EOF",
|
|
"(",
|
|
")",
|
|
"'",
|
|
"`",
|
|
",",
|
|
",@",
|
|
"IDENT",
|
|
"NUMBER_TOK",
|
|
"STRING_TOK",
|
|
"."
|
|
};
|
|
#endif
|
|
|
|
struct token {
|
|
enum token_type typ;
|
|
struct uns_ctr dat;
|
|
|
|
double f;
|
|
long i;
|
|
};
|
|
|
|
static int is_ws(int c)
|
|
{
|
|
return c == '\n' || c == '\t' || c == '\r' || c == '\v' || c == ' ';
|
|
}
|
|
|
|
static int get_skipws(FILE *input)
|
|
{
|
|
int c;
|
|
|
|
for (;;) {
|
|
c = getc(input);
|
|
if (c == ';') {
|
|
do { c = getc(input); } while (c != '\n');
|
|
ungetc(c, input);
|
|
continue;
|
|
}
|
|
|
|
if (is_ws(c))
|
|
continue;
|
|
return c;
|
|
}
|
|
}
|
|
|
|
static void tok_string(FILE *input, struct token *tok)
|
|
{
|
|
int c;
|
|
|
|
tok->typ = T_STRING;
|
|
uns_string_alloc(&gc, &tok->dat, 32);
|
|
for (;;) {
|
|
c = getc(input);
|
|
switch (c) {
|
|
case '\\':
|
|
c = getc(input);
|
|
switch (c) {
|
|
case ' ': case '\t': case '\r': case '\n': case '\v':
|
|
do {
|
|
c = getc(input);
|
|
} while (is_ws(c));
|
|
ungetc(c, input);
|
|
continue;
|
|
case 'r':
|
|
uns_string_append_char(&gc, &tok->dat, '\r');
|
|
continue;
|
|
case 'n':
|
|
uns_string_append_char(&gc, &tok->dat, '\n');
|
|
continue;
|
|
case 'v':
|
|
uns_string_append_char(&gc, &tok->dat, '\v');
|
|
continue;
|
|
case 't':
|
|
uns_string_append_char(&gc, &tok->dat, '\t');
|
|
continue;
|
|
case '"':
|
|
uns_string_append_char(&gc, &tok->dat, '"');
|
|
continue;
|
|
default:
|
|
uns_string_append_char(&gc, &tok->dat, '\\');
|
|
uns_string_append_char(&gc, &tok->dat, c);
|
|
}
|
|
case '"': /* " */
|
|
return;
|
|
}
|
|
}
|
|
}
|
|
|
|
static int tonum(int c)
|
|
{
|
|
switch (c) {
|
|
case '0': case '1': case '2': case '3': case '4': case '5':
|
|
case '6': case '7': case '8': case '9':
|
|
return c - '0';
|
|
default:
|
|
return -1;
|
|
}
|
|
}
|
|
|
|
static void tok_num(FILE *input, struct token *tok, int c)
|
|
{
|
|
int is_float = 0;
|
|
|
|
uns_string_alloc(&gc, &tok->dat, 32);
|
|
do {
|
|
if (c == '.' || c == 'e' || c == 'E')
|
|
is_float = 1;
|
|
uns_string_append_char(&gc, &tok->dat, c);
|
|
c = getc(input);
|
|
} while (tonum(c) >= 0);
|
|
ungetc(c, input);
|
|
|
|
if (is_float) {
|
|
tok->typ = T_FLOAT;
|
|
tok->f = strtod(uns_string_cstring(&gc, &tok->dat), NULL);
|
|
} else {
|
|
tok->typ = T_INT;
|
|
tok->i = strtol(uns_string_cstring(&gc, &tok->dat), NULL, 10);
|
|
}
|
|
|
|
tok->dat.p = NULL;
|
|
}
|
|
|
|
static int part_of_ident(int c)
|
|
{
|
|
return !is_ws(c) && c != '(' && c != ')' && c != '`'
|
|
&& c != '\'' && c != '\\' && c != '\"' && c != ','
|
|
&& c != ';';
|
|
}
|
|
|
|
static void tok_ident(FILE *input, struct token *tok, int c)
|
|
{
|
|
tok->typ = T_IDENT;
|
|
|
|
uns_string_alloc(&gc, &tok->dat, 32);
|
|
do {
|
|
uns_string_append_char(&gc, &tok->dat, c);
|
|
c = getc(input);
|
|
} while (part_of_ident(c));
|
|
ungetc(c, input);
|
|
}
|
|
|
|
static void tokenize(FILE *input, struct token *tok)
|
|
{
|
|
int c = get_skipws(input);
|
|
int c2;
|
|
|
|
tok->dat.p = NULL;
|
|
|
|
switch (c) {
|
|
case EOF:
|
|
tok->typ = T_EOF;
|
|
return;
|
|
case '(':
|
|
tok->typ = LPAREN;
|
|
return;
|
|
case ')':
|
|
tok->typ = RPAREN;
|
|
return;
|
|
case '\'':
|
|
tok->typ = QUOTE;
|
|
return;
|
|
case '`':
|
|
tok->typ = QUASIQUOTE;
|
|
return;
|
|
case ',':
|
|
c = getc(input);
|
|
if (c == '@') {
|
|
tok->typ = UNQUOTE_LIST;
|
|
} else {
|
|
ungetc(c, input);
|
|
tok->typ = UNQUOTE;
|
|
}
|
|
return;
|
|
case '"':
|
|
tok_string(input, tok);
|
|
return;
|
|
tok->typ = T_STRING;
|
|
return;
|
|
case '+': case '-':
|
|
c2 = getc(input);
|
|
ungetc(c2, input);
|
|
if (tonum(c2) >= 0) {
|
|
tok_num(input, tok, c);
|
|
return;
|
|
}
|
|
/* FALLTHROUGH */
|
|
default:
|
|
if (c == '.') {
|
|
c2 = getc(input);
|
|
/* Flatrate does not have floating point. */
|
|
if (part_of_ident(c2)) {
|
|
tok_ident(input, tok, c);
|
|
} else if (tonum(c2) >= 0) {
|
|
tok_num(input, tok, c);
|
|
} else {
|
|
tok->typ = T_DOT;
|
|
}
|
|
} else if (tonum(c) >= 0) {
|
|
tok_num(input, tok, c);
|
|
} else {
|
|
tok_ident(input, tok, c);
|
|
}
|
|
return;
|
|
}
|
|
}
|
|
|
|
enum item_type {
|
|
CELL,
|
|
LAMBDA,
|
|
INTEGER,
|
|
FLOAT,
|
|
STRING,
|
|
SYMBOL,
|
|
EMPTY_LIST,
|
|
LISP_NULL
|
|
};
|
|
|
|
static void alloc_of_type(struct uns_ctr *ctr, int typ)
|
|
{
|
|
void *p;
|
|
int fields;
|
|
int i;
|
|
|
|
switch (typ) {
|
|
case CELL: fields = 2; break;
|
|
case LAMBDA: fields = 2; break;
|
|
case INTEGER: fields = 1; break;
|
|
case STRING: fields = 1; break;
|
|
case SYMBOL: fields = 1; break;
|
|
case EMPTY_LIST: fields = 0; break;
|
|
case LISP_NULL: fields = 0; break;
|
|
}
|
|
|
|
ctr->p = gc.alloc_record(&gc, fields + 1);
|
|
p = gc.alloc(&gc, sizeof(int));
|
|
memcpy(p, &typ, sizeof(int));
|
|
gc.record_set_ptr(ctr->p, 0, p);
|
|
|
|
for (i = 0; i < fields; i++)
|
|
gc.record_set_ptr(ctr->p, i + 1, NULL);
|
|
}
|
|
|
|
static int get_type(Uns_ptr p)
|
|
{
|
|
int typ;
|
|
void *innerp;
|
|
|
|
if (!p)
|
|
return LISP_NULL;
|
|
innerp = gc.record_get_ptr(p, 0);
|
|
|
|
memcpy(&typ, innerp, sizeof(int));
|
|
return typ;
|
|
}
|
|
|
|
static void alloc_symbol_from_cstring(struct uns_ctr *ctr, const char *s, size_t slen)
|
|
{
|
|
struct uns_ctr str = {0};
|
|
|
|
uns_root_add(&gc, &str);
|
|
uns_string_alloc(&gc, &str, slen);
|
|
uns_string_append_bytes(&gc, &str, s, slen);
|
|
|
|
alloc_of_type(ctr, SYMBOL);
|
|
gc.record_set_ptr(ctr->p, 1, str.p);
|
|
|
|
uns_root_remove(&gc, &str);
|
|
}
|
|
|
|
static void alloc_integer(struct uns_ctr *ctr, long l)
|
|
{
|
|
void *p;
|
|
alloc_of_type(ctr, INTEGER);
|
|
|
|
p = gc.alloc(&gc, sizeof(long));
|
|
memcpy(p, &l, sizeof(long));
|
|
gc.record_set_ptr(ctr->p, 1, p);
|
|
}
|
|
|
|
static void alloc_float(struct uns_ctr *ctr, double f)
|
|
{
|
|
void *p;
|
|
alloc_of_type(ctr, FLOAT);
|
|
|
|
p = gc.alloc(&gc, sizeof(double));
|
|
memcpy(p, &f, sizeof(double));
|
|
gc.record_set_ptr(ctr->p, 1, p);
|
|
}
|
|
|
|
static int expr_all(FILE *input, struct token *tok, struct uns_ctr *expr);
|
|
|
|
static int list_expr(FILE *input, struct uns_ctr *expr, struct token *tok)
|
|
{
|
|
struct uns_ctr in_car = {0};
|
|
struct uns_ctr in_cdr = {0};
|
|
struct uns_ctr cur_head = {0};
|
|
int r = 1;
|
|
|
|
uns_root_add(&gc, &in_car);
|
|
uns_root_add(&gc, &in_cdr);
|
|
uns_root_add(&gc, &cur_head);
|
|
|
|
alloc_of_type(expr, CELL);
|
|
cur_head.p = expr->p;
|
|
|
|
for (;;) {
|
|
if (!expr_all(input, tok, &in_car)) {
|
|
r = 0;
|
|
goto end;
|
|
}
|
|
|
|
gc.record_set_ptr(cur_head.p, 1, in_car.p);
|
|
|
|
tokenize(input, tok);
|
|
if (tok->typ == RPAREN)
|
|
break;
|
|
|
|
alloc_of_type(&in_cdr, CELL);
|
|
|
|
if (tok->typ == T_DOT) {
|
|
tokenize(input, tok);
|
|
expr_all(input, tok, &in_cdr);
|
|
gc.record_set_ptr(cur_head.p, 2, in_cdr.p);
|
|
tokenize(input, tok);
|
|
if (tok->typ != RPAREN) {
|
|
r = 0;
|
|
}
|
|
|
|
goto end;
|
|
}
|
|
|
|
gc.record_set_ptr(cur_head.p, 2, in_cdr.p);
|
|
cur_head.p = in_cdr.p;
|
|
}
|
|
|
|
gc.record_set_ptr(cur_head.p, 2, empty_list.p);
|
|
end:
|
|
uns_root_remove(&gc, &in_car);
|
|
uns_root_remove(&gc, &in_cdr);
|
|
uns_root_remove(&gc, &cur_head);
|
|
return r;
|
|
}
|
|
|
|
static int surround_expr(FILE *input, struct uns_ctr *expr, struct token *tok, const char *name)
|
|
{
|
|
struct uns_ctr tmp = {0};
|
|
struct uns_ctr quoted = {0};
|
|
int r = 0;
|
|
|
|
uns_root_add(&gc, &tmp);
|
|
uns_root_add(&gc, "ed);
|
|
|
|
alloc_of_type(expr, CELL);
|
|
|
|
alloc_symbol_from_cstring(&tmp, name, strlen(name));
|
|
gc.record_set_ptr(expr->p, 1, tmp.p);
|
|
|
|
alloc_of_type(&tmp, CELL);
|
|
gc.record_set_ptr(expr->p, 2, tmp.p);
|
|
|
|
tokenize(input, tok);
|
|
if (expr_all(input, tok, "ed)) {
|
|
r = 1;
|
|
gc.record_set_ptr(tmp.p, 1, quoted.p);
|
|
gc.record_set_ptr(tmp.p, 2, empty_list.p);
|
|
}
|
|
|
|
uns_root_remove(&gc, &tmp);
|
|
uns_root_remove(&gc, "ed);
|
|
return r;
|
|
}
|
|
|
|
/* expr does not call tokenizer directly: it acts on an input token */
|
|
static int expr_all(FILE *input, struct token *tok, struct uns_ctr *expr)
|
|
{
|
|
switch (tok->typ) {
|
|
case LPAREN:
|
|
tokenize(input, tok);
|
|
if (tok->typ == RPAREN)
|
|
expr->p = empty_list.p;
|
|
else
|
|
return list_expr(input, expr, tok);
|
|
break;
|
|
case QUOTE:
|
|
return surround_expr(input, expr, tok, "quote");
|
|
case QUASIQUOTE:
|
|
return surround_expr(input, expr, tok, "quasiquote");
|
|
case UNQUOTE:
|
|
return surround_expr(input, expr, tok, "unquote");
|
|
case UNQUOTE_LIST:
|
|
return surround_expr(input, expr, tok, "unquote-list");
|
|
case T_IDENT:
|
|
alloc_of_type(expr, SYMBOL);
|
|
gc.record_set_ptr(expr->p, 1, tok->dat.p);
|
|
break;
|
|
case T_INT:
|
|
alloc_integer(expr, tok->i);
|
|
break;
|
|
case T_FLOAT:
|
|
alloc_float(expr, tok->f);
|
|
break;
|
|
case T_STRING:
|
|
alloc_of_type(expr, STRING);
|
|
gc.record_set_ptr(expr->p, 1, tok->dat.p);
|
|
break;
|
|
case RPAREN: case T_EOF: case TOKEN_NUM: case T_DOT:
|
|
return 0;
|
|
}
|
|
|
|
return 1;
|
|
}
|
|
|
|
static int read_next(FILE *input, struct uns_ctr *expr)
|
|
{
|
|
struct token tok = {0};
|
|
int r = 0;
|
|
|
|
uns_root_add(&gc, &tok.dat);
|
|
tokenize(input, &tok);
|
|
expr->p = NULL;
|
|
if (tok.typ != T_EOF)
|
|
r = expr_all(input, &tok, expr);
|
|
uns_root_remove(&gc, &tok.dat);
|
|
|
|
return r;
|
|
}
|
|
|
|
static void oom(struct uns_gc *gc_)
|
|
{
|
|
(void)gc_;
|
|
printf("oom\n");
|
|
abort();
|
|
}
|
|
|
|
static void after_gc(struct uns_gc *gc_)
|
|
{
|
|
(void)gc_;
|
|
|
|
fprintf(stderr,
|
|
"The garbage collector has run %ld times\n"
|
|
"\tbefore collection: %lu\n"
|
|
"\tafter collection: %lu\n",
|
|
gc.collection_number,
|
|
gc.before_collection,
|
|
gc.after_collection
|
|
);
|
|
|
|
|
|
if (gc.after_collection >= gc.before_collection * 7/10) {
|
|
fprintf(stderr, "\tincreasing\n");
|
|
gc.next_alloc *= 2;
|
|
}
|
|
}
|
|
|
|
static void init_gc(void)
|
|
{
|
|
gc.next_alloc = 512;
|
|
gc.oom = oom;
|
|
gc.after_gc = after_gc;
|
|
|
|
gc.ctx = malloc(uns_cheney_c89_ctx_size);
|
|
if(!gc.ctx || !uns_cheney_c89_init(&gc))
|
|
exit(1);
|
|
|
|
gc.next_alloc *= 2;
|
|
|
|
uns_root_add(&gc, &empty_list);
|
|
alloc_of_type(&empty_list, EMPTY_LIST);
|
|
}
|
|
|
|
static void display(struct uns_ctr *ctr)
|
|
{
|
|
struct uns_ctr tmp = {0};
|
|
long l;
|
|
double f;
|
|
|
|
if (!ctr->p) {
|
|
printf("<undefined>");
|
|
return;
|
|
}
|
|
|
|
switch (get_type(ctr->p)) {
|
|
case CELL:
|
|
uns_root_add(&gc, &tmp);
|
|
|
|
printf("(");
|
|
tmp.p = gc.record_get_ptr(ctr->p, 1);
|
|
display(&tmp);
|
|
|
|
printf(" . ");
|
|
|
|
tmp.p = gc.record_get_ptr(ctr->p, 2);
|
|
display(&tmp);
|
|
printf(")");
|
|
|
|
uns_root_remove(&gc, &tmp);
|
|
return;
|
|
case INTEGER:
|
|
memcpy(&l, gc.record_get_ptr(ctr->p, 1), sizeof(long));
|
|
printf("%ld ", l);
|
|
return;
|
|
case FLOAT:
|
|
memcpy(&f, gc.record_get_ptr(ctr->p, 1), sizeof(float));
|
|
printf("%f ", f);
|
|
return;
|
|
case STRING:
|
|
tmp.p = gc.record_get_ptr(ctr->p, 1);
|
|
uns_root_add(&gc, &tmp);
|
|
printf("\"%s\" ", uns_string_cstring(&gc, &tmp));
|
|
uns_root_remove(&gc, &tmp);
|
|
return;
|
|
case SYMBOL:
|
|
tmp.p = gc.record_get_ptr(ctr->p, 1);
|
|
uns_root_add(&gc, &tmp);
|
|
printf("'%s ", uns_string_cstring(&gc, &tmp));
|
|
uns_root_remove(&gc, &tmp);
|
|
return;
|
|
case EMPTY_LIST:
|
|
printf("'() ");
|
|
return;
|
|
default:
|
|
abort();
|
|
}
|
|
}
|
|
|
|
int main(void)
|
|
{
|
|
struct uns_ctr expr = {0};
|
|
|
|
init_gc();
|
|
uns_root_add(&gc, &expr);
|
|
|
|
while (read_next(stdin, &expr)) {
|
|
display(&expr);
|
|
printf("\n");
|
|
}
|
|
|
|
uns_root_remove(&gc, &expr);
|
|
uns_cheney_c89_deinit(&gc);
|
|
return 0;
|
|
}
|