/* Copyright (c) 2024, Peter McGoron * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1) Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2) Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include #include #include #include "uns.h" #include "uns_string.h" #include "cheney_c89.h" static struct uns_gc gc; static struct uns_ctr empty_list; enum token_type { T_EOF, LPAREN, RPAREN, QUOTE, QUASIQUOTE, UNQUOTE, UNQUOTE_LIST, T_IDENT, T_INT, T_FLOAT, T_STRING, T_DOT, TOKEN_NUM }; #if 0 static const char *token2string[TOKEN_NUM] = { "EOF", "(", ")", "'", "`", ",", ",@", "IDENT", "NUMBER_TOK", "STRING_TOK", "." }; #endif struct token { enum token_type typ; struct uns_ctr dat; double f; long i; }; static int is_ws(int c) { return c == '\n' || c == '\t' || c == '\r' || c == '\v' || c == ' '; } static int get_skipws(FILE *input) { int c; for (;;) { c = getc(input); if (c == ';') { do { c = getc(input); } while (c != '\n'); ungetc(c, input); continue; } if (is_ws(c)) continue; return c; } } static void tok_string(FILE *input, struct token *tok) { int c; tok->typ = T_STRING; uns_string_alloc(&gc, &tok->dat, 32); for (;;) { c = getc(input); switch (c) { case '\\': c = getc(input); switch (c) { case ' ': case '\t': case '\r': case '\n': case '\v': do { c = getc(input); } while (is_ws(c)); ungetc(c, input); continue; case 'r': uns_string_append_char(&gc, &tok->dat, '\r'); continue; case 'n': uns_string_append_char(&gc, &tok->dat, '\n'); continue; case 'v': uns_string_append_char(&gc, &tok->dat, '\v'); continue; case 't': uns_string_append_char(&gc, &tok->dat, '\t'); continue; case '"': uns_string_append_char(&gc, &tok->dat, '"'); continue; default: uns_string_append_char(&gc, &tok->dat, '\\'); uns_string_append_char(&gc, &tok->dat, c); } case '"': /* " */ return; } } } static int tonum(int c) { switch (c) { case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': return c - '0'; default: return -1; } } static void tok_num(FILE *input, struct token *tok, int c) { int is_float = 0; uns_string_alloc(&gc, &tok->dat, 32); do { if (c == '.' || c == 'e' || c == 'E') is_float = 1; uns_string_append_char(&gc, &tok->dat, c); c = getc(input); } while (tonum(c) >= 0); ungetc(c, input); if (is_float) { tok->typ = T_FLOAT; tok->f = strtod(uns_string_cstring(&gc, &tok->dat), NULL); } else { tok->typ = T_INT; tok->i = strtol(uns_string_cstring(&gc, &tok->dat), NULL, 10); } tok->dat.p = NULL; } static int part_of_ident(int c) { return !is_ws(c) && c != '(' && c != ')' && c != '`' && c != '\'' && c != '\\' && c != '\"' && c != ',' && c != ';'; } static void tok_ident(FILE *input, struct token *tok, int c) { tok->typ = T_IDENT; uns_string_alloc(&gc, &tok->dat, 32); do { uns_string_append_char(&gc, &tok->dat, c); c = getc(input); } while (part_of_ident(c)); ungetc(c, input); } static void tokenize(FILE *input, struct token *tok) { int c = get_skipws(input); int c2; tok->dat.p = NULL; switch (c) { case EOF: tok->typ = T_EOF; return; case '(': tok->typ = LPAREN; return; case ')': tok->typ = RPAREN; return; case '\'': tok->typ = QUOTE; return; case '`': tok->typ = QUASIQUOTE; return; case ',': c = getc(input); if (c == '@') { tok->typ = UNQUOTE_LIST; } else { ungetc(c, input); tok->typ = UNQUOTE; } return; case '"': tok_string(input, tok); return; tok->typ = T_STRING; return; case '+': case '-': c2 = getc(input); ungetc(c2, input); if (tonum(c2) >= 0) { tok_num(input, tok, c); return; } /* FALLTHROUGH */ default: if (c == '.') { c2 = getc(input); /* Flatrate does not have floating point. */ if (part_of_ident(c2)) { tok_ident(input, tok, c); } else if (tonum(c2) >= 0) { tok_num(input, tok, c); } else { tok->typ = T_DOT; } } else if (tonum(c) >= 0) { tok_num(input, tok, c); } else { tok_ident(input, tok, c); } return; } } enum item_type { CELL, LAMBDA, INTEGER, FLOAT, STRING, SYMBOL, EMPTY_LIST, LISP_NULL }; static void alloc_of_type(struct uns_ctr *ctr, int typ) { void *p; int fields; int i; switch (typ) { case CELL: fields = 2; break; case LAMBDA: fields = 2; break; case INTEGER: fields = 1; break; case STRING: fields = 1; break; case SYMBOL: fields = 1; break; case EMPTY_LIST: fields = 0; break; case LISP_NULL: fields = 0; break; } ctr->p = gc.alloc_record(&gc, fields + 1); p = gc.alloc(&gc, sizeof(int)); memcpy(p, &typ, sizeof(int)); gc.record_set_ptr(ctr->p, 0, p); for (i = 0; i < fields; i++) gc.record_set_ptr(ctr->p, i + 1, NULL); } static int get_type(Uns_ptr p) { int typ; void *innerp; if (!p) return LISP_NULL; innerp = gc.record_get_ptr(p, 0); memcpy(&typ, innerp, sizeof(int)); return typ; } static void alloc_symbol_from_cstring(struct uns_ctr *ctr, const char *s, size_t slen) { struct uns_ctr str = {0}; uns_root_add(&gc, &str); uns_string_alloc(&gc, &str, slen); uns_string_append_bytes(&gc, &str, s, slen); alloc_of_type(ctr, SYMBOL); gc.record_set_ptr(ctr->p, 1, str.p); uns_root_remove(&gc, &str); } static void alloc_integer(struct uns_ctr *ctr, long l) { void *p; alloc_of_type(ctr, INTEGER); p = gc.alloc(&gc, sizeof(long)); memcpy(p, &l, sizeof(long)); gc.record_set_ptr(ctr->p, 1, p); } static void alloc_float(struct uns_ctr *ctr, double f) { void *p; alloc_of_type(ctr, FLOAT); p = gc.alloc(&gc, sizeof(double)); memcpy(p, &f, sizeof(double)); gc.record_set_ptr(ctr->p, 1, p); } static int expr_all(FILE *input, struct token *tok, struct uns_ctr *expr); static int list_expr(FILE *input, struct uns_ctr *expr, struct token *tok) { struct uns_ctr in_car = {0}; struct uns_ctr in_cdr = {0}; struct uns_ctr cur_head = {0}; int r = 1; uns_root_add(&gc, &in_car); uns_root_add(&gc, &in_cdr); uns_root_add(&gc, &cur_head); alloc_of_type(expr, CELL); cur_head.p = expr->p; for (;;) { if (!expr_all(input, tok, &in_car)) { r = 0; goto end; } gc.record_set_ptr(cur_head.p, 1, in_car.p); tokenize(input, tok); if (tok->typ == RPAREN) break; alloc_of_type(&in_cdr, CELL); if (tok->typ == T_DOT) { tokenize(input, tok); expr_all(input, tok, &in_cdr); gc.record_set_ptr(cur_head.p, 2, in_cdr.p); tokenize(input, tok); if (tok->typ != RPAREN) { r = 0; } goto end; } gc.record_set_ptr(cur_head.p, 2, in_cdr.p); cur_head.p = in_cdr.p; } gc.record_set_ptr(cur_head.p, 2, empty_list.p); end: uns_root_remove(&gc, &in_car); uns_root_remove(&gc, &in_cdr); uns_root_remove(&gc, &cur_head); return r; } static int surround_expr(FILE *input, struct uns_ctr *expr, struct token *tok, const char *name) { struct uns_ctr tmp = {0}; struct uns_ctr quoted = {0}; int r = 0; uns_root_add(&gc, &tmp); uns_root_add(&gc, "ed); alloc_of_type(expr, CELL); alloc_symbol_from_cstring(&tmp, name, strlen(name)); gc.record_set_ptr(expr->p, 1, tmp.p); alloc_of_type(&tmp, CELL); gc.record_set_ptr(expr->p, 2, tmp.p); tokenize(input, tok); if (expr_all(input, tok, "ed)) { r = 1; gc.record_set_ptr(tmp.p, 1, quoted.p); gc.record_set_ptr(tmp.p, 2, empty_list.p); } uns_root_remove(&gc, &tmp); uns_root_remove(&gc, "ed); return r; } /* expr does not call tokenizer directly: it acts on an input token */ static int expr_all(FILE *input, struct token *tok, struct uns_ctr *expr) { switch (tok->typ) { case LPAREN: tokenize(input, tok); if (tok->typ == RPAREN) expr->p = empty_list.p; else return list_expr(input, expr, tok); break; case QUOTE: return surround_expr(input, expr, tok, "quote"); case QUASIQUOTE: return surround_expr(input, expr, tok, "quasiquote"); case UNQUOTE: return surround_expr(input, expr, tok, "unquote"); case UNQUOTE_LIST: return surround_expr(input, expr, tok, "unquote-list"); case T_IDENT: alloc_of_type(expr, SYMBOL); gc.record_set_ptr(expr->p, 1, tok->dat.p); break; case T_INT: alloc_integer(expr, tok->i); break; case T_FLOAT: alloc_float(expr, tok->f); break; case T_STRING: alloc_of_type(expr, STRING); gc.record_set_ptr(expr->p, 1, tok->dat.p); break; case RPAREN: case T_EOF: case TOKEN_NUM: case T_DOT: return 0; } return 1; } static int read_next(FILE *input, struct uns_ctr *expr) { struct token tok = {0}; int r = 0; uns_root_add(&gc, &tok.dat); tokenize(input, &tok); expr->p = NULL; if (tok.typ != T_EOF) r = expr_all(input, &tok, expr); uns_root_remove(&gc, &tok.dat); return r; } static void oom(struct uns_gc *gc_) { (void)gc_; printf("oom\n"); abort(); } static void after_gc(struct uns_gc *gc_) { (void)gc_; fprintf(stderr, "The garbage collector has run %ld times\n" "\tbefore collection: %lu\n" "\tafter collection: %lu\n", gc.collection_number, gc.before_collection, gc.after_collection ); if (gc.after_collection >= gc.before_collection * 7/10) { fprintf(stderr, "\tincreasing\n"); gc.next_alloc *= 2; } } static void init_gc(void) { gc.next_alloc = 512; gc.oom = oom; gc.after_gc = after_gc; gc.ctx = malloc(uns_cheney_c89_ctx_size); if(!gc.ctx || !uns_cheney_c89_init(&gc)) exit(1); gc.next_alloc *= 2; uns_root_add(&gc, &empty_list); alloc_of_type(&empty_list, EMPTY_LIST); } static void display(struct uns_ctr *ctr) { struct uns_ctr tmp = {0}; long l; double f; if (!ctr->p) { printf(""); return; } switch (get_type(ctr->p)) { case CELL: uns_root_add(&gc, &tmp); printf("("); tmp.p = gc.record_get_ptr(ctr->p, 1); display(&tmp); printf(" . "); tmp.p = gc.record_get_ptr(ctr->p, 2); display(&tmp); printf(")"); uns_root_remove(&gc, &tmp); return; case INTEGER: memcpy(&l, gc.record_get_ptr(ctr->p, 1), sizeof(long)); printf("%ld ", l); return; case FLOAT: memcpy(&f, gc.record_get_ptr(ctr->p, 1), sizeof(float)); printf("%f ", f); return; case STRING: tmp.p = gc.record_get_ptr(ctr->p, 1); uns_root_add(&gc, &tmp); printf("\"%s\" ", uns_string_cstring(&gc, &tmp)); uns_root_remove(&gc, &tmp); return; case SYMBOL: tmp.p = gc.record_get_ptr(ctr->p, 1); uns_root_add(&gc, &tmp); printf("'%s ", uns_string_cstring(&gc, &tmp)); uns_root_remove(&gc, &tmp); return; case EMPTY_LIST: printf("'() "); return; default: abort(); } } int main(void) { struct uns_ctr expr = {0}; init_gc(); uns_root_add(&gc, &expr); while (read_next(stdin, &expr)) { display(&expr); printf("\n"); } uns_root_remove(&gc, &expr); uns_cheney_c89_deinit(&gc); return 0; }