From e300baf0db9a39aa16b61c5f8d9c74ec90b86480 Mon Sep 17 00:00:00 2001 From: Peter McGoron Date: Fri, 21 Jun 2024 00:22:44 -0400 Subject: [PATCH] parsing --- Makefile | 7 ++ main.c | 322 ++++++++++++++++++++++++++++++++++++++++++++++++------- 2 files changed, 290 insertions(+), 39 deletions(-) diff --git a/Makefile b/Makefile index cc596b2..106ffab 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,13 @@ +.PHONY: run UNIVERSAL_SERVICE_DIR=universalservice CFLAGS=-I${UNIVERSAL_SERVICE_DIR}/include -g -I${UNIVERSAL_SERVICE_DIR}/examples/string -std=c89 -Wall -pedantic -Werror LDFLAGS=-L${UNIVERSAL_SERVICE_DIR} -luniversalservice -L${UNIVERSAL_SERVICE_DIR}/examples/string -lunsstring flatrate: main.c $(CC) main.c -o flatrate $(CFLAGS) $(LDFLAGS) +run: flatrate + LD_LIBRARY_PATH="$$(pwd)/${UNIVERSAL_SERVICE_DIR}/:$$(pwd)/${UNIVERSAL_SERVICE_DIR}/examples/string" ./flatrate < prelude.scm +debug: flatrate + LD_LIBRARY_PATH="$$(pwd)/${UNIVERSAL_SERVICE_DIR}/:$$(pwd)/${UNIVERSAL_SERVICE_DIR}/examples/string" gdb -tui ./flatrate +valgrind: flatrate + LD_LIBRARY_PATH="$$(pwd)/${UNIVERSAL_SERVICE_DIR}/:$$(pwd)/${UNIVERSAL_SERVICE_DIR}/examples/string" valgrind ./flatrate < prelude.scm diff --git a/main.c b/main.c index 7ec3acb..b9560b0 100644 --- a/main.c +++ b/main.c @@ -31,6 +31,7 @@ #include "cheney_c89.h" static struct uns_gc gc; +static struct uns_ctr empty_list; enum token_type { T_EOF, @@ -40,12 +41,13 @@ enum token_type { QUASIQUOTE, UNQUOTE, UNQUOTE_LIST, - IDENT, - NUMBER, - STRING, + T_IDENT, + T_NUMBER, + T_STRING, TOKEN_NUM }; +#if 0 static const char *token2string[TOKEN_NUM] = { "EOF", "(", @@ -55,9 +57,10 @@ static const char *token2string[TOKEN_NUM] = { ",", ",@", "IDENT", - "NUMBER", - "STRING" + "NUMBER_TOK", + "STRING_TOK" }; +#endif struct token { enum token_type typ; @@ -92,7 +95,7 @@ static void tok_string(FILE *input, struct token *tok) { int c; - tok->typ = STRING; + tok->typ = T_STRING; uns_string_alloc(&gc, &tok->dat, 32); for (;;) { c = getc(input); @@ -146,7 +149,7 @@ static void tok_num(FILE *input, struct token *tok, int mul) { int c = getc(input); - tok->typ = NUMBER; + tok->typ = T_NUMBER; uns_string_alloc(&gc, &tok->dat, 32); do { uns_string_append_char(&gc, &tok->dat, c); @@ -167,7 +170,7 @@ static int part_of_ident(int c) static void tok_ident(FILE *input, struct token *tok, int c) { - tok->typ = IDENT; + tok->typ = T_IDENT; uns_string_alloc(&gc, &tok->dat, 32); do { @@ -228,61 +231,302 @@ static void tokenize(FILE *input, struct token *tok) } } +enum item_type { + CELL, + LAMBDA, + INTEGER, + STRING, + SYMBOL, + EMPTY_LIST, + LISP_NULL +}; + +static void alloc_of_type(struct uns_ctr *ctr, int typ) +{ + void *p; + int fields; + int i; + + switch (typ) { + case CELL: fields = 2; break; + case LAMBDA: fields = 2; break; + case INTEGER: fields = 1; break; + case STRING: fields = 1; break; + case SYMBOL: fields = 1; break; + case EMPTY_LIST: fields = 0; break; + case LISP_NULL: fields = 0; break; + } + + ctr->p = gc.alloc_record(&gc, fields + 1); + p = gc.alloc(&gc, sizeof(int)); + memcpy(p, &typ, sizeof(int)); + gc.record_set_ptr(ctr->p, 0, p); + + for (i = 0; i < fields; i++) + gc.record_set_ptr(ctr->p, i + 1, NULL); +} + +static int get_type(Uns_ptr p) +{ + int typ; + void *innerp; + + if (!p) + return LISP_NULL; + innerp = gc.record_get_ptr(p, 0); + + memcpy(&typ, innerp, sizeof(int)); + return typ; +} + +static void alloc_symbol_from_cstring(struct uns_ctr *ctr, const char *s, size_t slen) +{ + struct uns_ctr str = {0}; + + uns_root_add(&gc, &str); + uns_string_alloc(&gc, &str, slen); + uns_string_append_bytes(&gc, &str, s, slen); + + alloc_of_type(ctr, SYMBOL); + gc.record_set_ptr(ctr->p, 1, str.p); + + uns_root_remove(&gc, &str); +} + +static void alloc_integer(struct uns_ctr *ctr, long l) +{ + void *p; + alloc_of_type(ctr, INTEGER); + + p = gc.alloc(&gc, sizeof(long)); + memcpy(p, &l, sizeof(long)); + gc.record_set_ptr(ctr->p, 1, p); +} + +static int expr_all(FILE *input, struct token *tok, struct uns_ctr *expr); + +static int list_expr(FILE *input, struct uns_ctr *expr, struct token *tok) +{ + struct uns_ctr in_car = {0}; + struct uns_ctr in_cdr = {0}; + struct uns_ctr cur_head = {0}; + int r = 1; + + uns_root_add(&gc, &in_car); + uns_root_add(&gc, &in_cdr); + uns_root_add(&gc, &cur_head); + + alloc_of_type(expr, CELL); + cur_head.p = expr->p; + + for (;;) { + if (!expr_all(input, tok, &in_car)) { + r = 0; + goto end; + } + + gc.record_set_ptr(cur_head.p, 1, in_car.p); + + tokenize(input, tok); + if (tok->typ == RPAREN) + break; + + alloc_of_type(&in_cdr, CELL); + gc.record_set_ptr(cur_head.p, 2, in_cdr.p); + cur_head.p = in_cdr.p; + } + + gc.record_set_ptr(cur_head.p, 2, empty_list.p); +end: + uns_root_remove(&gc, &in_car); + uns_root_remove(&gc, &in_cdr); + uns_root_remove(&gc, &cur_head); + return r; +} + +static int surround_expr(FILE *input, struct uns_ctr *expr, struct token *tok, const char *name) +{ + struct uns_ctr tmp = {0}; + struct uns_ctr quoted = {0}; + int r = 0; + + uns_root_add(&gc, &tmp); + uns_root_add(&gc, "ed); + + alloc_of_type(expr, CELL); + + alloc_symbol_from_cstring(&tmp, name, strlen(name)); + gc.record_set_ptr(expr->p, 1, tmp.p); + + alloc_of_type(&tmp, CELL); + gc.record_set_ptr(expr->p, 2, tmp.p); + + tokenize(input, tok); + if (expr_all(input, tok, "ed)) { + r = 1; + gc.record_set_ptr(tmp.p, 1, quoted.p); + gc.record_set_ptr(tmp.p, 2, empty_list.p); + } + + uns_root_remove(&gc, &tmp); + uns_root_remove(&gc, "ed); + return r; +} + +/* expr does not call tokenizer directly: it acts on an input token */ +static int expr_all(FILE *input, struct token *tok, struct uns_ctr *expr) +{ + switch (tok->typ) { + case LPAREN: + tokenize(input, tok); + if (tok->typ == RPAREN) + expr->p = empty_list.p; + else + return list_expr(input, expr, tok); + break; + case QUOTE: + return surround_expr(input, expr, tok, "quote"); + case QUASIQUOTE: + return surround_expr(input, expr, tok, "quasiquote"); + case UNQUOTE: + return surround_expr(input, expr, tok, "unquote"); + case UNQUOTE_LIST: + return surround_expr(input, expr, tok, "unquote-list"); + case T_IDENT: + alloc_of_type(expr, SYMBOL); + gc.record_set_ptr(expr->p, 1, tok->dat.p); + break; + case T_NUMBER: + alloc_integer(expr, tok->i); + break; + case T_STRING: + alloc_of_type(expr, STRING); + gc.record_set_ptr(expr->p, 1, tok->dat.p); + break; + case RPAREN: case T_EOF: case TOKEN_NUM: + return 0; + } + + return 1; +} + +static int read_next(FILE *input, struct uns_ctr *expr) +{ + struct token tok = {0}; + int r = 0; + + uns_root_add(&gc, &tok.dat); + tokenize(input, &tok); + expr->p = NULL; + if (tok.typ != T_EOF) + r = expr_all(input, &tok, expr); + uns_root_remove(&gc, &tok.dat); + + return r; +} + static void oom(struct uns_gc *gc_) { (void)gc_; printf("oom\n"); - exit(1); + abort(); } static void after_gc(struct uns_gc *gc_) { (void)gc_; - if (gc.after_collection >= gc.next_alloc * 7/10) - gc.next_alloc *= 2; + fprintf(stderr, + "The garbage collector has run %ld times\n" + "\tbefore collection: %lu\n" + "\tafter collection: %lu\n", + gc.collection_number, + gc.before_collection, + gc.after_collection + ); + + + gc.next_alloc *= 2; } -int main(void) +static void init_gc(void) { - FILE *input = stdin; - struct token tok; - int indent = 0; - int i; - gc.next_alloc = 512; gc.oom = oom; gc.after_gc = after_gc; gc.ctx = malloc(uns_cheney_c89_ctx_size); if(!gc.ctx || !uns_cheney_c89_init(&gc)) - return 1; - uns_root_add(&gc, &tok.dat); + exit(1); + gc.next_alloc *= 2; - do { - tokenize(input, &tok); - if (tok.typ == LPAREN) - indent++; - else if (tok.typ == RPAREN) - indent--; + uns_root_add(&gc, &empty_list); + alloc_of_type(&empty_list, EMPTY_LIST); +} - for (i = 0; i < indent; i++) - printf(" "); +static void display(struct uns_ctr *ctr) +{ + struct uns_ctr tmp = {0}; + long l; - printf("%s ", token2string[tok.typ]); - switch (tok.typ) { - case STRING: case IDENT: - printf("[%s] ", uns_string_cstring(&gc, &tok.dat)); - break; - case NUMBER: - printf("[%ld] ", tok.i); - break; - default: - break; - } + if (!ctr->p) { + printf("__null"); + return; + } + + switch (get_type(ctr->p)) { + case CELL: + uns_root_add(&gc, &tmp); + + printf("("); + tmp.p = gc.record_get_ptr(ctr->p, 1); + display(&tmp); + + printf(" . "); + + tmp.p = gc.record_get_ptr(ctr->p, 2); + display(&tmp); + printf(")"); + + uns_root_remove(&gc, &tmp); + return; + case INTEGER: + memcpy(&l, gc.record_get_ptr(ctr->p, 1), sizeof(long)); + printf("%ld ", l); + return; + case STRING: + tmp.p = gc.record_get_ptr(ctr->p, 1); + uns_root_add(&gc, &tmp); + printf("\"%s\" ", uns_string_cstring(&gc, &tmp)); + uns_root_remove(&gc, &tmp); + return; + case SYMBOL: + tmp.p = gc.record_get_ptr(ctr->p, 1); + uns_root_add(&gc, &tmp); + printf("'%s ", uns_string_cstring(&gc, &tmp)); + uns_root_remove(&gc, &tmp); + return; + case EMPTY_LIST: + printf("'() "); + return; + default: + abort(); + } +} + +int main(void) +{ + struct uns_ctr expr = {0}; + + init_gc(); + uns_root_add(&gc, &expr); + + while (read_next(stdin, &expr)) { + display(&expr); printf("\n"); - } while (tok.typ != T_EOF); + } + uns_root_remove(&gc, &expr); uns_cheney_c89_deinit(&gc); return 0; }