aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorGravatar Peter McGoron 2024-06-21 00:22:44 -0400
committerGravatar Peter McGoron 2024-06-21 00:22:44 -0400
commite300baf0db9a39aa16b61c5f8d9c74ec90b86480 (patch)
treef55b340875ad7d5e05016ac748fb29d22b1e8ef5
parenttokenizer (diff)
parsing
-rw-r--r--Makefile7
-rw-r--r--main.c326
2 files changed, 292 insertions, 41 deletions
diff --git a/Makefile b/Makefile
index cc596b2..106ffab 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,13 @@
+.PHONY: run
UNIVERSAL_SERVICE_DIR=universalservice
CFLAGS=-I${UNIVERSAL_SERVICE_DIR}/include -g -I${UNIVERSAL_SERVICE_DIR}/examples/string -std=c89 -Wall -pedantic -Werror
LDFLAGS=-L${UNIVERSAL_SERVICE_DIR} -luniversalservice -L${UNIVERSAL_SERVICE_DIR}/examples/string -lunsstring
flatrate: main.c
$(CC) main.c -o flatrate $(CFLAGS) $(LDFLAGS)
+run: flatrate
+ LD_LIBRARY_PATH="$$(pwd)/${UNIVERSAL_SERVICE_DIR}/:$$(pwd)/${UNIVERSAL_SERVICE_DIR}/examples/string" ./flatrate < prelude.scm
+debug: flatrate
+ LD_LIBRARY_PATH="$$(pwd)/${UNIVERSAL_SERVICE_DIR}/:$$(pwd)/${UNIVERSAL_SERVICE_DIR}/examples/string" gdb -tui ./flatrate
+valgrind: flatrate
+ LD_LIBRARY_PATH="$$(pwd)/${UNIVERSAL_SERVICE_DIR}/:$$(pwd)/${UNIVERSAL_SERVICE_DIR}/examples/string" valgrind ./flatrate < prelude.scm
diff --git a/main.c b/main.c
index 7ec3acb..b9560b0 100644
--- a/main.c
+++ b/main.c
@@ -31,6 +31,7 @@
#include "cheney_c89.h"
static struct uns_gc gc;
+static struct uns_ctr empty_list;
enum token_type {
T_EOF,
@@ -40,12 +41,13 @@ enum token_type {
QUASIQUOTE,
UNQUOTE,
UNQUOTE_LIST,
- IDENT,
- NUMBER,
- STRING,
+ T_IDENT,
+ T_NUMBER,
+ T_STRING,
TOKEN_NUM
};
+#if 0
static const char *token2string[TOKEN_NUM] = {
"EOF",
"(",
@@ -55,9 +57,10 @@ static const char *token2string[TOKEN_NUM] = {
",",
",@",
"IDENT",
- "NUMBER",
- "STRING"
+ "NUMBER_TOK",
+ "STRING_TOK"
};
+#endif
struct token {
enum token_type typ;
@@ -92,7 +95,7 @@ static void tok_string(FILE *input, struct token *tok)
{
int c;
- tok->typ = STRING;
+ tok->typ = T_STRING;
uns_string_alloc(&gc, &tok->dat, 32);
for (;;) {
c = getc(input);
@@ -146,7 +149,7 @@ static void tok_num(FILE *input, struct token *tok, int mul)
{
int c = getc(input);
- tok->typ = NUMBER;
+ tok->typ = T_NUMBER;
uns_string_alloc(&gc, &tok->dat, 32);
do {
uns_string_append_char(&gc, &tok->dat, c);
@@ -167,7 +170,7 @@ static int part_of_ident(int c)
static void tok_ident(FILE *input, struct token *tok, int c)
{
- tok->typ = IDENT;
+ tok->typ = T_IDENT;
uns_string_alloc(&gc, &tok->dat, 32);
do {
@@ -228,61 +231,302 @@ static void tokenize(FILE *input, struct token *tok)
}
}
+enum item_type {
+ CELL,
+ LAMBDA,
+ INTEGER,
+ STRING,
+ SYMBOL,
+ EMPTY_LIST,
+ LISP_NULL
+};
+
+static void alloc_of_type(struct uns_ctr *ctr, int typ)
+{
+ void *p;
+ int fields;
+ int i;
+
+ switch (typ) {
+ case CELL: fields = 2; break;
+ case LAMBDA: fields = 2; break;
+ case INTEGER: fields = 1; break;
+ case STRING: fields = 1; break;
+ case SYMBOL: fields = 1; break;
+ case EMPTY_LIST: fields = 0; break;
+ case LISP_NULL: fields = 0; break;
+ }
+
+ ctr->p = gc.alloc_record(&gc, fields + 1);
+ p = gc.alloc(&gc, sizeof(int));
+ memcpy(p, &typ, sizeof(int));
+ gc.record_set_ptr(ctr->p, 0, p);
+
+ for (i = 0; i < fields; i++)
+ gc.record_set_ptr(ctr->p, i + 1, NULL);
+}
+
+static int get_type(Uns_ptr p)
+{
+ int typ;
+ void *innerp;
+
+ if (!p)
+ return LISP_NULL;
+ innerp = gc.record_get_ptr(p, 0);
+
+ memcpy(&typ, innerp, sizeof(int));
+ return typ;
+}
+
+static void alloc_symbol_from_cstring(struct uns_ctr *ctr, const char *s, size_t slen)
+{
+ struct uns_ctr str = {0};
+
+ uns_root_add(&gc, &str);
+ uns_string_alloc(&gc, &str, slen);
+ uns_string_append_bytes(&gc, &str, s, slen);
+
+ alloc_of_type(ctr, SYMBOL);
+ gc.record_set_ptr(ctr->p, 1, str.p);
+
+ uns_root_remove(&gc, &str);
+}
+
+static void alloc_integer(struct uns_ctr *ctr, long l)
+{
+ void *p;
+ alloc_of_type(ctr, INTEGER);
+
+ p = gc.alloc(&gc, sizeof(long));
+ memcpy(p, &l, sizeof(long));
+ gc.record_set_ptr(ctr->p, 1, p);
+}
+
+static int expr_all(FILE *input, struct token *tok, struct uns_ctr *expr);
+
+static int list_expr(FILE *input, struct uns_ctr *expr, struct token *tok)
+{
+ struct uns_ctr in_car = {0};
+ struct uns_ctr in_cdr = {0};
+ struct uns_ctr cur_head = {0};
+ int r = 1;
+
+ uns_root_add(&gc, &in_car);
+ uns_root_add(&gc, &in_cdr);
+ uns_root_add(&gc, &cur_head);
+
+ alloc_of_type(expr, CELL);
+ cur_head.p = expr->p;
+
+ for (;;) {
+ if (!expr_all(input, tok, &in_car)) {
+ r = 0;
+ goto end;
+ }
+
+ gc.record_set_ptr(cur_head.p, 1, in_car.p);
+
+ tokenize(input, tok);
+ if (tok->typ == RPAREN)
+ break;
+
+ alloc_of_type(&in_cdr, CELL);
+ gc.record_set_ptr(cur_head.p, 2, in_cdr.p);
+ cur_head.p = in_cdr.p;
+ }
+
+ gc.record_set_ptr(cur_head.p, 2, empty_list.p);
+end:
+ uns_root_remove(&gc, &in_car);
+ uns_root_remove(&gc, &in_cdr);
+ uns_root_remove(&gc, &cur_head);
+ return r;
+}
+
+static int surround_expr(FILE *input, struct uns_ctr *expr, struct token *tok, const char *name)
+{
+ struct uns_ctr tmp = {0};
+ struct uns_ctr quoted = {0};
+ int r = 0;
+
+ uns_root_add(&gc, &tmp);
+ uns_root_add(&gc, &quoted);
+
+ alloc_of_type(expr, CELL);
+
+ alloc_symbol_from_cstring(&tmp, name, strlen(name));
+ gc.record_set_ptr(expr->p, 1, tmp.p);
+
+ alloc_of_type(&tmp, CELL);
+ gc.record_set_ptr(expr->p, 2, tmp.p);
+
+ tokenize(input, tok);
+ if (expr_all(input, tok, &quoted)) {
+ r = 1;
+ gc.record_set_ptr(tmp.p, 1, quoted.p);
+ gc.record_set_ptr(tmp.p, 2, empty_list.p);
+ }
+
+ uns_root_remove(&gc, &tmp);
+ uns_root_remove(&gc, &quoted);
+ return r;
+}
+
+/* expr does not call tokenizer directly: it acts on an input token */
+static int expr_all(FILE *input, struct token *tok, struct uns_ctr *expr)
+{
+ switch (tok->typ) {
+ case LPAREN:
+ tokenize(input, tok);
+ if (tok->typ == RPAREN)
+ expr->p = empty_list.p;
+ else
+ return list_expr(input, expr, tok);
+ break;
+ case QUOTE:
+ return surround_expr(input, expr, tok, "quote");
+ case QUASIQUOTE:
+ return surround_expr(input, expr, tok, "quasiquote");
+ case UNQUOTE:
+ return surround_expr(input, expr, tok, "unquote");
+ case UNQUOTE_LIST:
+ return surround_expr(input, expr, tok, "unquote-list");
+ case T_IDENT:
+ alloc_of_type(expr, SYMBOL);
+ gc.record_set_ptr(expr->p, 1, tok->dat.p);
+ break;
+ case T_NUMBER:
+ alloc_integer(expr, tok->i);
+ break;
+ case T_STRING:
+ alloc_of_type(expr, STRING);
+ gc.record_set_ptr(expr->p, 1, tok->dat.p);
+ break;
+ case RPAREN: case T_EOF: case TOKEN_NUM:
+ return 0;
+ }
+
+ return 1;
+}
+
+static int read_next(FILE *input, struct uns_ctr *expr)
+{
+ struct token tok = {0};
+ int r = 0;
+
+ uns_root_add(&gc, &tok.dat);
+ tokenize(input, &tok);
+ expr->p = NULL;
+ if (tok.typ != T_EOF)
+ r = expr_all(input, &tok, expr);
+ uns_root_remove(&gc, &tok.dat);
+
+ return r;
+}
+
static void oom(struct uns_gc *gc_)
{
(void)gc_;
printf("oom\n");
- exit(1);
+ abort();
}
static void after_gc(struct uns_gc *gc_)
{
(void)gc_;
- if (gc.after_collection >= gc.next_alloc * 7/10)
- gc.next_alloc *= 2;
+ fprintf(stderr,
+ "The garbage collector has run %ld times\n"
+ "\tbefore collection: %lu\n"
+ "\tafter collection: %lu\n",
+ gc.collection_number,
+ gc.before_collection,
+ gc.after_collection
+ );
+
+
+ gc.next_alloc *= 2;
}
-int main(void)
+static void init_gc(void)
{
- FILE *input = stdin;
- struct token tok;
- int indent = 0;
- int i;
-
gc.next_alloc = 512;
gc.oom = oom;
gc.after_gc = after_gc;
gc.ctx = malloc(uns_cheney_c89_ctx_size);
if(!gc.ctx || !uns_cheney_c89_init(&gc))
- return 1;
- uns_root_add(&gc, &tok.dat);
+ exit(1);
+ gc.next_alloc *= 2;
- do {
- tokenize(input, &tok);
- if (tok.typ == LPAREN)
- indent++;
- else if (tok.typ == RPAREN)
- indent--;
-
- for (i = 0; i < indent; i++)
- printf(" ");
-
- printf("%s ", token2string[tok.typ]);
- switch (tok.typ) {
- case STRING: case IDENT:
- printf("[%s] ", uns_string_cstring(&gc, &tok.dat));
- break;
- case NUMBER:
- printf("[%ld] ", tok.i);
- break;
- default:
- break;
- }
+ uns_root_add(&gc, &empty_list);
+ alloc_of_type(&empty_list, EMPTY_LIST);
+}
+
+static void display(struct uns_ctr *ctr)
+{
+ struct uns_ctr tmp = {0};
+ long l;
+
+ if (!ctr->p) {
+ printf("__null");
+ return;
+ }
+
+ switch (get_type(ctr->p)) {
+ case CELL:
+ uns_root_add(&gc, &tmp);
+
+ printf("(");
+ tmp.p = gc.record_get_ptr(ctr->p, 1);
+ display(&tmp);
+
+ printf(" . ");
+
+ tmp.p = gc.record_get_ptr(ctr->p, 2);
+ display(&tmp);
+ printf(")");
+
+ uns_root_remove(&gc, &tmp);
+ return;
+ case INTEGER:
+ memcpy(&l, gc.record_get_ptr(ctr->p, 1), sizeof(long));
+ printf("%ld ", l);
+ return;
+ case STRING:
+ tmp.p = gc.record_get_ptr(ctr->p, 1);
+ uns_root_add(&gc, &tmp);
+ printf("\"%s\" ", uns_string_cstring(&gc, &tmp));
+ uns_root_remove(&gc, &tmp);
+ return;
+ case SYMBOL:
+ tmp.p = gc.record_get_ptr(ctr->p, 1);
+ uns_root_add(&gc, &tmp);
+ printf("'%s ", uns_string_cstring(&gc, &tmp));
+ uns_root_remove(&gc, &tmp);
+ return;
+ case EMPTY_LIST:
+ printf("'() ");
+ return;
+ default:
+ abort();
+ }
+}
+
+int main(void)
+{
+ struct uns_ctr expr = {0};
+
+ init_gc();
+ uns_root_add(&gc, &expr);
+
+ while (read_next(stdin, &expr)) {
+ display(&expr);
printf("\n");
- } while (tok.typ != T_EOF);
+ }
+ uns_root_remove(&gc, &expr);
uns_cheney_c89_deinit(&gc);
return 0;
}