aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorGravatar Peter McGoron 2024-06-19 23:27:14 -0400
committerGravatar Peter McGoron 2024-06-19 23:27:14 -0400
commitc573be476bc67892191aee937226ffcd590aa1db (patch)
treeb95137e2e5a48d5eda77a1769b3b06b836e66cc0
tokenizer
-rw-r--r--Makefile6
-rw-r--r--README.rst8
-rw-r--r--main.c288
-rw-r--r--prelude.scm203
4 files changed, 505 insertions, 0 deletions
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..cc596b2
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,6 @@
+UNIVERSAL_SERVICE_DIR=universalservice
+CFLAGS=-I${UNIVERSAL_SERVICE_DIR}/include -g -I${UNIVERSAL_SERVICE_DIR}/examples/string -std=c89 -Wall -pedantic -Werror
+LDFLAGS=-L${UNIVERSAL_SERVICE_DIR} -luniversalservice -L${UNIVERSAL_SERVICE_DIR}/examples/string -lunsstring
+
+flatrate: main.c
+ $(CC) main.c -o flatrate $(CFLAGS) $(LDFLAGS)
diff --git a/README.rst b/README.rst
new file mode 100644
index 0000000..e531dc1
--- /dev/null
+++ b/README.rst
@@ -0,0 +1,8 @@
+=============
+Flatrate LISP
+=============
+
+Flatrate is a minimalistic LISP. It's designed to test out my garbage
+collectors.
+
+The default macro system is non-hygenic.
diff --git a/main.c b/main.c
new file mode 100644
index 0000000..7ec3acb
--- /dev/null
+++ b/main.c
@@ -0,0 +1,288 @@
+/* Copyright (c) 2024, Peter McGoron
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1) Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2) Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include "uns.h"
+#include "uns_string.h"
+#include "cheney_c89.h"
+
+static struct uns_gc gc;
+
+enum token_type {
+ T_EOF,
+ LPAREN,
+ RPAREN,
+ QUOTE,
+ QUASIQUOTE,
+ UNQUOTE,
+ UNQUOTE_LIST,
+ IDENT,
+ NUMBER,
+ STRING,
+ TOKEN_NUM
+};
+
+static const char *token2string[TOKEN_NUM] = {
+ "EOF",
+ "(",
+ ")",
+ "'",
+ "`",
+ ",",
+ ",@",
+ "IDENT",
+ "NUMBER",
+ "STRING"
+};
+
+struct token {
+ enum token_type typ;
+ struct uns_ctr dat;
+ long i;
+};
+
+static int is_ws(int c)
+{
+ return c == '\n' || c == '\t' || c == '\r' || c == '\v' || c == ' ';
+}
+
+static int get_skipws(FILE *input)
+{
+ int c;
+
+ for (;;) {
+ c = getc(input);
+ if (c == ';') {
+ do { c = getc(input); } while (c != '\n');
+ ungetc(c, input);
+ continue;
+ }
+
+ if (is_ws(c))
+ continue;
+ return c;
+ }
+}
+
+static void tok_string(FILE *input, struct token *tok)
+{
+ int c;
+
+ tok->typ = STRING;
+ uns_string_alloc(&gc, &tok->dat, 32);
+ for (;;) {
+ c = getc(input);
+ switch (c) {
+ case '\\':
+ c = getc(input);
+ switch (c) {
+ case ' ': case '\t': case '\r': case '\n': case '\v':
+ do {
+ c = getc(input);
+ } while (is_ws(c));
+ ungetc(c, input);
+ continue;
+ case 'r':
+ uns_string_append_char(&gc, &tok->dat, '\r');
+ continue;
+ case 'n':
+ uns_string_append_char(&gc, &tok->dat, '\n');
+ continue;
+ case 'v':
+ uns_string_append_char(&gc, &tok->dat, '\v');
+ continue;
+ case 't':
+ uns_string_append_char(&gc, &tok->dat, '\t');
+ continue;
+ case '"':
+ uns_string_append_char(&gc, &tok->dat, '"');
+ continue;
+ default:
+ uns_string_append_char(&gc, &tok->dat, '\\');
+ uns_string_append_char(&gc, &tok->dat, c);
+ }
+ case '"': /* " */
+ return;
+ }
+ }
+}
+
+static int tonum(int c)
+{
+ switch (c) {
+ case '0': case '1': case '2': case '3': case '4': case '5':
+ case '6': case '7': case '8': case '9':
+ return c - '0';
+ default:
+ return -1;
+ }
+}
+
+static void tok_num(FILE *input, struct token *tok, int mul)
+{
+ int c = getc(input);
+
+ tok->typ = NUMBER;
+ uns_string_alloc(&gc, &tok->dat, 32);
+ do {
+ uns_string_append_char(&gc, &tok->dat, c);
+ c = getc(input);
+ } while (tonum(c) >= 0);
+ ungetc(c, input);
+
+ tok->i = mul*strtol(uns_string_cstring(&gc, &tok->dat), NULL, 10);
+ tok->dat.p = NULL;
+}
+
+static int part_of_ident(int c)
+{
+ return !is_ws(c) && c != '(' && c != ')' && c != '`'
+ && c != '\'' && c != '\\' && c != '\"' && c != ','
+ && c != ';';
+}
+
+static void tok_ident(FILE *input, struct token *tok, int c)
+{
+ tok->typ = IDENT;
+
+ uns_string_alloc(&gc, &tok->dat, 32);
+ do {
+ uns_string_append_char(&gc, &tok->dat, c);
+ c = getc(input);
+ } while (part_of_ident(c));
+ ungetc(c, input);
+}
+
+static void tokenize(FILE *input, struct token *tok)
+{
+ int c = get_skipws(input);
+ int c2;
+
+ tok->dat.p = NULL;
+
+ switch (c) {
+ case EOF:
+ tok->typ = T_EOF;
+ return;
+ case '(':
+ tok->typ = LPAREN;
+ return;
+ case ')':
+ tok->typ = RPAREN;
+ return;
+ case '\'':
+ tok->typ = QUOTE;
+ return;
+ case '`':
+ tok->typ = QUASIQUOTE;
+ return;
+ case ',':
+ c = getc(input);
+ if (c == '@') {
+ tok->typ = UNQUOTE_LIST;
+ } else {
+ ungetc(c, input);
+ tok->typ = UNQUOTE;
+ }
+ return;
+ case '"':
+ tok_string(input, tok);
+ return;
+ case '+': case '-':
+ c2 = getc(input);
+ ungetc(c2, input);
+ if (tonum(c2) >= 0) {
+ tok_num(input, tok, c == '+' ? 1 : -1);
+ return;
+ }
+ default:
+ if (tonum(c) >= 0)
+ tok_num(input, tok, 1);
+ else
+ tok_ident(input, tok, c);
+ return;
+ }
+}
+
+static void oom(struct uns_gc *gc_)
+{
+ (void)gc_;
+ printf("oom\n");
+ exit(1);
+}
+
+static void after_gc(struct uns_gc *gc_)
+{
+ (void)gc_;
+
+ if (gc.after_collection >= gc.next_alloc * 7/10)
+ gc.next_alloc *= 2;
+}
+
+int main(void)
+{
+ FILE *input = stdin;
+ struct token tok;
+ int indent = 0;
+ int i;
+
+ gc.next_alloc = 512;
+ gc.oom = oom;
+ gc.after_gc = after_gc;
+
+ gc.ctx = malloc(uns_cheney_c89_ctx_size);
+ if(!gc.ctx || !uns_cheney_c89_init(&gc))
+ return 1;
+ uns_root_add(&gc, &tok.dat);
+
+ do {
+ tokenize(input, &tok);
+ if (tok.typ == LPAREN)
+ indent++;
+ else if (tok.typ == RPAREN)
+ indent--;
+
+ for (i = 0; i < indent; i++)
+ printf(" ");
+
+ printf("%s ", token2string[tok.typ]);
+ switch (tok.typ) {
+ case STRING: case IDENT:
+ printf("[%s] ", uns_string_cstring(&gc, &tok.dat));
+ break;
+ case NUMBER:
+ printf("[%ld] ", tok.i);
+ break;
+ default:
+ break;
+ }
+ printf("\n");
+ } while (tok.typ != T_EOF);
+
+ uns_cheney_c89_deinit(&gc);
+ return 0;
+}
diff --git a/prelude.scm b/prelude.scm
new file mode 100644
index 0000000..097ca64
--- /dev/null
+++ b/prelude.scm
@@ -0,0 +1,203 @@
+; Copyright (c) 2024, Peter McGoron
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+;
+; 1) Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; 2) Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in the
+; documentation and/or other materials provided with the distribution.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+; TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+; Primitives are special syntatic forms. They differ from built-in
+; functions.
+;
+; primitives:
+; __unilambda: lambda of one argument
+; __define: define in the environment
+; __define-macro: Like __define but for macros
+; set!
+; if
+;
+; Others:
+; __uniapply: Apply function to single argument
+; __null: A value that can only be passed to __unilambda. It cannot be
+; bound to a top-level, or "set!", or added to a list.
+;
+; Macros are functions that return an sexpr.
+
+(__define-macro let
+ (__unilambda l ; car = args, cdr = body
+ (if (null? (car args))
+ (cdr args)
+ ; (car args) = ((A a) (B b) ...)
+ ; (car (car args)) = (A a)
+ ; (cdr (car (car args))) = (a)
+ `((__unilambda ,(car (car (car args)))
+ (let ,(cdr (car args)) ,@body))
+ ,(car (cdr (car (car args))))
+ )
+ )
+ )
+)
+
+(__define-macro let* let)
+
+(__define-macro __bindlambda
+ (__unilambda l
+ (let ((larg (car l))
+ (args (car (cdr l)))
+ (body (cdr (cdr l)))
+ )
+ (if (null? args)
+ `(__unilambda larg
+ (if (not (null? larg))
+ (raise "incorrect number of arguments")
+ ,body
+ )
+ )
+ (let* ((argval (cons args))
+ (rest (cdr args))
+ (arg (cons argval))
+ (val (cons (cdr argval)))
+ )
+ `(__unilambda ,larg
+ (let ((,arg ,val)) __bindlambda ,larg ,rest ,@body)
+ )
+ )
+ )
+ )
+ )
+)
+
+(__define-macro lambda
+ (__unilambda l
+ (let ((args (car l))
+ (body (cdr l))
+ )
+ (if (symbol? args)
+ `(__unilambda ,args ,@body)
+ (let ((larg (gensym)))
+ (if (null? args)
+ `(__unilambda ,larg ,@body)
+ `(__bindlambda ,larg . body)
+ )
+ )
+ )
+ )
+ )
+)
+
+(__define-macro define-macro
+ (__unilambda l
+ (let* ((name-and-args (car l))
+ (name (car name-and-args))
+ (args (cdr name-and-args))
+ (body (cdr l))
+ (tmpname (gensym))
+ )
+ `(__define-macro ,name
+ (lambda ,args ,body)
+ )
+ )
+ )
+)
+
+(define-macro (define name . body)
+ (if (symbol? name)
+ `(__define ,name ,@body)
+ `(__define ,(car name) (lambda ,(cdr name) ,@body))
+ )
+)
+
+(define-macro (begin . body)
+ `((lambda () ,@body))
+)
+
+(define-macro (and . body)
+ (if (null? body)
+ 1
+ (let ((first (car body))
+ (rest (cdr body))
+ )
+ `(if ,first (and ,@rest) 0)
+ )
+ )
+)
+
+(define-macro (or . body)
+ (if (null? body)
+ 0
+ (let ((first (car body))
+ (rest (cdr body))
+ )
+ `(if ,first 1 (or ,@rest))
+ )
+ )
+)
+
+(define-macro (letrec args . body)
+ (if (null? args)
+ body
+ (let* ((argval (car args))
+ (arg (car argval))
+ (val (car (cdr argval)))
+ (rest (cdr args))
+ )
+ `(let ((,arg __null))
+ (letrec ,rest (set! ,arg ,val) ,@body)
+ )
+ )
+ )
+)
+
+;; for a list of (v1 v2 ... vn)
+;; runs (f vn (... (f v2 (f v1 start))))
+(define (foldl f start l)
+ (letrec
+ ((loop
+ (lambda (ret-list cur-list)
+ (if (eqv? value '())
+ value
+ (loop (f (car cur-list) value)
+ (cdr cur-list))
+ )
+ )
+ ))
+ (loop start l)
+ )
+)
+
+(define (reverse l) (foldl cons '() l))
+
+;; for a list of (v1 v2 ... vn)
+;; runs (f v1 (f v2 (... (f vn start))))
+(define (foldr f start l) (foldl f start (reverse l)))
+
+(define (append . l)
+ (foldr (lambda (to-prepend collected)
+ (foldr cons collected to-prepend)
+ )
+ '()
+ l
+ )
+)
+
+(define (apply f . l) (__uniapply f (__uniapply append l)))
+(define (list . l) l)
+
+;; (define (+ . l) (foldl __bin+ 0 l))
+;; (define (* . l) (foldl __bin* 1 l))