From 53a174f8e2621f2ac8d452742fbc6d458983ccc1 Mon Sep 17 00:00:00 2001 From: Peter McGoron Date: Sat, 7 Sep 2024 17:47:10 -0400 Subject: [PATCH] readtables, first pass --- README.rst | 20 +++- chez-compat.scm | 5 + read.scm | 300 ++++++++++++++++++++++++++++++++++++++++++++++++ tests.scm | 5 - trie.scm | 138 ---------------------- 5 files changed, 321 insertions(+), 147 deletions(-) create mode 100644 read.scm delete mode 100644 trie.scm diff --git a/README.rst b/README.rst index 52cd843..ebc68a0 100644 --- a/README.rst +++ b/README.rst @@ -16,11 +16,23 @@ designed to be used by a severely limited Scheme interpreter, which * lacks ``call/cc``, ``call-with-values``, etc * lacks user definable macros -* only uses required features from R3RS - (except ``open-input-port``, ``close-input-port``, ``read-char``, - ``open-output-port``, ``close-output-port``, ``write-char``) * has fixnums only * only uses immutable strings * does not use "load" recursively +* uses R3RS essental procedures only (with some exceptions) + +The goal is to have the compiler run under the MiniScheme in ``miniscm`` +in DOS, and then run in GLLV to compile itself. + +A proper implementation must have + +* ``open-input-port``, ``read-char`` +* ``open-output-port``, ``write-char`` +* ``cond-expand`` +* ``and``, ``or`` +* ``error`` + +If your system supports any macro system at all, ``cond-expand``, +``and``, and ``or`` can be implemented. ``error`` can be implemented +as in SRFI-23 : https://srfi.schemers.org/srfi-23/srfi-23.html . -The goal is to have the compiler run under MiniScheme in DOS. diff --git a/chez-compat.scm b/chez-compat.scm index 39cb31a..f400a3e 100644 --- a/chez-compat.scm +++ b/chez-compat.scm @@ -47,3 +47,8 @@ (begin evaluated ...) (cond-expand rest ...))))) +(define %r6rs-error error) + +(define (error . rest) + (apply %r6rs-error (cons "UNSLISP" rest))) + diff --git a/read.scm b/read.scm new file mode 100644 index 0000000..0089232 --- /dev/null +++ b/read.scm @@ -0,0 +1,300 @@ +;;; Copyright (C) Peter McGoron 2024 +;;; This program is free software: you can redistribute it and/or modify +;;; it under the terms of the GNU General Public License as published by +;;; the Free Software Foundation, version 3 of the License. +;;; +;;; This program is distributed in the hope that it will be useful, +;;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;;; GNU General Public License for more details. +;;; +;;; You should have received a copy of the GNU General Public License +;;; along with this program. If not, see . + +;;; R7RS reader. This is the lexer-parser end, so it returns tokens and +;;; not concrete objects. +;;; +;;; The reader is based on a readtable that acts (in some instances) like +;;; a trie. The reader reads a character and looks it up in the readtable. +;;; The actions stored in the readtable are either opaque execution actions +;;; or transparent "pass" actions that jump to a new readtable to read more +;;; characters. + +(load "chez-compat.scm") +(load "util.scm") +(load "set.scm") + +;;; ;;;;;;;;;;;;;;;;;;;;;;;; +;;; Port reader wrapper +;;; ;;;;;;;;;;;;;;;;;;;;;;;; + +(define port->read-function + (lambda (port) + (lambda () + (read-char port)))) + +(define port->read + (lambda (read-function filename) + (let ((line-number 1) + (offset 0) + (pushback-buffer '())) + (let ((update-position + (lambda (ch) + (cond + ((or (eof-object? ch) (not ch)) (set! ch #f)) + ((eqv? ch #\newline) + (set! line-number (+ 1 line-number)) (set! offset 0)) + (offset (set! offset (+ 1 offset)))) + ch))) + (lambda (op . args) + (cond + ((eq? op 'pos) (list filename line-number offset)) + ((eq? op 'read) + (update-position + (if (null? pushback-buffer) + (read-function) + (let ((ch (car pushback-buffer))) + (set! pushback-buffer (cdr pushback-buffer)) + ch)))) + ((eq? op 'push) + (let ((ch (car args))) + (if (eqv? ch #\newline) + (begin + (set! line-number (- line-number 1)) + (set! offset #f)) + (set! offset (- offset 1))) + (set! pushback-buffer (cons ch pushback-buffer)))) + (else (error "read->port: invalid" args)))))))) + +;;; ;;;;;;;;;;;;;; +;;; Character maps +;;; ;;;;;;;;;;;;;; + +(define integer<=> + (lambda (x y) + (cond + ((< x y) '<) + ((= x y) '=) + (else '>)))) + +(define char<=> + (lambda (x y) + (integer<=> (char->integer x) + (char->integer y)))) + +(define %charmap:<=> (set:<=>-to-map char<=>)) +(define %charmap:update (set:update %charmap:<=>)) + +(define charmap:update (map:update %charmap:update)) +(define charmap:insert (map:insert %charmap:update)) +(define charmap:search (map:search %charmap:<=>)) + +(define charmap:insert-many + (lambda (cmap . pairs) + (fold (lambda (pair cmap) + (car (charmap:insert cmap (car pair) (cdr pair)))) + cmap + pairs))) + +;;; ;;;;;;;;;; +;;; Readtable constructors +;;; +;;; Readtable actions are objects with the following messages: +;;; +;;; (STEP TABLE CHAR ACC PORT): Act on CHAR from TABLE with accumulated +;;; value ACC and PORT. +;;; (UPDATE REST ACTION): Return a new readtable action which is the +;;; action of reading REST and executing ACTION after that. This should +;;; preserve the previous action as much as possible. +;;; ;;;;;;;;;; + +;;; Pass to new readtable. +;;; +;;; STEP reads the next character in the sequence and jumps to TABLE +;;; modifying ACC. +;;; +;;; On UPDATE: +;;; If (NULL? REST), return PASS with a TABLE updated with its default +;;; action to be ACTION. +;;; If (PAIR? REST), return PASS with (TABLE UPDATE REST ACTION). +(define readtable:pass:new + (lambda (table) + (let ((step + (lambda (oldtable char acc port) + (table (port 'read) acc port))) + (update + (lambda (rest action) + (readtable:pass:new + (if (null? rest) + (table 'with-default-action action) + (table 'update rest action)))))) + (lambda (op . args) + (cond + ((eq? op 'update) (apply update args)) + ((eq? op 'step) (apply step args)) + (else (error "readtable:pass: invalid" args))))))) + +;;; Execute an action. +;;; +;;; STEP calls PROC with the same arguments. +;;; +;;; On UPDATE: +;;; If (NULL? REST), then replace this action with ACTION. +;;; If (PAIR? REST), return PASS with an empty table with this object +;;; as the default action, and run (UPDATE REST ACTION) on it. +(define readtable:exec:new + (lambda (proc) + (let ((update + (lambda (rest action) + (if (null? rest) + action + ((readtable:pass:new readtable:empty) + 'update rest action))))) + (lambda (op . args) + (cond + ((eq? op 'update) (apply update args)) + ((eq? op 'step) (apply proc args)) + (else (error "readtable:exec: invalid" args))))))) + +;;; Define a new readtable. +;;; +;;; (X) where (CHAR? X): Execute the action associated with X, or the +;;; default action. +;;; +;;; (WITH-DEFAULT-ACTION NEW-ACTION): Return a readtable with the same +;;; backing table but with NEW-ACTION as the default action. +;;; +;;; (UPDATE REST ACTION): Update the action taken by the total application +;;; of REST to be ACTION. REST must be a pair. +(define readtable:new + (lambda (default-action charmap) + (letrec + ((lookup? + (lambda (char) + (let ((node (charmap:search charmap char))) + (if (null? node) + #f + (map:val node))))) + (lookup + (lambda (char) + (or (lookup? char) default-action))) + (run* + (lambda (handler char acc port) + (handler 'step table char acc port))) + (run + (lambda (char acc port) + (run* (lookup char) char acc port))) + (with-default-action + (lambda (new-default-action) + (readtable:new new-default-action charmap))) + (update + (lambda (rest action) + (if (null? rest) + (error "readtable update: invalid" (list rest action)) + (readtable:new default-action + (charmap:update + charmap + (car rest) + (lambda (_ oldnode) + (if (null? oldnode) + (action 'update (cdr rest) action) + ((map:val oldnode) + 'update (cdr rest) action)))))))) + (table + (lambda (op . args) + (cond + ((not op) + (display (list op args)) + (apply run* default-action op args)) + ((char? op) (apply run op args)) + ((eq? op 'with-default-action) (apply with-default-action + args)) + ((eq? op 'update) (apply update args)) + (else (error "readtable: invalid" args)))))) + table))) + +;;; Add ACTION as the action to be taken in TABLE following SEQ. +(define readtable:add-sequence + (lambda (table seq action) + (let ((seq (cond + ((char? seq) (list seq)) + ((string? seq) (string->list seq)) + (else seq)))) + (table 'update seq action)))) + +;;; ;;;;;;;;;;;;;;; +;;; Default actions +;;; ;;;;;;;;;;;;;;; + +(define readtable:update-many + (lambda (table . pairs) + (fold (lambda (pair table) + (readtable:add-sequence table (car pair) + (cdr pair))) + table + pairs))) + +;;; Signal error on this action. +(define readtable:error-action + (lambda (emsg) + (readtable:exec:new + (lambda (table char acc port) + (error emsg (list table char acc port)))))) + +;;; Create an EXEC action that discards the current character. +;;; This should not be a PASS action, because PASS actions emulate +;;; a trie, which is not cyclic. +(define readtable:skip + (readtable:exec:new + (lambda (table char acc port) + (table 'run (port 'read) acc port)))) + +;;; Ignore everything and return a constant. +(define readtable:return-value + (lambda (value) + (readtable:exec:new + (lambda (table char acc port) + value)))) + +;;; ;;;;;;;;;;;;;;;;;;; +;;; Default readtables +;;; ;;;;;;;;;;;;;;;;;;; + +(define readtable:empty + (readtable:new (readtable:error-action "no more actions") + '())) + +;;; Ignore all characters until newline. +(define readtable:read-line-comment + (readtable:new + readtable:skip + (charmap:insert-many '() + (list + (cons #\newline (readtable:return-value #f)))))) + +(define readtable:top + (readtable:update-many + readtable:empty + (cons "#t" (readtable:return-value #t)) + (cons "#f" (readtable:return-value #f)) + (cons "#true" (readtable:return-value #t)))) + +;;; ;;;;;;;;;;; +;;; Test reader +;;; ;;;;;;;;;;; + +(define %list->read + (lambda (seq) + (port->read + (lambda () + (if (null? seq) + #f + (let ((ch (car seq))) + (set! seq (cdr seq)) + ch))) + "test"))) + +(let ((true-reader (%list->read (list #\# #\t)))) + (readtable:top (true-reader 'read) #f true-reader)) + + diff --git a/tests.scm b/tests.scm index ae93237..a3a6a6e 100644 --- a/tests.scm +++ b/tests.scm @@ -69,8 +69,3 @@ (display "running string BST-AVL tests") (newline) (report-tests %set:tests) - -(load "trie.scm") -(display "char trie") -(newline) -(report-tests %trie:tests) diff --git a/trie.scm b/trie.scm deleted file mode 100644 index c384e12..0000000 --- a/trie.scm +++ /dev/null @@ -1,138 +0,0 @@ -;;; Copyright (C) Peter McGoron 2024 -;;; This program is free software: you can redistribute it and/or modify -;;; it under the terms of the GNU General Public License as published by -;;; the Free Software Foundation, version 3 of the License. -;;; -;;; This program is distributed in the hope that it will be useful, -;;; but WITHOUT ANY WARRANTY; without even the implied warranty of -;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -;;; GNU General Public License for more details. -;;; -;;; You should have received a copy of the GNU General Public License -;;; along with this program. If not, see . - -;;; ;;;;;;;;;;;;;; -;;; Character maps -;;; ;;;;;;;;;;;;;; - -(define char<=> - (lambda (x y) - (let ((x (char->integer x)) - (y (char->integer y))) - (cond - ((< x y) '<) - ((= x y) '=) - (else '>))))) - - -(define %charmap:<=> (set:<=>-to-map char<=>)) -(define %charmap:update (set:update %charmap:<=>)) - -(define charmap:update (map:update %charmap:update)) -(define charmap:insert (map:insert %charmap:update)) -(define charmap:search (map:search %charmap:<=>)) - -;;; ;;;;;;;;;; -;;; Char tries -;;; -;;; Tries are pairs, CAR = backing map from chars to trie nodes, and -;;; CDR = function tied to this part of the trie. -;;; ;;;;;;;;;; - -;;; (TRIE:NEW TABLE FUNCTION) -(define trie:new cons) -(define trie:empty (trie:new '() #f)) - -;;; Get the function inside of trie NODE. -(define trie:function - (lambda (node) - (if (null? node) - #f - (cdr node)))) - -;;; Get the backing set inside of trie NODE. -(define trie:backing - (lambda (node) - (if (null? node) - '() - (car node)))) - -;;; Insert STRING-AS-LIST into NODE with value FUNCTION. -(define trie:insert - (lambda (node string-as-list function) - (if (null? string-as-list) - (trie:new (trie:backing node) function) - (let ((ch (car string-as-list)) - (string-as-list (cdr string-as-list))) - (let ((newtree (charmap:update (trie:backing node) ch - (lambda (node oldnode) - (if oldnode - (map:node-new-val - oldnode - ch - (trie:insert (map:val oldnode) - string-as-list - function)) - (map:empty-node ch - (trie:insert - trie:empty - string-as-list - function))))))) - (trie:new newtree (trie:function node))))))) - -(define trie:insert-many - (lambda (node lst) - (fold (lambda (pair node) - (let ((key (car pair))) - (let ((key (cond - ((list? key) key) - ((string? key) (string->list key)) - ((char? key) (list key))))) - (trie:insert node key (cdr pair))))) - node lst))) - -;;; Search for CH in NODE. -(define trie:search-single - (lambda (ch node) - (if (null? node) - '() - (let ((node (charmap:search (trie:backing node) ch))) - (if (null? node) - '() - (map:val node)))))) - -(define trie:search - (lambda (node str) - (fold trie:search-single node (string->list str)))) - -;;; ;;;;; -;;; Tests -;;; ;;;;; - -(define %trie:tests - (list - (cons "insert with prefixes" - (lambda () - (let ((trie - (trie:insert-many trie:empty - (list - (cons (string #\a #\b #\c #\d) 10) - (cons (string #\a #\b #\c) 20) - (cons (string #\a #\b #\c #\d #\e) 30) - (cons (string #\b) 40) - (cons (string #\b #\e #\g) 50)))) - (t '())) - (set! t (trie:search trie (string #\a #\b #\c))) - (if (not (= (trie:function t) 20)) - "abc not found" - (let ((t (trie:search-single #\d t))) - (if (not (= (trie:function t) 10)) - "abcd not found" - (let ((t (trie:search-single #\e t))) - (if (not (= (trie:function t) 30)) - "abcde not found" - (let ((t (trie:search-single #\b trie))) - (if (not (= (trie:function t) 40)) - "b not found" - (let ((t (trie:search t (string #\e #\g)))) - (= (trie:function t) 50))))))))))))))