readtables, first pass

This commit is contained in:
Peter McGoron 2024-09-07 17:47:10 -04:00
parent f0cad72190
commit 53a174f8e2
5 changed files with 321 additions and 147 deletions

View File

@ -16,11 +16,23 @@ designed to be used by a severely limited Scheme interpreter, which
* lacks ``call/cc``, ``call-with-values``, etc
* lacks user definable macros
* only uses required features from R3RS
(except ``open-input-port``, ``close-input-port``, ``read-char``,
``open-output-port``, ``close-output-port``, ``write-char``)
* has fixnums only
* only uses immutable strings
* does not use "load" recursively
* uses R3RS essental procedures only (with some exceptions)
The goal is to have the compiler run under the MiniScheme in ``miniscm``
in DOS, and then run in GLLV to compile itself.
A proper implementation must have
* ``open-input-port``, ``read-char``
* ``open-output-port``, ``write-char``
* ``cond-expand``
* ``and``, ``or``
* ``error``
If your system supports any macro system at all, ``cond-expand``,
``and``, and ``or`` can be implemented. ``error`` can be implemented
as in SRFI-23 : https://srfi.schemers.org/srfi-23/srfi-23.html .
The goal is to have the compiler run under MiniScheme in DOS.

View File

@ -47,3 +47,8 @@
(begin evaluated ...)
(cond-expand rest ...)))))
(define %r6rs-error error)
(define (error . rest)
(apply %r6rs-error (cons "UNSLISP" rest)))

300
read.scm Normal file
View File

@ -0,0 +1,300 @@
;;; Copyright (C) Peter McGoron 2024
;;; This program is free software: you can redistribute it and/or modify
;;; it under the terms of the GNU General Public License as published by
;;; the Free Software Foundation, version 3 of the License.
;;;
;;; This program is distributed in the hope that it will be useful,
;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;;; GNU General Public License for more details.
;;;
;;; You should have received a copy of the GNU General Public License
;;; along with this program. If not, see <https://www.gnu.org/licenses/>.
;;; R7RS reader. This is the lexer-parser end, so it returns tokens and
;;; not concrete objects.
;;;
;;; The reader is based on a readtable that acts (in some instances) like
;;; a trie. The reader reads a character and looks it up in the readtable.
;;; The actions stored in the readtable are either opaque execution actions
;;; or transparent "pass" actions that jump to a new readtable to read more
;;; characters.
(load "chez-compat.scm")
(load "util.scm")
(load "set.scm")
;;; ;;;;;;;;;;;;;;;;;;;;;;;;
;;; Port reader wrapper
;;; ;;;;;;;;;;;;;;;;;;;;;;;;
(define port->read-function
(lambda (port)
(lambda ()
(read-char port))))
(define port->read
(lambda (read-function filename)
(let ((line-number 1)
(offset 0)
(pushback-buffer '()))
(let ((update-position
(lambda (ch)
(cond
((or (eof-object? ch) (not ch)) (set! ch #f))
((eqv? ch #\newline)
(set! line-number (+ 1 line-number)) (set! offset 0))
(offset (set! offset (+ 1 offset))))
ch)))
(lambda (op . args)
(cond
((eq? op 'pos) (list filename line-number offset))
((eq? op 'read)
(update-position
(if (null? pushback-buffer)
(read-function)
(let ((ch (car pushback-buffer)))
(set! pushback-buffer (cdr pushback-buffer))
ch))))
((eq? op 'push)
(let ((ch (car args)))
(if (eqv? ch #\newline)
(begin
(set! line-number (- line-number 1))
(set! offset #f))
(set! offset (- offset 1)))
(set! pushback-buffer (cons ch pushback-buffer))))
(else (error "read->port: invalid" args))))))))
;;; ;;;;;;;;;;;;;;
;;; Character maps
;;; ;;;;;;;;;;;;;;
(define integer<=>
(lambda (x y)
(cond
((< x y) '<)
((= x y) '=)
(else '>))))
(define char<=>
(lambda (x y)
(integer<=> (char->integer x)
(char->integer y))))
(define %charmap:<=> (set:<=>-to-map char<=>))
(define %charmap:update (set:update %charmap:<=>))
(define charmap:update (map:update %charmap:update))
(define charmap:insert (map:insert %charmap:update))
(define charmap:search (map:search %charmap:<=>))
(define charmap:insert-many
(lambda (cmap . pairs)
(fold (lambda (pair cmap)
(car (charmap:insert cmap (car pair) (cdr pair))))
cmap
pairs)))
;;; ;;;;;;;;;;
;;; Readtable constructors
;;;
;;; Readtable actions are objects with the following messages:
;;;
;;; (STEP TABLE CHAR ACC PORT): Act on CHAR from TABLE with accumulated
;;; value ACC and PORT.
;;; (UPDATE REST ACTION): Return a new readtable action which is the
;;; action of reading REST and executing ACTION after that. This should
;;; preserve the previous action as much as possible.
;;; ;;;;;;;;;;
;;; Pass to new readtable.
;;;
;;; STEP reads the next character in the sequence and jumps to TABLE
;;; modifying ACC.
;;;
;;; On UPDATE:
;;; If (NULL? REST), return PASS with a TABLE updated with its default
;;; action to be ACTION.
;;; If (PAIR? REST), return PASS with (TABLE UPDATE REST ACTION).
(define readtable:pass:new
(lambda (table)
(let ((step
(lambda (oldtable char acc port)
(table (port 'read) acc port)))
(update
(lambda (rest action)
(readtable:pass:new
(if (null? rest)
(table 'with-default-action action)
(table 'update rest action))))))
(lambda (op . args)
(cond
((eq? op 'update) (apply update args))
((eq? op 'step) (apply step args))
(else (error "readtable:pass: invalid" args)))))))
;;; Execute an action.
;;;
;;; STEP calls PROC with the same arguments.
;;;
;;; On UPDATE:
;;; If (NULL? REST), then replace this action with ACTION.
;;; If (PAIR? REST), return PASS with an empty table with this object
;;; as the default action, and run (UPDATE REST ACTION) on it.
(define readtable:exec:new
(lambda (proc)
(let ((update
(lambda (rest action)
(if (null? rest)
action
((readtable:pass:new readtable:empty)
'update rest action)))))
(lambda (op . args)
(cond
((eq? op 'update) (apply update args))
((eq? op 'step) (apply proc args))
(else (error "readtable:exec: invalid" args)))))))
;;; Define a new readtable.
;;;
;;; (X) where (CHAR? X): Execute the action associated with X, or the
;;; default action.
;;;
;;; (WITH-DEFAULT-ACTION NEW-ACTION): Return a readtable with the same
;;; backing table but with NEW-ACTION as the default action.
;;;
;;; (UPDATE REST ACTION): Update the action taken by the total application
;;; of REST to be ACTION. REST must be a pair.
(define readtable:new
(lambda (default-action charmap)
(letrec
((lookup?
(lambda (char)
(let ((node (charmap:search charmap char)))
(if (null? node)
#f
(map:val node)))))
(lookup
(lambda (char)
(or (lookup? char) default-action)))
(run*
(lambda (handler char acc port)
(handler 'step table char acc port)))
(run
(lambda (char acc port)
(run* (lookup char) char acc port)))
(with-default-action
(lambda (new-default-action)
(readtable:new new-default-action charmap)))
(update
(lambda (rest action)
(if (null? rest)
(error "readtable update: invalid" (list rest action))
(readtable:new default-action
(charmap:update
charmap
(car rest)
(lambda (_ oldnode)
(if (null? oldnode)
(action 'update (cdr rest) action)
((map:val oldnode)
'update (cdr rest) action))))))))
(table
(lambda (op . args)
(cond
((not op)
(display (list op args))
(apply run* default-action op args))
((char? op) (apply run op args))
((eq? op 'with-default-action) (apply with-default-action
args))
((eq? op 'update) (apply update args))
(else (error "readtable: invalid" args))))))
table)))
;;; Add ACTION as the action to be taken in TABLE following SEQ.
(define readtable:add-sequence
(lambda (table seq action)
(let ((seq (cond
((char? seq) (list seq))
((string? seq) (string->list seq))
(else seq))))
(table 'update seq action))))
;;; ;;;;;;;;;;;;;;;
;;; Default actions
;;; ;;;;;;;;;;;;;;;
(define readtable:update-many
(lambda (table . pairs)
(fold (lambda (pair table)
(readtable:add-sequence table (car pair)
(cdr pair)))
table
pairs)))
;;; Signal error on this action.
(define readtable:error-action
(lambda (emsg)
(readtable:exec:new
(lambda (table char acc port)
(error emsg (list table char acc port))))))
;;; Create an EXEC action that discards the current character.
;;; This should not be a PASS action, because PASS actions emulate
;;; a trie, which is not cyclic.
(define readtable:skip
(readtable:exec:new
(lambda (table char acc port)
(table 'run (port 'read) acc port))))
;;; Ignore everything and return a constant.
(define readtable:return-value
(lambda (value)
(readtable:exec:new
(lambda (table char acc port)
value))))
;;; ;;;;;;;;;;;;;;;;;;;
;;; Default readtables
;;; ;;;;;;;;;;;;;;;;;;;
(define readtable:empty
(readtable:new (readtable:error-action "no more actions")
'()))
;;; Ignore all characters until newline.
(define readtable:read-line-comment
(readtable:new
readtable:skip
(charmap:insert-many '()
(list
(cons #\newline (readtable:return-value #f))))))
(define readtable:top
(readtable:update-many
readtable:empty
(cons "#t" (readtable:return-value #t))
(cons "#f" (readtable:return-value #f))
(cons "#true" (readtable:return-value #t))))
;;; ;;;;;;;;;;;
;;; Test reader
;;; ;;;;;;;;;;;
(define %list->read
(lambda (seq)
(port->read
(lambda ()
(if (null? seq)
#f
(let ((ch (car seq)))
(set! seq (cdr seq))
ch)))
"test")))
(let ((true-reader (%list->read (list #\# #\t))))
(readtable:top (true-reader 'read) #f true-reader))

View File

@ -69,8 +69,3 @@
(display "running string BST-AVL tests")
(newline)
(report-tests %set:tests)
(load "trie.scm")
(display "char trie")
(newline)
(report-tests %trie:tests)

138
trie.scm
View File

@ -1,138 +0,0 @@
;;; Copyright (C) Peter McGoron 2024
;;; This program is free software: you can redistribute it and/or modify
;;; it under the terms of the GNU General Public License as published by
;;; the Free Software Foundation, version 3 of the License.
;;;
;;; This program is distributed in the hope that it will be useful,
;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;;; GNU General Public License for more details.
;;;
;;; You should have received a copy of the GNU General Public License
;;; along with this program. If not, see <https://www.gnu.org/licenses/>.
;;; ;;;;;;;;;;;;;;
;;; Character maps
;;; ;;;;;;;;;;;;;;
(define char<=>
(lambda (x y)
(let ((x (char->integer x))
(y (char->integer y)))
(cond
((< x y) '<)
((= x y) '=)
(else '>)))))
(define %charmap:<=> (set:<=>-to-map char<=>))
(define %charmap:update (set:update %charmap:<=>))
(define charmap:update (map:update %charmap:update))
(define charmap:insert (map:insert %charmap:update))
(define charmap:search (map:search %charmap:<=>))
;;; ;;;;;;;;;;
;;; Char tries
;;;
;;; Tries are pairs, CAR = backing map from chars to trie nodes, and
;;; CDR = function tied to this part of the trie.
;;; ;;;;;;;;;;
;;; (TRIE:NEW TABLE FUNCTION)
(define trie:new cons)
(define trie:empty (trie:new '() #f))
;;; Get the function inside of trie NODE.
(define trie:function
(lambda (node)
(if (null? node)
#f
(cdr node))))
;;; Get the backing set inside of trie NODE.
(define trie:backing
(lambda (node)
(if (null? node)
'()
(car node))))
;;; Insert STRING-AS-LIST into NODE with value FUNCTION.
(define trie:insert
(lambda (node string-as-list function)
(if (null? string-as-list)
(trie:new (trie:backing node) function)
(let ((ch (car string-as-list))
(string-as-list (cdr string-as-list)))
(let ((newtree (charmap:update (trie:backing node) ch
(lambda (node oldnode)
(if oldnode
(map:node-new-val
oldnode
ch
(trie:insert (map:val oldnode)
string-as-list
function))
(map:empty-node ch
(trie:insert
trie:empty
string-as-list
function)))))))
(trie:new newtree (trie:function node)))))))
(define trie:insert-many
(lambda (node lst)
(fold (lambda (pair node)
(let ((key (car pair)))
(let ((key (cond
((list? key) key)
((string? key) (string->list key))
((char? key) (list key)))))
(trie:insert node key (cdr pair)))))
node lst)))
;;; Search for CH in NODE.
(define trie:search-single
(lambda (ch node)
(if (null? node)
'()
(let ((node (charmap:search (trie:backing node) ch)))
(if (null? node)
'()
(map:val node))))))
(define trie:search
(lambda (node str)
(fold trie:search-single node (string->list str))))
;;; ;;;;;
;;; Tests
;;; ;;;;;
(define %trie:tests
(list
(cons "insert with prefixes"
(lambda ()
(let ((trie
(trie:insert-many trie:empty
(list
(cons (string #\a #\b #\c #\d) 10)
(cons (string #\a #\b #\c) 20)
(cons (string #\a #\b #\c #\d #\e) 30)
(cons (string #\b) 40)
(cons (string #\b #\e #\g) 50))))
(t '()))
(set! t (trie:search trie (string #\a #\b #\c)))
(if (not (= (trie:function t) 20))
"abc not found"
(let ((t (trie:search-single #\d t)))
(if (not (= (trie:function t) 10))
"abcd not found"
(let ((t (trie:search-single #\e t)))
(if (not (= (trie:function t) 30))
"abcde not found"
(let ((t (trie:search-single #\b trie)))
(if (not (= (trie:function t) 40))
"b not found"
(let ((t (trie:search t (string #\e #\g))))
(= (trie:function t) 50))))))))))))))