readtable: simplify

After taking a look at the R7RS syntax and how Chicken parses
directives, I realized that it's easier to have "#!", "#\\", etc.
parse identifiers instead of baking in trie actions. This is slightly
slower but completely removes the trie concept from the readtable,
which simplifies the implementation and removes many corner cases
involving combining readtables with different action types.
This commit is contained in:
Peter McGoron 2024-09-08 08:22:39 -04:00
parent 6945841d1c
commit 8bee2d39a4
2 changed files with 130 additions and 238 deletions

353
read.scm
View File

@ -13,12 +13,6 @@
;;; R7RS reader. This is the lexer-parser end, so it returns tokens and ;;; R7RS reader. This is the lexer-parser end, so it returns tokens and
;;; not concrete objects. ;;; not concrete objects.
;;;
;;; The reader is based on a readtable that acts (in some instances) like
;;; a trie. The reader reads a character and looks it up in the readtable.
;;; The actions stored in the readtable are either opaque execution actions
;;; or transparent "pass" actions that jump to a new readtable to read more
;;; characters.
(load "chez-compat.scm") (load "chez-compat.scm")
(load "util.scm") (load "util.scm")
@ -26,7 +20,6 @@
;;; ;;;;;;;;;;;;;;;;;;;;;;;; ;;; ;;;;;;;;;;;;;;;;;;;;;;;;
;;; Port reader wrapper ;;; Port reader wrapper
;;; ;;;;;;;;;;;;;;;;;;;;;;;; ;;; ;;;;;;;;;;;;;;;;;;;;;;;;
(define port->read-function (define port->read-function
@ -96,251 +89,134 @@
((= x y) '=) ((= x y) '=)
(else '>)))) (else '>))))
(define char<=> ;;; Comparison on characters extended to #F, which is less than all
;;; characters.
(define char*<=>
(lambda (x y) (lambda (x y)
(integer<=> (char->integer x) (cond
(char->integer y)))) ((and (not x) y) '<)
((and x (not y)) '>)
((and (not x) (not y) '=))
(else (integer<=> (char->integer x)
(char->integer y))))))
(define %charmap:<=> (set:<=>-to-map char<=>)) (define %charmap:<=> (set:<=>-to-map char*<=>))
(define %charmap:update (set:update %charmap:<=>)) (define %charmap:update (set:update %charmap:<=>))
(define charmap:update (map:update %charmap:update)) (define charmap:update (map:update %charmap:update))
(define charmap:insert (map:insert %charmap:update)) (define charmap:insert (map:insert %charmap:update))
(define charmap:search (map:search %charmap:<=>)) (define charmap:search (map:search %charmap:<=>))
(define charmap:insert-many ;;; ;;;;;;;;;;;;;;;;;;;;;;
(lambda (cmap . pairs)
(fold (lambda (pair cmap)
(car (charmap:insert cmap (car pair) (cdr pair))))
cmap
pairs)))
;;; ;;;;;;;;;;
;;; Readtable constructors ;;; Readtable constructors
;;; ;;; ;;;;;;;;;;;;;;;;;;;;;;
;;; Readtable actions are objects with the following messages:
;;;
;;; (STEP TABLE CHAR ACC PORT): Act on CHAR from TABLE with accumulated
;;; value ACC and PORT.
;;; (UPDATE REST ACTION): Return a new readtable action which is the
;;; action of reading REST and executing ACTION after that. This should
;;; preserve the previous action as much as possible.
;;; ;;;;;;;;;;
;;; Pass to new readtable. ;;; (READTABLE:NEW DEFAULT-ACTION CHARMAP)
;;; (define readtable:new cons)
;;; STEP reads the next character in the sequence and jumps to TABLE
;;; modifying ACC.
;;;
;;; On UPDATE:
;;; If (NULL? REST), return PASS with a TABLE updated with its default
;;; action to be ACTION.
;;; If (PAIR? REST), return PASS with (TABLE UPDATE REST ACTION).
(define readtable:pass:new
(lambda (table)
(let ((step
(lambda (oldtable char acc port)
(table (port 'read) acc port)))
(update
(lambda (rest action)
(readtable:pass:new
(if (null? rest)
(table 'with-default-action action)
(table 'update rest action))))))
(lambda (op . args)
(cond
((eq? op 'update) (apply update args))
((eq? op 'step) (apply step args))
(else (error "readtable:pass: invalid" args)))))))
;;; Create a new PASS with a blank TABLE with DEFAULT-ACTION as the (define %readtable:default-action car)
;;; default action. (define %readtable:charmap cdr)
(define readtable:pass/blank
;;; Run the action in TABLE assigned to CHAR, or the default action of
;;; TABLE if there is no entry for CHAR.
(define readtable:act
(lambda (table char acc port)
(let ((node (charmap:search (%readtable:charmap table)
char)))
(let ((action (if (null? node)
(%readtable:default-action table)
(map:val node))))
(action table char acc port)))))
;;; Return a new readtable where CHAR is bound to ACTION.
(define readtable:update
(lambda (table char action)
(readtable:new (%readtable:default-action table)
(car (charmap:insert
(%readtable:charmap table) char action)))))
;;; Construct new readtable with no characters in its map and
;;; DEFAULT-ACTION as the default action.
(define readtable:empty/default
(lambda (default-action) (lambda (default-action)
(readtable:pass:new (readtable:new default-action '())))) (readtable:new default-action '())))
;;; Execute an action. ;;; Each value in FUNCTIONS is a list (PROCEDURE ARGS...) which is called
;;; ;;; like (PROCEDURE TABLE ARGS...) and returns a table.
;;; STEP calls PROC with the same arguments. (define readtable:process
;;; (lambda (table . functions)
;;; On UPDATE: (fold (lambda (function table)
;;; If (NULL? REST), then replace this action with ACTION. (apply (car function) table (cdr function)))
;;; If (PAIR? REST), return PASS with an empty table with this object
;;; as the default action, and run (UPDATE REST ACTION) on it.
(define readtable:exec:new
(lambda (proc)
(letrec
((update
(lambda (rest action)
(if (null? rest)
action
((readtable:pass:new (readtable:new exec '()))
'update rest action))))
(exec
(lambda (op . args)
(cond
((eq? op 'update) (apply update args))
((eq? op 'step) (apply proc args))
(else (error "readtable:exec: invalid" args))))))
exec)))
;;; ;;;;;;;;;;;;;;;
;;; Default actions
;;; ;;;;;;;;;;;;;;;
;;; Add multiple literal sequences to the readtable.
(define readtable:update-many-sequence
(lambda (table . pairs)
(fold (lambda (pair table)
(readtable:add-sequence table (car pair)
(cdr pair)))
table table
pairs))) functions)))
;;; Signal error on this action. ;;; ;;;;;;;;;;;;;;;;;;
(define readtable:error-action
(lambda (emsg)
(readtable:exec:new
(lambda (table char acc port)
(error emsg (list table char acc port))))))
;;; Create an EXEC action that discards the current character.
;;; This should not be a PASS action, because PASS actions emulate
;;; a trie, which is not cyclic.
(define readtable:skip
(readtable:exec:new
(lambda (table char acc port)
(table 'run (port 'read) acc port))))
;;; Ignore everything and return a constant.
(define readtable:return-value
(lambda (value)
(readtable:exec:new
(lambda (table char acc port)
value))))
;;; Define a new readtable.
;;;
;;; (X) where (CHAR? X): Execute the action associated with X, or the
;;; default action.
;;;
;;; (WITH-DEFAULT-ACTION NEW-ACTION): Return a readtable with the same
;;; backing table but with NEW-ACTION as the default action.
;;;
;;; (UPDATE REST ACTION): Update the action taken by the total application
;;; of REST to be ACTION. REST must be a pair.
(define readtable:new
(lambda (default-action charmap)
(letrec
((lookup?
(lambda (char)
(let ((node (charmap:search charmap char)))
(if (null? node)
#f
(map:val node)))))
(lookup
(lambda (char)
(or (lookup? char) default-action)))
(run*
(lambda (handler char acc port)
(handler 'step table char acc port)))
(run
(lambda (char acc port)
(run* (lookup char) char acc port)))
(with-default-action
(lambda (new-default-action)
(readtable:new new-default-action charmap)))
(empty-pass/error
(lambda ()
(readtable:pass/blank (readtable:error-action
"reading long name"))))
(update-oldnode
(lambda (rest action)
(lambda (_ oldnode)
(if (null? rest)
action
(let ((replaced (if (null? oldnode)
(empty-pass/error)
(map:val oldnode))))
(replaced 'update rest action))))))
(update
(lambda (rest action)
(if (null? rest)
(error "readtable update: invalid" (list rest action))
(readtable:new default-action
(charmap:update
charmap
(car rest)
(update-oldnode (cdr rest) action))))))
(table
(lambda (op . args)
(cond
((not op)
(apply run* default-action op args))
((char? op) (apply run op args))
((eq? op 'with-default-action) (apply with-default-action
args))
((eq? op 'update) (apply update args))
(else (error "readtable: invalid" (cons op args)))))))
table)))
;;; Wrap ACTION in a readtable, and push bach the currently read character
;;; before calling ACTION.
;;;
;;; This is designed for readtable actions that already know what they
;;; are matching against.
(define readtable:sequence-wrapper
(lambda (action)
(readtable:pass/blank
(readtable:exec:new
(lambda (table char acc port)
(port 'push char)
(action 'step table #f #f port))))))
;;; Add ACTION as the action to be taken in TABLE following SEQ.
;;; The action is wrapped in an empty PASS table with ACTION as the default
;;; action. This normalizes the character ACTION is passed.
(define readtable:add-sequence
(lambda (table seq action)
(let ((seq (cond
((char? seq) (list seq))
((string? seq) (string->list seq))
(else seq))))
(table 'update seq (readtable:sequence-wrapper action)))))
;;; ;;;;;;;;;;;;;;;;;;;
;;; Default readtables ;;; Default readtables
;;; ;;;;;;;;;;;;;;;;;;; ;;; ;;;;;;;;;;;;;;;;;;
(define readtable:empty ;;; Discard the current character and continue reading the readtable.
(readtable:new (readtable:error-action "no more actions") (define readtable:skip
'())) (lambda (table char acc port)
(readtable:act table (port 'read) acc port)))
;;; Ignore all characters until newline. ;;; Push back CHAR and return ACC.
(define readtable:read-line-comment (define readtable:return-acc-keep-char
(readtable:new (lambda (table char acc port)
readtable:skip (port 'push char)
(charmap:insert-many '() acc))
(list
(cons #\newline (readtable:return-value #f))))))
;;; Push CHAR to ACC and continue reading from TABLE.
(define readtable:push-acc
(lambda (table char acc port)
(readtable:act table (port 'read) (cons char acc) port)))
;;; Define a readtable that constructs an identifier by accepting all
;;; characters that are not listed.
(define readtable:exclude-from-identifiers
(lambda (table excluded)
(fold (lambda (char table)
(readtable:update table char readtable:return-acc-keep-char))
table
excluded)))
;;; ASCII whitespace.
(define readtable:ASCII-whitespace
(list #\newline
#\space
(integer->char #x09)
(integer->char #x0B)
(integer->char #x0C)
(integer->char #x0D)))
;;; Readtable for identifiers.
(define readtable:identifier
(readtable:process
(readtable:empty/default readtable:push-acc)
(list readtable:exclude-from-identifiers
readtable:ASCII-whitespace)
(list readtable:exclude-from-identifiers
(list #\| #\( #\) #\' #\; #f))))
;;; Read an identifier starting with CHAR.
(define readtable:read-ident
(lambda (table char acc port)
(reverse (readtable:act readtable:identifier
(port 'read) (list char) port))))
;;; Add all chars in TO-SKIP to TABLE as skipped characters. Used for
;;; whitespace.
(define readtable:add-all-as-skip
(lambda (table to-skip)
(fold (lambda (char table)
(readtable:update table char readtable:skip))
table
to-skip)))
;;; Toplevel reader.
(define readtable:top (define readtable:top
(readtable:update-many-sequence (readtable:process
readtable:empty (readtable:empty/default readtable:read-ident)
(cons "#!fold-case" (list readtable:add-all-as-skip readtable:ASCII-whitespace)))
(readtable:exec:new
(lambda (table char acc port)
(port 'fold-case! #t)
'())))
(cons "#!no-fold-case"
(readtable:exec:new
(lambda (table char acc port)
(port 'fold-case! #f)
'())))
(cons "#t" (readtable:return-value #t))
(cons "#f" (readtable:return-value #f))
(cons "#true" (readtable:return-value #t))))
;;; ;;;;;;;;;;; ;;; ;;;;;;;;;;;
;;; Test reader ;;; Test reader
@ -356,12 +232,13 @@
ch))) ch)))
"test"))) "test")))
(let ((true-reader (%list->read (string->list "#!fold-case#TRUE#!no-fold-case#TRUE")))) (let ((true-reader (%list->read (string->list "xyzw abcd x!"))))
(display (list "first:" (readtable:top (true-reader 'read) #f true-reader))) (display (list "first:" (readtable:act
readtable:top (true-reader 'read) #f true-reader)))
(newline) (newline)
(display (list "second: " (readtable:top (true-reader 'read) #f true-reader))) (display (list "second: " (readtable:act
readtable:top (true-reader 'read) #f true-reader)))
(newline) (newline)
(display (list "third: " (readtable:top (true-reader 'read) #f true-reader))) (display (list "third: " (readtable:act
(newline) readtable:top (true-reader 'read) #f true-reader)))
(display (list "fourth: " (readtable:top (true-reader 'read) #f true-reader)))
(newline)) (newline))

View File

@ -33,3 +33,18 @@
((not (f (car lst))) #f) ((not (f (car lst))) #f)
(else (all f (cdr lst)))))) (else (all f (cdr lst))))))
;;; (REVAPPEND L1 ... LN) returns L{N-1}, L{N-2}, ... reversed and
;;; appended to LN, in that order.
(define revappend
(letrec ((loop
(lambda (lst1 lst2)
(if (null? lst1)
lst2
(loop (cdr lst1) (cons (car lst1) lst2))))))
(lambda lists
(cond
((null? lists) '())
((null? (cdr lists)) (car lists))
(else
(apply revappend (loop (car lists) (cadr lists))
(cddr lists)))))))