From 8bee2d39a43b589654a2067ff3385a33059fd308 Mon Sep 17 00:00:00 2001 From: Peter McGoron Date: Sun, 8 Sep 2024 08:22:39 -0400 Subject: [PATCH] readtable: simplify After taking a look at the R7RS syntax and how Chicken parses directives, I realized that it's easier to have "#!", "#\\", etc. parse identifiers instead of baking in trie actions. This is slightly slower but completely removes the trie concept from the readtable, which simplifies the implementation and removes many corner cases involving combining readtables with different action types. --- read.scm | 353 ++++++++++++++++++------------------------------------- util.scm | 15 +++ 2 files changed, 130 insertions(+), 238 deletions(-) diff --git a/read.scm b/read.scm index edea2b5..7347d2b 100644 --- a/read.scm +++ b/read.scm @@ -13,12 +13,6 @@ ;;; R7RS reader. This is the lexer-parser end, so it returns tokens and ;;; not concrete objects. -;;; -;;; The reader is based on a readtable that acts (in some instances) like -;;; a trie. The reader reads a character and looks it up in the readtable. -;;; The actions stored in the readtable are either opaque execution actions -;;; or transparent "pass" actions that jump to a new readtable to read more -;;; characters. (load "chez-compat.scm") (load "util.scm") @@ -26,7 +20,6 @@ ;;; ;;;;;;;;;;;;;;;;;;;;;;;; ;;; Port reader wrapper - ;;; ;;;;;;;;;;;;;;;;;;;;;;;; (define port->read-function @@ -96,251 +89,134 @@ ((= x y) '=) (else '>)))) -(define char<=> +;;; Comparison on characters extended to #F, which is less than all +;;; characters. +(define char*<=> (lambda (x y) - (integer<=> (char->integer x) - (char->integer y)))) + (cond + ((and (not x) y) '<) + ((and x (not y)) '>) + ((and (not x) (not y) '=)) + (else (integer<=> (char->integer x) + (char->integer y)))))) -(define %charmap:<=> (set:<=>-to-map char<=>)) +(define %charmap:<=> (set:<=>-to-map char*<=>)) (define %charmap:update (set:update %charmap:<=>)) (define charmap:update (map:update %charmap:update)) (define charmap:insert (map:insert %charmap:update)) (define charmap:search (map:search %charmap:<=>)) -(define charmap:insert-many - (lambda (cmap . pairs) - (fold (lambda (pair cmap) - (car (charmap:insert cmap (car pair) (cdr pair)))) - cmap - pairs))) - -;;; ;;;;;;;;;; +;;; ;;;;;;;;;;;;;;;;;;;;;; ;;; Readtable constructors -;;; -;;; Readtable actions are objects with the following messages: -;;; -;;; (STEP TABLE CHAR ACC PORT): Act on CHAR from TABLE with accumulated -;;; value ACC and PORT. -;;; (UPDATE REST ACTION): Return a new readtable action which is the -;;; action of reading REST and executing ACTION after that. This should -;;; preserve the previous action as much as possible. -;;; ;;;;;;;;;; +;;; ;;;;;;;;;;;;;;;;;;;;;; -;;; Pass to new readtable. -;;; -;;; STEP reads the next character in the sequence and jumps to TABLE -;;; modifying ACC. -;;; -;;; On UPDATE: -;;; If (NULL? REST), return PASS with a TABLE updated with its default -;;; action to be ACTION. -;;; If (PAIR? REST), return PASS with (TABLE UPDATE REST ACTION). -(define readtable:pass:new - (lambda (table) - (let ((step - (lambda (oldtable char acc port) - (table (port 'read) acc port))) - (update - (lambda (rest action) - (readtable:pass:new - (if (null? rest) - (table 'with-default-action action) - (table 'update rest action)))))) - (lambda (op . args) - (cond - ((eq? op 'update) (apply update args)) - ((eq? op 'step) (apply step args)) - (else (error "readtable:pass: invalid" args))))))) +;;; (READTABLE:NEW DEFAULT-ACTION CHARMAP) +(define readtable:new cons) -;;; Create a new PASS with a blank TABLE with DEFAULT-ACTION as the -;;; default action. -(define readtable:pass/blank +(define %readtable:default-action car) +(define %readtable:charmap cdr) + +;;; Run the action in TABLE assigned to CHAR, or the default action of +;;; TABLE if there is no entry for CHAR. +(define readtable:act + (lambda (table char acc port) + (let ((node (charmap:search (%readtable:charmap table) + char))) + (let ((action (if (null? node) + (%readtable:default-action table) + (map:val node)))) + (action table char acc port))))) + +;;; Return a new readtable where CHAR is bound to ACTION. +(define readtable:update + (lambda (table char action) + (readtable:new (%readtable:default-action table) + (car (charmap:insert + (%readtable:charmap table) char action))))) + +;;; Construct new readtable with no characters in its map and +;;; DEFAULT-ACTION as the default action. +(define readtable:empty/default (lambda (default-action) - (readtable:pass:new (readtable:new default-action '())))) + (readtable:new default-action '()))) -;;; Execute an action. -;;; -;;; STEP calls PROC with the same arguments. -;;; -;;; On UPDATE: -;;; If (NULL? REST), then replace this action with ACTION. -;;; If (PAIR? REST), return PASS with an empty table with this object -;;; as the default action, and run (UPDATE REST ACTION) on it. -(define readtable:exec:new - (lambda (proc) - (letrec - ((update - (lambda (rest action) - (if (null? rest) - action - ((readtable:pass:new (readtable:new exec '())) - 'update rest action)))) - (exec - (lambda (op . args) - (cond - ((eq? op 'update) (apply update args)) - ((eq? op 'step) (apply proc args)) - (else (error "readtable:exec: invalid" args)))))) - exec))) - - -;;; ;;;;;;;;;;;;;;; -;;; Default actions -;;; ;;;;;;;;;;;;;;; - -;;; Add multiple literal sequences to the readtable. -(define readtable:update-many-sequence - (lambda (table . pairs) - (fold (lambda (pair table) - (readtable:add-sequence table (car pair) - (cdr pair))) +;;; Each value in FUNCTIONS is a list (PROCEDURE ARGS...) which is called +;;; like (PROCEDURE TABLE ARGS...) and returns a table. +(define readtable:process + (lambda (table . functions) + (fold (lambda (function table) + (apply (car function) table (cdr function))) table - pairs))) + functions))) -;;; Signal error on this action. -(define readtable:error-action - (lambda (emsg) - (readtable:exec:new - (lambda (table char acc port) - (error emsg (list table char acc port)))))) - -;;; Create an EXEC action that discards the current character. -;;; This should not be a PASS action, because PASS actions emulate -;;; a trie, which is not cyclic. -(define readtable:skip - (readtable:exec:new - (lambda (table char acc port) - (table 'run (port 'read) acc port)))) - -;;; Ignore everything and return a constant. -(define readtable:return-value - (lambda (value) - (readtable:exec:new - (lambda (table char acc port) - value)))) - -;;; Define a new readtable. -;;; -;;; (X) where (CHAR? X): Execute the action associated with X, or the -;;; default action. -;;; -;;; (WITH-DEFAULT-ACTION NEW-ACTION): Return a readtable with the same -;;; backing table but with NEW-ACTION as the default action. -;;; -;;; (UPDATE REST ACTION): Update the action taken by the total application -;;; of REST to be ACTION. REST must be a pair. -(define readtable:new - (lambda (default-action charmap) - (letrec - ((lookup? - (lambda (char) - (let ((node (charmap:search charmap char))) - (if (null? node) - #f - (map:val node))))) - (lookup - (lambda (char) - (or (lookup? char) default-action))) - (run* - (lambda (handler char acc port) - (handler 'step table char acc port))) - (run - (lambda (char acc port) - (run* (lookup char) char acc port))) - (with-default-action - (lambda (new-default-action) - (readtable:new new-default-action charmap))) - (empty-pass/error - (lambda () - (readtable:pass/blank (readtable:error-action - "reading long name")))) - (update-oldnode - (lambda (rest action) - (lambda (_ oldnode) - (if (null? rest) - action - (let ((replaced (if (null? oldnode) - (empty-pass/error) - (map:val oldnode)))) - (replaced 'update rest action)))))) - (update - (lambda (rest action) - (if (null? rest) - (error "readtable update: invalid" (list rest action)) - (readtable:new default-action - (charmap:update - charmap - (car rest) - (update-oldnode (cdr rest) action)))))) - (table - (lambda (op . args) - (cond - ((not op) - (apply run* default-action op args)) - ((char? op) (apply run op args)) - ((eq? op 'with-default-action) (apply with-default-action - args)) - ((eq? op 'update) (apply update args)) - (else (error "readtable: invalid" (cons op args))))))) - table))) - -;;; Wrap ACTION in a readtable, and push bach the currently read character -;;; before calling ACTION. -;;; -;;; This is designed for readtable actions that already know what they -;;; are matching against. -(define readtable:sequence-wrapper - (lambda (action) - (readtable:pass/blank - (readtable:exec:new - (lambda (table char acc port) - (port 'push char) - (action 'step table #f #f port)))))) - -;;; Add ACTION as the action to be taken in TABLE following SEQ. -;;; The action is wrapped in an empty PASS table with ACTION as the default -;;; action. This normalizes the character ACTION is passed. -(define readtable:add-sequence - (lambda (table seq action) - (let ((seq (cond - ((char? seq) (list seq)) - ((string? seq) (string->list seq)) - (else seq)))) - (table 'update seq (readtable:sequence-wrapper action))))) - -;;; ;;;;;;;;;;;;;;;;;;; +;;; ;;;;;;;;;;;;;;;;;; ;;; Default readtables -;;; ;;;;;;;;;;;;;;;;;;; +;;; ;;;;;;;;;;;;;;;;;; -(define readtable:empty - (readtable:new (readtable:error-action "no more actions") - '())) +;;; Discard the current character and continue reading the readtable. +(define readtable:skip + (lambda (table char acc port) + (readtable:act table (port 'read) acc port))) -;;; Ignore all characters until newline. -(define readtable:read-line-comment - (readtable:new - readtable:skip - (charmap:insert-many '() - (list - (cons #\newline (readtable:return-value #f)))))) +;;; Push back CHAR and return ACC. +(define readtable:return-acc-keep-char + (lambda (table char acc port) + (port 'push char) + acc)) +;;; Push CHAR to ACC and continue reading from TABLE. +(define readtable:push-acc + (lambda (table char acc port) + (readtable:act table (port 'read) (cons char acc) port))) + +;;; Define a readtable that constructs an identifier by accepting all +;;; characters that are not listed. +(define readtable:exclude-from-identifiers + (lambda (table excluded) + (fold (lambda (char table) + (readtable:update table char readtable:return-acc-keep-char)) + table + excluded))) + +;;; ASCII whitespace. +(define readtable:ASCII-whitespace + (list #\newline + #\space + (integer->char #x09) + (integer->char #x0B) + (integer->char #x0C) + (integer->char #x0D))) + +;;; Readtable for identifiers. +(define readtable:identifier + (readtable:process + (readtable:empty/default readtable:push-acc) + (list readtable:exclude-from-identifiers + readtable:ASCII-whitespace) + (list readtable:exclude-from-identifiers + (list #\| #\( #\) #\' #\; #f)))) + +;;; Read an identifier starting with CHAR. +(define readtable:read-ident + (lambda (table char acc port) + (reverse (readtable:act readtable:identifier + (port 'read) (list char) port)))) + +;;; Add all chars in TO-SKIP to TABLE as skipped characters. Used for +;;; whitespace. +(define readtable:add-all-as-skip + (lambda (table to-skip) + (fold (lambda (char table) + (readtable:update table char readtable:skip)) + table + to-skip))) + +;;; Toplevel reader. (define readtable:top - (readtable:update-many-sequence - readtable:empty - (cons "#!fold-case" - (readtable:exec:new - (lambda (table char acc port) - (port 'fold-case! #t) - '()))) - (cons "#!no-fold-case" - (readtable:exec:new - (lambda (table char acc port) - (port 'fold-case! #f) - '()))) - (cons "#t" (readtable:return-value #t)) - (cons "#f" (readtable:return-value #f)) - (cons "#true" (readtable:return-value #t)))) + (readtable:process + (readtable:empty/default readtable:read-ident) + (list readtable:add-all-as-skip readtable:ASCII-whitespace))) ;;; ;;;;;;;;;;; ;;; Test reader @@ -356,12 +232,13 @@ ch))) "test"))) -(let ((true-reader (%list->read (string->list "#!fold-case#TRUE#!no-fold-case#TRUE")))) - (display (list "first:" (readtable:top (true-reader 'read) #f true-reader))) +(let ((true-reader (%list->read (string->list "xyzw abcd x!")))) + (display (list "first:" (readtable:act + readtable:top (true-reader 'read) #f true-reader))) (newline) - (display (list "second: " (readtable:top (true-reader 'read) #f true-reader))) + (display (list "second: " (readtable:act + readtable:top (true-reader 'read) #f true-reader))) (newline) - (display (list "third: " (readtable:top (true-reader 'read) #f true-reader))) - (newline) - (display (list "fourth: " (readtable:top (true-reader 'read) #f true-reader))) + (display (list "third: " (readtable:act + readtable:top (true-reader 'read) #f true-reader))) (newline)) diff --git a/util.scm b/util.scm index fa361f7..cf1fe2e 100644 --- a/util.scm +++ b/util.scm @@ -33,3 +33,18 @@ ((not (f (car lst))) #f) (else (all f (cdr lst)))))) +;;; (REVAPPEND L1 ... LN) returns L{N-1}, L{N-2}, ... reversed and +;;; appended to LN, in that order. +(define revappend + (letrec ((loop + (lambda (lst1 lst2) + (if (null? lst1) + lst2 + (loop (cdr lst1) (cons (car lst1) lst2)))))) + (lambda lists + (cond + ((null? lists) '()) + ((null? (cdr lists)) (car lists)) + (else + (apply revappend (loop (car lists) (cadr lists)) + (cddr lists)))))))