UNSLISP/read.scm

368 lines
12 KiB
Scheme

;;; Copyright (C) Peter McGoron 2024
;;; This program is free software: you can redistribute it and/or modify
;;; it under the terms of the GNU General Public License as published by
;;; the Free Software Foundation, version 3 of the License.
;;;
;;; This program is distributed in the hope that it will be useful,
;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;;; GNU General Public License for more details.
;;;
;;; You should have received a copy of the GNU General Public License
;;; along with this program. If not, see <https://www.gnu.org/licenses/>.
;;; R7RS reader. This is the lexer-parser end, so it returns tokens and
;;; not concrete objects.
;;;
;;; The reader is based on a readtable that acts (in some instances) like
;;; a trie. The reader reads a character and looks it up in the readtable.
;;; The actions stored in the readtable are either opaque execution actions
;;; or transparent "pass" actions that jump to a new readtable to read more
;;; characters.
(load "chez-compat.scm")
(load "util.scm")
(load "set.scm")
;;; ;;;;;;;;;;;;;;;;;;;;;;;;
;;; Port reader wrapper
;;; ;;;;;;;;;;;;;;;;;;;;;;;;
(define port->read-function
(lambda (port)
(lambda ()
(read-char port))))
;;; READ:
;;;
;;; (POS): Return (LIST FILENAME LINE-NUMBER OFFSET).
;;; (READ): Read the next character in the stream. Returns #F on EOF.
;;; (PUSH CHAR): Push CHAR such that it will be the next character read
;;; when (READ) is called.
;;; (FOLD-CASE?): Returns a boolean if case folding is enabled.
;;; (FOLD-CASE! BOOL): Sets the case folding to BOOL.
(define port->read
(lambda (read-function filename)
(let ((line-number 1)
(offset 0)
(pushback-buffer '())
(fold-case? #f))
(letrec ((update-position!
(lambda (ch)
(cond
((eqv? ch #\newline)
(set! line-number (+ 1 line-number)) (set! offset 0))
(offset (set! offset (+ 1 offset))))))
(process
(lambda (ch)
(update-position! ch)
(cond
((or (eof-object? ch) (not ch)) ch)
(fold-case? (char-downcase ch))
(else ch))))
(port
(lambda (op . args)
(cond
((eq? op 'pos) (list filename line-number offset))
((eq? op 'read)
(process
(if (null? pushback-buffer)
(read-function)
(let ((ch (car pushback-buffer)))
(set! pushback-buffer (cdr pushback-buffer))
ch))))
((eq? op 'push)
(let ((ch (car args)))
(if (eqv? ch #\newline)
(begin
(set! line-number (- line-number 1))
(set! offset #f))
(set! offset (- offset 1)))
(set! pushback-buffer (cons ch pushback-buffer))))
((eq? op 'fold-case?) fold-case?)
((eq? op 'fold-case!) (set! fold-case? (car args)))
(else (error "read->port: invalid" (cons op args)))))))
port))))
;;; ;;;;;;;;;;;;;;
;;; Character maps
;;; ;;;;;;;;;;;;;;
(define integer<=>
(lambda (x y)
(cond
((< x y) '<)
((= x y) '=)
(else '>))))
(define char<=>
(lambda (x y)
(integer<=> (char->integer x)
(char->integer y))))
(define %charmap:<=> (set:<=>-to-map char<=>))
(define %charmap:update (set:update %charmap:<=>))
(define charmap:update (map:update %charmap:update))
(define charmap:insert (map:insert %charmap:update))
(define charmap:search (map:search %charmap:<=>))
(define charmap:insert-many
(lambda (cmap . pairs)
(fold (lambda (pair cmap)
(car (charmap:insert cmap (car pair) (cdr pair))))
cmap
pairs)))
;;; ;;;;;;;;;;
;;; Readtable constructors
;;;
;;; Readtable actions are objects with the following messages:
;;;
;;; (STEP TABLE CHAR ACC PORT): Act on CHAR from TABLE with accumulated
;;; value ACC and PORT.
;;; (UPDATE REST ACTION): Return a new readtable action which is the
;;; action of reading REST and executing ACTION after that. This should
;;; preserve the previous action as much as possible.
;;; ;;;;;;;;;;
;;; Pass to new readtable.
;;;
;;; STEP reads the next character in the sequence and jumps to TABLE
;;; modifying ACC.
;;;
;;; On UPDATE:
;;; If (NULL? REST), return PASS with a TABLE updated with its default
;;; action to be ACTION.
;;; If (PAIR? REST), return PASS with (TABLE UPDATE REST ACTION).
(define readtable:pass:new
(lambda (table)
(let ((step
(lambda (oldtable char acc port)
(table (port 'read) acc port)))
(update
(lambda (rest action)
(readtable:pass:new
(if (null? rest)
(table 'with-default-action action)
(table 'update rest action))))))
(lambda (op . args)
(cond
((eq? op 'update) (apply update args))
((eq? op 'step) (apply step args))
(else (error "readtable:pass: invalid" args)))))))
;;; Create a new PASS with a blank TABLE with DEFAULT-ACTION as the
;;; default action.
(define readtable:pass/blank
(lambda (default-action)
(readtable:pass:new (readtable:new default-action '()))))
;;; Execute an action.
;;;
;;; STEP calls PROC with the same arguments.
;;;
;;; On UPDATE:
;;; If (NULL? REST), then replace this action with ACTION.
;;; If (PAIR? REST), return PASS with an empty table with this object
;;; as the default action, and run (UPDATE REST ACTION) on it.
(define readtable:exec:new
(lambda (proc)
(letrec
((update
(lambda (rest action)
(if (null? rest)
action
((readtable:pass:new (readtable:new exec '()))
'update rest action))))
(exec
(lambda (op . args)
(cond
((eq? op 'update) (apply update args))
((eq? op 'step) (apply proc args))
(else (error "readtable:exec: invalid" args))))))
exec)))
;;; ;;;;;;;;;;;;;;;
;;; Default actions
;;; ;;;;;;;;;;;;;;;
;;; Add multiple literal sequences to the readtable.
(define readtable:update-many-sequence
(lambda (table . pairs)
(fold (lambda (pair table)
(readtable:add-sequence table (car pair)
(cdr pair)))
table
pairs)))
;;; Signal error on this action.
(define readtable:error-action
(lambda (emsg)
(readtable:exec:new
(lambda (table char acc port)
(error emsg (list table char acc port))))))
;;; Create an EXEC action that discards the current character.
;;; This should not be a PASS action, because PASS actions emulate
;;; a trie, which is not cyclic.
(define readtable:skip
(readtable:exec:new
(lambda (table char acc port)
(table 'run (port 'read) acc port))))
;;; Ignore everything and return a constant.
(define readtable:return-value
(lambda (value)
(readtable:exec:new
(lambda (table char acc port)
value))))
;;; Define a new readtable.
;;;
;;; (X) where (CHAR? X): Execute the action associated with X, or the
;;; default action.
;;;
;;; (WITH-DEFAULT-ACTION NEW-ACTION): Return a readtable with the same
;;; backing table but with NEW-ACTION as the default action.
;;;
;;; (UPDATE REST ACTION): Update the action taken by the total application
;;; of REST to be ACTION. REST must be a pair.
(define readtable:new
(lambda (default-action charmap)
(letrec
((lookup?
(lambda (char)
(let ((node (charmap:search charmap char)))
(if (null? node)
#f
(map:val node)))))
(lookup
(lambda (char)
(or (lookup? char) default-action)))
(run*
(lambda (handler char acc port)
(handler 'step table char acc port)))
(run
(lambda (char acc port)
(run* (lookup char) char acc port)))
(with-default-action
(lambda (new-default-action)
(readtable:new new-default-action charmap)))
(empty-pass/error
(lambda ()
(readtable:pass/blank (readtable:error-action
"reading long name"))))
(update-oldnode
(lambda (rest action)
(lambda (_ oldnode)
(if (null? rest)
action
(let ((replaced (if (null? oldnode)
(empty-pass/error)
(map:val oldnode))))
(replaced 'update rest action))))))
(update
(lambda (rest action)
(if (null? rest)
(error "readtable update: invalid" (list rest action))
(readtable:new default-action
(charmap:update
charmap
(car rest)
(update-oldnode (cdr rest) action))))))
(table
(lambda (op . args)
(cond
((not op)
(apply run* default-action op args))
((char? op) (apply run op args))
((eq? op 'with-default-action) (apply with-default-action
args))
((eq? op 'update) (apply update args))
(else (error "readtable: invalid" (cons op args)))))))
table)))
;;; Wrap ACTION in a readtable, and push bach the currently read character
;;; before calling ACTION.
;;;
;;; This is designed for readtable actions that already know what they
;;; are matching against.
(define readtable:sequence-wrapper
(lambda (action)
(readtable:pass/blank
(readtable:exec:new
(lambda (table char acc port)
(port 'push char)
(action 'step table #f #f port))))))
;;; Add ACTION as the action to be taken in TABLE following SEQ.
;;; The action is wrapped in an empty PASS table with ACTION as the default
;;; action. This normalizes the character ACTION is passed.
(define readtable:add-sequence
(lambda (table seq action)
(let ((seq (cond
((char? seq) (list seq))
((string? seq) (string->list seq))
(else seq))))
(table 'update seq (readtable:sequence-wrapper action)))))
;;; ;;;;;;;;;;;;;;;;;;;
;;; Default readtables
;;; ;;;;;;;;;;;;;;;;;;;
(define readtable:empty
(readtable:new (readtable:error-action "no more actions")
'()))
;;; Ignore all characters until newline.
(define readtable:read-line-comment
(readtable:new
readtable:skip
(charmap:insert-many '()
(list
(cons #\newline (readtable:return-value #f))))))
(define readtable:top
(readtable:update-many-sequence
readtable:empty
(cons "#!fold-case"
(readtable:exec:new
(lambda (table char acc port)
(port 'fold-case! #t)
'())))
(cons "#!no-fold-case"
(readtable:exec:new
(lambda (table char acc port)
(port 'fold-case! #f)
'())))
(cons "#t" (readtable:return-value #t))
(cons "#f" (readtable:return-value #f))
(cons "#true" (readtable:return-value #t))))
;;; ;;;;;;;;;;;
;;; Test reader
;;; ;;;;;;;;;;;
(define %list->read
(lambda (seq)
(port->read
(lambda ()
(if (null? seq)
#f
(let ((ch (car seq)))
(set! seq (cdr seq))
ch)))
"test")))
(let ((true-reader (%list->read (string->list "#!fold-case#TRUE#!no-fold-case#TRUE"))))
(display (list "first:" (readtable:top (true-reader 'read) #f true-reader)))
(newline)
(display (list "second: " (readtable:top (true-reader 'read) #f true-reader)))
(newline)
(display (list "third: " (readtable:top (true-reader 'read) #f true-reader)))
(newline)
(display (list "fourth: " (readtable:top (true-reader 'read) #f true-reader)))
(newline))