read: factor out ADD-ALL-AS-SKIP to UPDATE-LIST

This commit is contained in:
Peter McGoron 2024-09-22 09:46:58 -04:00
parent f52235cc51
commit 37355f1037
1 changed files with 50 additions and 12 deletions

View File

@ -13,6 +13,34 @@
;;; R7RS reader. This is the lexer-parser end, so it returns tokens and
;;; not concrete objects.
;;;
;;; Notes:
;;;
;;; Port stores datum labels. Datum labels are stored for the entirety of
;;; a READ: this is to emulate MIT Scheme, which allows for datum labels
;;; outside of the datum that the label appears in.
;;;
;;; The reader does not return Scheme data: it returns annotated data
;;; containing the source location, datum label number, resolved datum
;;; label pointer. This is for advanced syntax systems.
;;;
;;; How datum labels could work:
;;;
;;; When encountering #[number]=, allocate a datum label and assign it
;;; nothing. Then call READ after "=", and destructively update the
;;; datum label with the resulting datum. A pass over the new read
;;; structure to convert it to regular Scheme data will resolve the
;;; indirection.
;;;
;;; Printing circular structures and shared structures:
;;;
;;; The only way to do so is to find a way to sort lists in a way that
;;; respects EQ?. This is impossible in standard Scheme and also in
;;; many implementations because of moving collectors.
;;;
;;; A list could be maintained of all previous values, and each print
;;; could check the list using EQ? to find a match, but for R7RS WRITE
;;; and WRITE-SHARED this would be an O(N^2) operation.
(load "chez-compat.scm")
(load "util.scm")
@ -35,6 +63,9 @@
;;; READ:
;;;
;;; Stream readers contain mutable state. This is the case-folding mode
;;; and the current list of datum labels.
;;;
;;; (POS): Return (LIST FILENAME LINE-NUMBER OFFSET).
;;; (READ): Read the next character in the stream. Returns #F on EOF.
;;; (PUSH CHAR): Push CHAR such that it will be the next character read
@ -163,6 +194,14 @@
(car (charmap:insert
(%readtable:charmap table) char action)))))
;;; Update TABLE to act on all characters in LST with ACTION.
(define readtable:update-list
(lambda (table lst action)
(fold (lambda (char table)
(readtable:update table char action))
table
lst)))
;;; Construct new readtable with no characters in its map and
;;; DEFAULT-ACTION as the default action.
(define readtable:empty/default
@ -283,15 +322,6 @@
;;; Comments and whitespace reader
;;; ;;;;;;;;;;;;;;;;;;;;
;;; Add all chars in TO-SKIP to TABLE as skipped characters. Used for
;;; whitespace.
(define readtable:add-all-as-skip
(lambda (table to-skip)
(fold (lambda (char table)
(readtable:update table char readtable:skip))
table
to-skip)))
;;; Readtable for a line comment.
(define readtable:read-to-newline
(readtable:process
@ -321,7 +351,9 @@
(let ((table (readtable:process
(readtable:empty/default (readtable:error
"improper list has 1 cdr"))
(list readtable:add-all-as-skip readtable:ASCII-whitespace)
(list readtable:update-list
readtable:ASCII-whitespace
readtable:skip)
(list readtable:update %eol
(lambda dummy 'end-of-list)))))
(readtable:act table (port 'read) acc port)))))
@ -438,6 +470,10 @@
;;; Reads the next toplevel datum, discards it, and then continues at the
;;; toplevel.
;;;
;;; TODO: The R7RS reader can cause side-effects due to #!FOLD-CASE. This
;;; must be supressed in datum comments. A method could be added to PORT
;;; that saves and restores mutable state (besides stream position).
(define readtable:datum-comment
(lambda (_ __ toplevel port)
(readtable:next toplevel #f port)
@ -459,7 +495,9 @@
(lambda ()
(readtable:process
(readtable:empty/default readtable:read-ident)
(list readtable:add-all-as-skip readtable:ASCII-whitespace)
(list readtable:update-list
readtable:ASCII-whitespace
readtable:skip)
(list readtable:update #f (readtable:return 'eof))
(list readtable:update %bol readtable:read-list)
(list readtable:update %eol (readtable:error "unbalanced list"))
@ -506,4 +544,4 @@
(read-all "(x y #| this is a block\n comment\n |# z w)")
(read-all "#( a b #| this is a #| nested block |# comment|# z w)")
(read-all "#(a b #(c #|close#|comment|#|#y))")
(read-all "(this has a #;(call with current continuation) datum comment)")
(read-all "(this has a #;(call with (current continuation)) datum comment)")