From 2e43f36471d0484c92354f971f3b44266190b7a5 Mon Sep 17 00:00:00 2001 From: Peter McGoron Date: Sun, 22 Sep 2024 00:46:41 -0400 Subject: [PATCH] read: block comments --- read.scm | 78 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 76 insertions(+), 2 deletions(-) diff --git a/read.scm b/read.scm index d9ea7f7..8d68093 100644 --- a/read.scm +++ b/read.scm @@ -120,6 +120,18 @@ ;;; ;;;;;;;;;;;;;;;;;;;;;; ;;; Readtable constructors +;;; +;;; Readtables are composed of a CHARMAP, which is a map from characters +;;; to actions, and a DEFAULT-ACTION, which is taken when there is no +;;; match in CHARMAP. +;;; +;;; An "action" is a procedure that takes four arguments: +;;; +;;; TABLE: The current table. +;;; CHAR: The character that was matched against the CHARMAP in TABLE. +;;; ACC: An arbitrary "accumulator" value that is different depending +;;; on the readtable in question. +;;; PORT: A port reader object. ;;; ;;;;;;;;;;;;;;;;;;;;;; ;;; (READTABLE:NEW DEFAULT-ACTION CHARMAP) @@ -167,7 +179,7 @@ functions))) ;;; ;;;;;;;;;;;;;;;;;; -;;; Default readtables +;;; Default actions ;;; ;;;;;;;;;;;;;;;;;; ;;; Return an error. @@ -208,6 +220,12 @@ (lambda (oldtable __ _ port) (readtable:next newtable oldtable port)))) +;;; Jump to a new readtable, reading the new character. +(define readtable:jump/next + (lambda (newtable) + (lambda (oldtable _ acc port) + (readtable:next newtable acc port)))) + ;;; ;;;;;;;;;;;;;;;;; ;;; Identifier reader ;;; ;;;;;;;;;;;;;;;;; @@ -359,16 +377,69 @@ port))) ;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;;; Reader for datum that start with "#" +;;; Reader for stuff that start with "#" ;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (define readtable:vector (lambda (_ __ toplevel port) (list 'vector (readtable:read-proper-list toplevel port)))) +;;; Block comment reader. +;;; +;;; The outermost block comment reader is passed the toplevel reader as +;;; ACC. When the outermost block is finished, it will tail-call ACC. +;;; (It is basically the continuation of the reader.) +;;; +;;; When a nested block comment is found, it is passed #F as ACC, which +;;; it will not call. It will return an unspecified value. +;;; +;;; Since the read tables are not procedures, references to other tables +;;; in the same LETREC declaration must be protected with explicit LAMBDAs. +;;; Macros could make this much easier to read. +(define readtable:block-comment + (letrec ((potential-end + (readtable:process + (readtable:empty/default + (lambda (this char acc port) (readtable:act + loop + char + acc + port))) + (list readtable:update #\# + (lambda (this char acc port) + (if acc + (readtable:next acc #f port)))))) + (potential-start + (readtable:process + (readtable:empty/default + (lambda (this char acc port) (readtable:act + loop + char + acc + port))) + (list readtable:update #\| + (lambda (this char acc port) + (readtable:next loop #f port) + (readtable:next loop acc port))))) + (loop + (readtable:process + (readtable:empty/default readtable:skip) + (list readtable:update #\# + (lambda (this char acc port) + (readtable:next potential-start + acc + port))) + (list readtable:update #\| + (lambda (this char acc port) + (readtable:next potential-end + acc + port)))))) + loop)) + (define readtable:hash (readtable:process (readtable:empty/default (readtable:error "unimplemented")) + (list readtable:update #\| (readtable:jump/next readtable:block-comment)) (list readtable:update %bol readtable:vector))) ;;; ;;;;;;;;;;;;;;;; @@ -424,3 +495,6 @@ (read-all "( a . b )") (read-all "( a .b . c)") (read-all "#( a b y)") +(read-all "(x y #| this is a block\n comment\n |# z w)") +(read-all "#( a b #| this is a #| nested block |# comment|# z w)") +(read-all "#(a b #(c #|close#|comment|#|#y))")