;;; Copyright (C) Peter McGoron 2024
;;; This program is free software: you can redistribute it and/or modify
;;; it under the terms of the GNU General Public License as published by
;;; the Free Software Foundation, version 3 of the License.
;;;
;;; This program is distributed in the hope that it will be useful,
;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;;; GNU General Public License for more details.
;;;
;;; You should have received a copy of the GNU General Public License
;;; along with this program. If not, see <https://www.gnu.org/licenses/>.

;;; R7RS reader. This is the lexer-parser end, so it returns tokens and
;;; not concrete objects.
;;;
;;; Notes:
;;;
;;; Port stores datum labels. Datum labels are stored for the entirety of
;;; a READ: this is to emulate MIT Scheme, which allows for datum labels
;;; outside of the datum that the label appears in.
;;;
;;; The reader does not return Scheme data: it returns annotated data
;;; containing the source location, datum label number, and resolved datum
;;; label pointer. This is for advanced syntax systems.
;;;
;;; How datum labels could work:
;;;
;;; When encountering #[number]=, allocate a datum label and assign it
;;; nothing. Then call READ after "=", and destructively update the
;;; datum label with the resulting datum. A pass over the new read
;;; structure to convert it to regular Scheme data will resolve the
;;; indirection.
;;;
;;; Printing circular structures and shared structures:
;;;
;;; The only way to do so is to find a way to sort lists in a way that
;;; respects EQ?. This is impossible in standard Scheme and also in
;;; many implementations because of moving collectors.
;;;
;;; A list could be maintained of all previous values, and each print
;;; could check the list using EQ? to find a match, but for R7RS WRITE
;;; and WRITE-SHARED this would be an O(N^2) operation.
(load "chez-compat.scm")
(load "util.scm")
(load "set.scm")
(load "linked-list.scm")

;;; My text editor cannot parse Scheme's character syntax.
(define %bol #\()
(define %eol #\))

;;; ;;;;;;;;;;;;;;;;;;;;;;;;
;;; Port reader wrapper
;;; ;;;;;;;;;;;;;;;;;;;;;;;;

;;; Wrap PORT in a thunk that returns the result of READ-CHAR on PORT
;;; each time it is called. At end of input READ-CHAR yields an EOF
;;; object; PORT->READ below converts that to #F.
(define port->read-function
  (lambda (port)
    (lambda ()
      (read-char port))))

;;; READ:
;;;
;;; Stream readers contain mutable state. This is the case-folding mode
;;; and the current list of datum labels.
;;;
;;; (POS): Return (LIST FILENAME LINE-NUMBER OFFSET).
;;; (READ): Read the next character in the stream. Returns #F on EOF.
;;; (PUSH CHAR): Push CHAR such that it will be the next character read
;;; when (READ) is called.
;;; (PEEK): Read character, push it back, and return it.
;;; (FOLD-CASE?): Returns a boolean if case folding is enabled.
;;; (FOLD-CASE! BOOL): Sets the case folding to BOOL.
;;;
;;; READ-FUNCTION is a thunk producing the next character (or #F / an EOF
;;; object at end of input). FILENAME is only reported back through POS.
;;;
;;; OFFSET becomes #F after a newline has been pushed back, because the
;;; length of the previous line is not tracked; it is reset to 0 when
;;; that newline is read again.
(define port->read
  (lambda (read-function filename)
    (let ((line-number 1)
          (offset 0)
          (pushback-buffer '())
          (fold-case? #f))
      (letrec ((update-position!
                (lambda (ch)
                  (cond
                    ((eqv? ch #\newline)
                     (set! line-number (+ 1 line-number))
                     (set! offset 0))
                    ;; OFFSET is #F when the column is unknown.
                    (offset (set! offset (+ 1 offset))))))
               (process
                (lambda (ch)
                  ;; The position is advanced for EOF as well, so that a
                  ;; PEEK at EOF (read, then push) leaves OFFSET unchanged.
                  (update-position! ch)
                  (cond
                    ;; Normalize both EOF representations to #F: the
                    ;; character maps compare only characters and #F.
                    ((or (eof-object? ch) (not ch)) #f)
                    (fold-case? (char-downcase ch))
                    (else ch))))
               (port
                (lambda (op . args)
                  (cond
                    ((eq? op 'pos) (list filename line-number offset))
                    ((eq? op 'read)
                     (process
                      (if (null? pushback-buffer)
                          (read-function)
                          (let ((ch (car pushback-buffer)))
                            (set! pushback-buffer (cdr pushback-buffer))
                            ch))))
                    ((eq? op 'peek)
                     (let ((ch (port 'read)))
                       (port 'push ch)
                       ch))
                    ((eq? op 'push)
                     (let ((ch (car args)))
                       (cond
                         ((eqv? ch #\newline)
                          (set! line-number (- line-number 1))
                          (set! offset #f))
                         ;; Only step back when the column is known;
                         ;; OFFSET may be #F after a pushed-back newline.
                         (offset (set! offset (- offset 1))))
                       (set! pushback-buffer (cons ch pushback-buffer))))
                    ((eq? op 'fold-case?) fold-case?)
                    ((eq? op 'fold-case!) (set! fold-case? (car args)))
                    (else (error "port->read: invalid" (cons op args)))))))
        port))))

;;; ;;;;;;;;;;;;;;
;;; Character maps
;;; ;;;;;;;;;;;;;;

;;; Three-way comparison of numbers: returns the symbol <, = or >.
(define integer<=>
  (lambda (x y)
    (cond
      ((< x y) '<)
      ((= x y) '=)
      (else '>))))

;;; Comparison on characters extended to #F, which is less than all
;;; characters.
(define char*<=>
  (lambda (x y)
    (cond
      ((and (not x) y) '<)
      ((and x (not y)) '>)
      ((and (not x) (not y)) '=)
      (else (integer<=> (char->integer x) (char->integer y))))))

;;; Map operations specialized to CHAR*<=>. The SET: and MAP: procedures
;;; are not defined in this file (presumably they come from "set.scm").
(define %charmap:<=> (set:<=>-to-map char*<=>))
(define %charmap:update (set:update %charmap:<=>))
(define charmap:update (map:update %charmap:update))
(define charmap:insert (map:insert %charmap:update))
(define charmap:search (map:search %charmap:<=>))

;;; ;;;;;;;;;;;;;;;;;;;;;;
;;; Readtable constructors
;;;
;;; Readtables are composed of a CHARMAP, which is a map from characters
;;; to actions, and a DEFAULT-ACTION, which is taken when there is no
;;; match in CHARMAP.
;;;
;;; An "action" is a procedure that takes four arguments:
;;;
;;; TABLE: The current table.
;;; CHAR: The character that was matched against the CHARMAP in TABLE.
;;; ACC: An arbitrary "accumulator" value that is different depending
;;; on the readtable in question.
;;; PORT: A port reader object.
;;; ;;;;;;;;;;;;;;;;;;;;;;

;;; (READTABLE:NEW DEFAULT-ACTION CHARMAP)
(define readtable:new cons)
(define %readtable:default-action car)
(define %readtable:charmap cdr)

;;; Run the action in TABLE assigned to CHAR, or the default action of
;;; TABLE if there is no entry for CHAR.
(define readtable:act
  (lambda (table char acc port)
    (let ((node (charmap:search (%readtable:charmap table) char)))
      ;; A null search result is treated as "no entry for CHAR".
      (let ((action (if (null? node)
                        (%readtable:default-action table)
                        (map:val node))))
        (action table char acc port)))))

;;; Run the action in TABLE with the next character from PORT.
(define readtable:next
  (lambda (table acc port)
    (readtable:act table (port 'read) acc port)))

;;; Return a new readtable where CHAR is bound to ACTION.
(define readtable:update
  (lambda (table char action)
    (readtable:new
     (%readtable:default-action table)
     ;; Only the CAR of CHARMAP:INSERT's result is kept as the new map;
     ;; CHARMAP:INSERT is defined outside this block.
     (car (charmap:insert (%readtable:charmap table) char action)))))

;;; Update TABLE to act on all characters in LST with ACTION.
;;; FOLD is not defined here (presumably "util.scm"); it is called with a
;;; (LAMBDA (ELEMENT ACCUMULATOR)) procedure, a seed and a list.
(define readtable:update-list
  (lambda (table lst action)
    (fold (lambda (char table)
            (readtable:update table char action))
          table
          lst)))

;;; Construct new readtable with no characters in its map and
;;; DEFAULT-ACTION as the default action.
(define readtable:empty/default
  (lambda (default-action)
    (readtable:new default-action '())))

;;; Each value in FUNCTIONS is a list (PROCEDURE ARGS...) which is called
;;; like (PROCEDURE TABLE ARGS...) and returns a table.
(define readtable:process
  (lambda (table . functions)
    (fold (lambda (function table)
            (apply (car function) table (cdr function)))
          table
          functions)))

;;; ;;;;;;;;;;;;;;;;;;
;;; Default actions
;;; ;;;;;;;;;;;;;;;;;;

;;; Return an error.
;;;
;;; (READTABLE:ERROR MESSAGE OBJ ...) returns an action that accepts any
;;; number of arguments and calls ERROR with MESSAGE and OBJ ... first,
;;; followed by the arguments the action itself received. Putting the
;;; message first matches the other ERROR call in this file.
(define readtable:error
  (lambda emsg
    (lambda tablemsg
      (apply error (append emsg tablemsg)))))

;;; Discard the current character and continue reading the readtable.
(define readtable:skip
  (lambda (table char acc port)
    (readtable:act table (port 'read) acc port)))

;;; Discard char and return constant.
(define readtable:return
  (lambda (return)
    (lambda (table char acc port)
      return)))

;;; Jump to a new readtable, discard its return, and continue reading
;;; in the table.
(define readtable:jump-discard
  (lambda (newtable)
    (lambda (oldtable char acc port)
      ;; NEWTABLE is run on the next character with '() as its ACC; its
      ;; result is dropped before OLDTABLE resumes with the original ACC.
      (readtable:act newtable (port 'read) '() port)
      (readtable:act oldtable (port 'read) acc port))))

;;; Jump to a new readtable with the same characters.
(define readtable:jump
  (lambda (newtable)
    (lambda (oldtable char acc port)
      (readtable:act newtable char acc port))))

;;; Jump to a new readtable, reading the new character, with the old
;;; readtable as ACC.
(define readtable:next/old-as-acc
  (lambda (newtable)
    (lambda (oldtable __ _ port)
      (readtable:next newtable oldtable port))))

;;; Jump to a new readtable, reading the new character.
(define readtable:jump/next
  (lambda (newtable)
    (lambda (oldtable _ acc port)
      (readtable:next newtable acc port))))

;;; ;;;;;;;;;;;;;;;;;
;;; Identifier reader
;;; ;;;;;;;;;;;;;;;;;

;;; Push back CHAR and return ACC.
(define readtable:return-acc-keep-char
  (lambda (table char acc port)
    (port 'push char)
    acc))

;;; Push CHAR to ACC and continue reading from TABLE.
(define readtable:push-char
  (lambda (table char acc port)
    (acc 'push-tail char)
    (readtable:act table (port 'read) acc port)))

;;; Define a readtable that constructs an identifier by accepting all
;;; characters that are not listed. Every character in EXCLUDED is bound
;;; to READTABLE:RETURN-ACC-KEEP-CHAR, i.e. it ends the identifier and is
;;; pushed back onto the port.
(define readtable:exclude-from-identifiers
  (lambda (table excluded)
    (readtable:update-list table excluded readtable:return-acc-keep-char)))

;;; ASCII whitespace.
(define readtable:ASCII-whitespace
  (list #\newline
        #\space
        (integer->char #x09)
        (integer->char #x0B)
        (integer->char #x0C)
        (integer->char #x0D)))

;;; Readtable for identifiers. #F (end of input) is among the excluded
;;; entries, so an identifier also ends at EOF.
(define readtable:identifier
  (readtable:process
   (readtable:empty/default readtable:push-char)
   (list readtable:exclude-from-identifiers readtable:ASCII-whitespace)
   (list readtable:exclude-from-identifiers
         (list #\| %bol %eol #\' #\; #f))))

;;; Read an identifier starting with CHAR. Returns the identifier as a
;;; string; TABLE and ACC are ignored.
(define readtable:read-ident
  (lambda (table char acc port)
    (let ((lst (linked-list:new)))
      (lst 'push char)
      (list->string
       ((readtable:act readtable:identifier (port 'read) lst port)
        'to-list)))))

;;; ;;;;;;;;;;;;;;;;;;;;
;;; Comments and whitespace reader
;;; ;;;;;;;;;;;;;;;;;;;;

;;; Readtable for a line comment. Skips characters and returns #F at the
;;; newline. It also returns #F at end of input (#F), so that a comment on
;;; the last line of the input terminates instead of skipping forever.
(define readtable:read-to-newline
  (readtable:process
   (readtable:empty/default readtable:skip)
   (list readtable:update #\newline (readtable:return #f))
   (list readtable:update #f (readtable:return #f))))

;;; ;;;;;;;;;;;
;;; List reader
;;;
;;; The reader updates the previous readtable to handle ). This means
;;; that this read table does not have to handle end-of-line, whitespace,
;;; etc.
;;; ;;;;;;;;;;;

;;; Read the end of an improper list.
;;;
;;; TABLE is the list readtable, ACC the list accumulator. One datum is
;;; read and stored with (ACC 'SET-CDR! ...); after it only whitespace and
;;; the closing parenthesis are accepted. Returns END-OF-LIST.
(define readtable:read-improper-cdr
  (lambda (table acc port)
    (let ((val (readtable:act
                ;; A closing parenthesis right after the dot is an error.
                (readtable:update
                 table
                 %eol
                 (readtable:error "improper list must have cdr"))
                (port 'read)
                #f
                port)))
      (acc 'set-cdr! val)
      (let ((table (readtable:process
                    (readtable:empty/default
                     (readtable:error "improper list has 1 cdr"))
                    (list readtable:update-list
                          readtable:ASCII-whitespace
                          readtable:skip)
                    (list readtable:update
                          %eol
                          (lambda dummy 'end-of-list)))))
        (readtable:act table (port 'read) acc port)))))

;;; Generic reader loop for a list. It takes as input the table that has
;;; already been updated with end of list and improper list handlers.
;;; Each value produced by TABLE is appended to a fresh accumulator until
;;; END-OF-LIST is produced; the result is (ACC 'TO-LIST).
(define readtable:read-list-loop
  (lambda (table port)
    (let ((acc (linked-list:new)))
      (letrec ((loop
                (lambda ()
                  (let ((value (readtable:act table (port 'read) acc port)))
                    (cond
                      ((eqv? value 'end-of-list) (acc 'to-list))
                      (else
                       (acc 'push-tail value)
                       (loop)))))))
        (loop)))))

;;; Readtable for a list, generic to proper and improper list
;;; readers.
;;;
;;; OLDTABLE is extended so that ")" returns END-OF-LIST, end of input
;;; (#F) signals an error instead of yielding a value forever, and "."
;;; reads a whole identifier: a lone "." calls (ON-DOT TABLE ACC PORT),
;;; anything else is returned as the identifier string.
(define readtable:table-for-list
  (lambda (oldtable on-dot)
    (readtable:process
     oldtable
     (list readtable:update %eol (readtable:return 'end-of-list))
     (list readtable:update #f (readtable:error "unterminated list"))
     (list readtable:update
           #\.
           (lambda (table char acc port)
             (let ((entire-identifier
                    (readtable:read-ident table char #f port)))
               (if (equal? entire-identifier ".")
                   (on-dot table acc port)
                   entire-identifier)))))))

;;; Read a proper or improper list.
(define readtable:read-list
  (lambda (oldtable _ __ port)
    (readtable:read-list-loop
     (readtable:table-for-list oldtable readtable:read-improper-cdr)
     port)))

;;; Read a list in which a lone "." is an error.
(define readtable:read-proper-list
  (lambda (table port)
    (readtable:read-list-loop
     (readtable:table-for-list table
                               (readtable:error "expected proper list"))
     port)))

;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; Reader for stuff that start with "#"
;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;;; Read the elements of a vector literal as a proper list, using the
;;; readtable passed as ACC (TOPLEVEL), and return (LIST 'VECTOR ELEMENTS).
(define readtable:vector
  (lambda (_ __ toplevel port)
    (list 'vector (readtable:read-proper-list toplevel port))))

;;; Block comment reader.
;;;
;;; The outermost block comment reader is passed the toplevel reader as
;;; ACC. When the outermost block is finished, it will tail-call ACC.
;;; (It is basically the continuation of the reader.)
;;;
;;; When a nested block comment is found, it is passed #F as ACC, which
;;; it will not call. It will return an unspecified value.
;;;
;;; Since the read tables are not procedures, references to other tables
;;; in the same LETREC declaration must be protected with explicit LAMBDAs.
;;; Macros could make this much easier to read.
;;;
;;; End of input (#F) inside a comment signals an error; without that
;;; entry the skipping default action would read #F forever.
(define readtable:block-comment
  (letrec ((potential-end
            ;; Entered after "|": "#" closes the comment, anything else
            ;; is handed back to LOOP.
            (readtable:process
             (readtable:empty/default
              (lambda (this char acc port)
                (readtable:act loop char acc port)))
             (list readtable:update
                   #\#
                   (lambda (this char acc port)
                     (if acc
                         (readtable:next acc #f port))))))
           (potential-start
            ;; Entered after "#": "|" opens a nested comment, anything
            ;; else is handed back to LOOP.
            (readtable:process
             (readtable:empty/default
              (lambda (this char acc port)
                (readtable:act loop char acc port)))
             (list readtable:update
                   #\|
                   (lambda (this char acc port)
                     ;; Consume the nested comment (ACC = #F), then carry
                     ;; on with the enclosing one.
                     (readtable:next loop #f port)
                     (readtable:next loop acc port)))))
           (loop
            (readtable:process
             (readtable:empty/default readtable:skip)
             (list readtable:update
                   #f
                   (readtable:error "unterminated block comment"))
             (list readtable:update
                   #\#
                   (lambda (this char acc port)
                     (readtable:next potential-start acc port)))
             (list readtable:update
                   #\|
                   (lambda (this char acc port)
                     (readtable:next potential-end acc port))))))
    loop))

;;; Reads the next toplevel datum, discards it, and then continues at the
;;; toplevel.
;;;
;;; TODO: The R7RS reader can cause side-effects due to #!FOLD-CASE. This
;;; must be suppressed in datum comments. A method could be added to PORT
;;; that saves and restores mutable state (besides stream position).
(define (readtable:datum-comment table char toplevel port)
  ;; The first call reads one datum whose value is dropped; the second
  ;; call produces the value that is actually returned.
  (readtable:next toplevel #f port)
  (readtable:next toplevel #f port))

;;; Readtable consulted for the character following "#": "|" starts a
;;; block comment, ";" a datum comment, "(" a vector. Anything else
;;; raises the "unimplemented" error.
(define readtable:hash
  (let ((unimplemented (readtable:error "unimplemented"))
        (block-comment (readtable:jump/next readtable:block-comment)))
    (readtable:process
     (readtable:empty/default unimplemented)
     (list readtable:update #\| block-comment)
     (list readtable:update #\; readtable:datum-comment)
     (list readtable:update %bol readtable:vector))))

;;; ;;;;;;;;;;;;;;;;
;;; Toplevel reader.
;;; ;;;;;;;;;;;;;;;;

;;; This is defined as a function so that it dynamically loads each
;;; sub-readtable.
(define (readtable:top)
  (let ((stray-close (readtable:error "unbalanced list"))
        (hash-syntax (readtable:next/old-as-acc readtable:hash))
        (line-comment (readtable:jump-discard readtable:read-to-newline)))
    (readtable:process
     ;; Any character without an entry starts an identifier.
     (readtable:empty/default readtable:read-ident)
     (list readtable:update-list readtable:ASCII-whitespace readtable:skip)
     (list readtable:update #f (readtable:return 'eof))
     (list readtable:update %bol readtable:read-list)
     (list readtable:update %eol stray-close)
     (list readtable:update #\# hash-syntax)
     (list readtable:update #\; line-comment))))

;;; ;;;;;;;;;;;
;;; Test reader
;;; ;;;;;;;;;;;

;;; Build a port reader (see PORT->READ) over the list SEQ, named "test".
;;; The underlying thunk hands out one element per call and #F once the
;;; list is exhausted.
(define (%list->read seq)
  (let ((remaining seq))
    (port->read
     (lambda ()
       (cond
         ((null? remaining) #f)
         (else
          (let ((next (car remaining)))
            (set! remaining (cdr remaining))
            next))))
     "test")))

;;; Read every datum in STR with a fresh toplevel readtable per datum,
;;; displaying ("return" VALUE) and a newline for each one. Returns #T
;;; once PEEK reports end of input.
(define (read-all str)
  (let ((reader (%list->read (string->list str))))
    (let loop ()
      (cond
        ((reader 'peek)
         (let ((value (readtable:act (readtable:top)
                                     (reader 'read)
                                     #f
                                     reader)))
           (display (list "return" value))
           (newline)
           (loop)))
        (else #t)))))

(read-all "x yy zz ; this is a comment\nx call/cc ")
(read-all "(a b c def (ghi j) k )")
(read-all "( a . b )")
(read-all "( a .b . c)")
(read-all "#( a b y)")
(read-all "(x y #| this is a block\n comment\n |# z w)")
(read-all "#( a b #| this is a #| nested block |# comment|# z w)")
(read-all "#(a b #(c #|close#|comment|#|#y))")
(read-all "(this has a #;(call with (current continuation)) datum comment)")