;;; Copyright (C) Peter McGoron 2024 ;;; This program is free software: you can redistribute it and/or modify ;;; it under the terms of the GNU General Public License as published by ;;; the Free Software Foundation, version 3 of the License. ;;; ;;; This program is distributed in the hope that it will be useful, ;;; but WITHOUT ANY WARRANTY; without even the implied warranty of ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ;;; GNU General Public License for more details. ;;; ;;; You should have received a copy of the GNU General Public License ;;; along with this program. If not, see . ;;; R7RS reader. This is the lexer-parser end, so it returns tokens and ;;; not concrete objects. ;;; ;;; Notes: ;;; ;;; Port stores datum labels. Datum labels are stored for the entirety of ;;; a READ: this is to emulate MIT Scheme, which allows for datum labels ;;; outside of the datum that the label appears in. ;;; ;;; The reader does not return Scheme data: it returns annotated data ;;; containing the source location, datum label number, resolved datum ;;; label pointer. This is for advanced syntax systems. ;;; ;;; How datum labels could work: ;;; ;;; When encountering #[number]=, allocate a datum label and assign it ;;; nothing. Then call READ after "=", and destructively update the ;;; datum label with the resulting datum. A pass over the new read ;;; structure to convert it to regular Scheme data will resolve the ;;; indirection. ;;; ;;; All tokens are procedure-encapsulated objects, since the reader should ;;; never return a literal procedure. Each procedure has a TYPE message. (load "chez-compat.scm") (load "util.scm") (load "set.scm") (load "object.scm") (load "linked-list.scm") ;;; My text editor cannot parse Scheme's character syntax. (define %bol #\() (define %eol #\)) ;;; ;;;;;;;;;;;;;;;;;;;;;;;; ;;; Port reader wrapper ;;; ;;;;;;;;;;;;;;;;;;;;;;;; (define port->read-function (lambda (port) (lambda () (read-char port)))) ;;; READ: ;;; ;;; Stream readers contain mutable state. This is the case-folding mode ;;; and the current list of datum labels. ;;; ;;; (POS): Return (LIST FILENAME LINE-NUMBER OFFSET). ;;; (READ): Read the next character in the stream. Returns #F on EOF. ;;; (PUSH CHAR): Push CHAR such that it will be the next character read ;;; when (READ) is called. ;;; (PEEK): Read character, push it back, and return it. ;;; (FOLD-CASE?): Returns a boolean if case folding is enabled. ;;; (FOLD-CASE! BOOL): Sets the case folding to BOOL. (define port->read (lambda (read-function filename) (letrec ((line-number 1) (offset 0) (pushback-buffer '()) (datum-labels '()) (fold-case? #f) (this (object/abstract 'process (lambda (ch) (this 'update-position! ch) (cond ((or (eof-object? ch) (not ch)) ch) (fold-case? (char-downcase ch)) (else ch))) 'update-position! (lambda (ch) (cond ((eqv? ch #\newline) (set! line-number (+ 1 line-number)) (set! offset 0)) ;; OFFSET is sometimes set to #F to denote an unknown ;; offset. (offset (set! offset (+ 1 offset))))) 'set-datum-label! (lambda (label value) (set! datum-labels (car (smap:insert datum-labels label value)))) 'get-datum-label (lambda (label) (smap:search datum-labels label)) 'dump-mutable (lambda () (list datum-labels fold-case?)) 'restore-mutable! (lambda (state) (set! datum-labels (car state)) (set! fold-case? (cadr state))) 'pos (lambda () (list filename line-number offset)) 'read (lambda () (this 'process (if (null? pushback-buffer) (read-function) (let ((ch (car pushback-buffer))) (set! pushback-buffer (cdr pushback-buffer)) ch)))) 'peek (lambda () (let ((ch (this 'read))) (this 'push! ch) ch)) 'push! (lambda (ch) (if (eqv? ch #\newline) (begin (set! line-number (- line-number 1)) (set! offset #f)) (set! offset (- offset 1))) (set! pushback-buffer (cons ch pushback-buffer))) 'fold-case? (lambda () fold-case?) 'fold-case! (lambda (val) (set! fold-case? val))))) this))) ;;; ;;;;;;;;;;;;;; ;;; Character maps ;;; ;;;;;;;;;;;;;; (define integer<=> (lambda (x y) (cond ((< x y) '<) ((= x y) '=) (else '>)))) ;;; Comparison on characters extended to #F, which is less than all ;;; characters. (define char*<=> (lambda (x y) (cond ((and (not x) y) '<) ((and x (not y)) '>) ((and (not x) (not y) '=)) (else (integer<=> (char->integer x) (char->integer y)))))) (define %charmap:<=> (set:<=>-to-map char*<=>)) (define %charmap:update (set:update %charmap:<=>)) (define charmap:update (map:update %charmap:update)) (define charmap:insert (map:insert %charmap:update)) (define charmap:search (map:search %charmap:<=>)) ;;; ;;;;;;;;;;;;;;;;;;;;;; ;;; Readtable constructors ;;; ;;; Readtables are composed of a CHARMAP, which is a map from characters ;;; to actions, and a DEFAULT-ACTION, which is taken when there is no ;;; match in CHARMAP. ;;; ;;; An "action" is a procedure that takes four arguments: ;;; ;;; TABLE: The current table. ;;; CHAR: The character that was matched against the CHARMAP in TABLE. ;;; ACC: An arbitrary "accumulator" value that is different depending ;;; on the readtable in question. ;;; PORT: A port reader object. ;;; ;;;;;;;;;;;;;;;;;;;;;; ;;; (READTABLE:NEW DEFAULT-ACTION CHARMAP) (define readtable:new cons) (define %readtable:default-action car) (define %readtable:charmap cdr) ;;; Run the action in TABLE assigned to CHAR, or the default action of ;;; TABLE if there is no entry for CHAR. (define readtable:act (lambda (table char acc port) (let ((node (charmap:search (%readtable:charmap table) char))) (let ((action (if (null? node) (%readtable:default-action table) (map:val node)))) (action table char acc port))))) ;;; Run the action in TABLE with the next character from PORT. (define readtable:next (lambda (table acc port) (readtable:act table (port 'read) acc port))) ;;; Return a new readtable where CHAR is bound to ACTION. (define readtable:update (lambda (table char action) (readtable:new (%readtable:default-action table) (car (charmap:insert (%readtable:charmap table) char action))))) ;;; Update TABLE to act on all characters in LST with ACTION. (define readtable:update-list (lambda (table lst action) (fold (lambda (char table) (readtable:update table char action)) table lst))) ;;; Construct new readtable with no characters in its map and ;;; DEFAULT-ACTION as the default action. (define readtable:empty/default (lambda (default-action) (readtable:new default-action '()))) ;;; Each value in FUNCTIONS is a list (PROCEDURE ARGS...) which is called ;;; like (PROCEDURE TABLE ARGS...) and returns a table. (define readtable:process (lambda (table . functions) (fold (lambda (function table) (apply (car function) table (cdr function))) table functions))) ;;; ;;;;;;;;;;;;;;;;;; ;;; Default actions ;;; ;;;;;;;;;;;;;;;;;; ;;; Return an error. (define readtable:error (lambda emsg (lambda (table char acc port) (error emsg char acc table port)))) ;;; Discard the current character and continue reading the readtable. (define readtable:skip (lambda (table char acc port) (readtable:act table (port 'read) acc port))) ;;; Discard char and return constant. (define readtable:return (lambda (return) (lambda (table char acc port) return))) ;;; Jump to a new readtable, discard it's return, and continue reading ;;; in the table. (define readtable:jump-discard (lambda (newtable) (lambda (oldtable char acc port) (readtable:act newtable (port 'read) '() port) (readtable:act oldtable (port 'read) acc port)))) ;;; Jump to a new readtable with the same characters. (define readtable:jump (lambda (newtable) (lambda (oldtable char acc port) (readtable:act newtable char acc port)))) ;;; Jump to a new readtable, reading the new character, with the old ;;; readtable as ACC. (define readtable:next/old-as-acc (lambda (newtable) (lambda (oldtable __ _ port) (readtable:next newtable oldtable port)))) ;;; Jump to a new readtable, reading the new character. (define readtable:jump/next (lambda (newtable) (lambda (oldtable _ acc port) (readtable:next newtable acc port)))) ;;; ;;;;;;;;;;;;;;;;; ;;; Identifier reader ;;; ;;;;;;;;;;;;;;;;; ;;; Push back CHAR and return ACC. (define readtable:return-acc-keep-char (lambda (table char acc port) (port 'push! char) acc)) ;;; Push CHAR to ACC and continue reading from TABLE. (define readtable:push-char (lambda (table char acc port) (acc 'push-tail! char) (readtable:act table (port 'read) acc port))) ;;; Define a readtable that constructs an identifier by accepting all ;;; characters that are not listed. (define readtable:exclude-from-identifiers (lambda (table excluded) (fold (lambda (char table) (readtable:update table char readtable:return-acc-keep-char)) table excluded))) ;;; ASCII whitespace. (define readtable:ASCII-whitespace (list #\newline #\space (integer->char #x09) (integer->char #x0B) (integer->char #x0C) (integer->char #x0D))) ;;; Readtable for identifiers. (define readtable:identifier (readtable:process (readtable:empty/default readtable:push-char) (list readtable:exclude-from-identifiers readtable:ASCII-whitespace) (list readtable:exclude-from-identifiers (list #\| %bol %eol #\' #\; #f)))) ;;; Read an identifier starting with CHAR. (define readtable:read-ident (lambda (table char acc port) (let ((lst (linked-list:new))) (lst 'push! char) (list->string ((readtable:act readtable:identifier (port 'read) lst port) 'to-list))))) ;;; ;;;;;;;;;;;;;;;;;;;; ;;; Comments and whitespace reader ;;; ;;;;;;;;;;;;;;;;;;;; ;;; Readtable for a line comment. (define readtable:read-to-newline (readtable:process (readtable:empty/default readtable:skip) (list readtable:update #\newline (readtable:return #f)))) ;;; ;;;;;;;;;;; ;;; List reader ;;; ;;; The reader updates the previous readtable to handle ). This means ;;; that this read table does not have to handle end-of-line, whitespace, ;;; etc. ;;; ;;;;;;;;;;; ;;; Read the end of an improper list. (define readtable:read-improper-cdr (lambda (table acc port) (let ((val (readtable:act (readtable:update table %eol (readtable:error "proper list must have cdr")) (port 'read) #f port))) (acc 'set-cdr! val) (let ((table (readtable:process (readtable:empty/default (readtable:error "improper list has 1 cdr")) (list readtable:update-list readtable:ASCII-whitespace readtable:skip) (list readtable:update %eol (lambda dummy 'end-of-list))))) (readtable:act table (port 'read) acc port))))) ;;; Generic reader loop for a list. It takes as input the table that has ;;; already been updated with end of list and improper list handlers. (define readtable:read-list-loop (lambda (table port) (let ((acc (linked-list:new))) (letrec ((loop (lambda () (let ((value (readtable:act table (port 'read) acc port))) (cond ((eqv? value 'end-of-list) (acc 'to-list)) (else (acc 'push-tail! value) (loop))))))) (loop))))) ;;; Readtable for a list, generic to proper and improper list ;;; readers. (define readtable:table-for-list (lambda (oldtable on-dot) (readtable:process oldtable (list readtable:update %eol (readtable:return 'end-of-list)) (list readtable:update #\. (lambda (table char acc port) (let ((entire-identifier (readtable:read-ident table char #f port))) (if (equal? entire-identifier ".") (on-dot table acc port) entire-identifier))))))) ;;; Read a proper or improper list. (define readtable:read-list (lambda (oldtable _ __ port) (readtable:read-list-loop (readtable:table-for-list oldtable readtable:read-improper-cdr) port))) (define readtable:read-proper-list (lambda (table port) (readtable:read-list-loop (readtable:table-for-list table (readtable:error "expected proper list")) port))) ;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; Reader for stuff that start with "#" ;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (define readtable:digits (list #\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9)) (define readtable:vector (lambda (_ __ toplevel port) (list 'vector (readtable:read-proper-list toplevel port)))) ;;; Block comment reader. ;;; ;;; The outermost block comment reader is passed the toplevel reader as ;;; ACC. When the outermost block is finished, it will tail-call ACC. ;;; (It is basically the continuation of the reader.) ;;; ;;; When a nested block comment is found, it is passed #F as ACC, which ;;; it will not call. It will return an unspecified value. ;;; ;;; Since the read tables are not procedures, references to other tables ;;; in the same LETREC declaration must be protected with explicit LAMBDAs. ;;; Macros could make this much easier to read. (define readtable:block-comment (letrec ((potential-end (readtable:process (readtable:empty/default (lambda (this char acc port) (readtable:act loop char acc port))) (list readtable:update #\# (lambda (this char acc port) (if acc (readtable:next acc #f port)))))) (potential-start (readtable:process (readtable:empty/default (lambda (this char acc port) (readtable:act loop char acc port))) (list readtable:update #\| (lambda (this char acc port) (readtable:next loop #f port) (readtable:next loop acc port))))) (loop (readtable:process (readtable:empty/default readtable:skip) (list readtable:update #\# (lambda (this char acc port) (readtable:next potential-start acc port))) (list readtable:update #\| (lambda (this char acc port) (readtable:next potential-end acc port)))))) loop)) ;;; Encapsulate LINKED-LIST object with an additional value for the ;;; toplevel table. (define linked-list/toplevel:new (lambda (toplevel) (let ((ll (linked-list:new))) (lambda (op . args) (cond ((eq? op 'toplevel) toplevel) (else (apply ll op args))))))) ;;; Readtable for the number part of a datum label / reference. A label ;;; looks like "#[NUMBER]=" and a reference looks like "#[NUMBER]#". (define readtable:datum-label-next (readtable:process (readtable:empty/default (readtable:error "invalid datum label/ref")) (list readtable:update-list readtable:digits readtable:push-char) (list readtable:update #\= (lambda (_ __ acc port) ;; All datum labels are saved as strings. (let ((label (list->string (acc 'to-list)))) ;; Datum labels are pairs, one with the label and the other ;; with the data that is to be used. The label is there to ;; detect when a datum label refers to iself: i.e. ;; #0=#0# . ;; ;; The CAR is the label and the CDR is the thing it refers ;; to. (let ((datum-label (cons label '()))) (port 'set-datum-label! label datum-label) (let ((next-value (readtable:next (acc 'toplevel) #f port))) ;; After reading the part after the datum label, check ;; that the labeled datum is not self a reference to ;; the datum label. (if (and (pair? next-value) (equal? (car next-value) label)) (error "datum label cannot be itself") (set-cdr! datum-label next-value)) next-value))))) (list readtable:update #\# (lambda (_ __ acc port) (let ((label (list->string (acc 'to-list)))) (let ((datum-label (port 'get-datum-label label))) (if (null? datum-label) (error "unknown reference to datum label" label) (map:val datum-label)))))))) ;;; Reads the next toplevel datum, discards it, and then continues at the ;;; toplevel. ;;; ;;; TODO: The R7RS reader can cause side-effects due to #!FOLD-CASE. This ;;; must be supressed in datum comments. A method could be added to PORT ;;; that saves and restores mutable state (besides stream position). (define readtable:datum-comment (lambda (_ __ toplevel port) (readtable:next toplevel #f port) (readtable:next toplevel #f port))) (define readtable:hash (readtable:process (readtable:empty/default (readtable:error "unimplemented")) (list readtable:update #\| (readtable:jump/next readtable:block-comment)) (list readtable:update #\; readtable:datum-comment) (list readtable:update-list readtable:digits ; Datum labels (lambda (_ char toplevel port) (readtable:act readtable:datum-label-next char (linked-list/toplevel:new toplevel) port))) (list readtable:update %bol readtable:vector))) ;;; ;;;;;;;;;;;;;;;; ;;; Toplevel reader. ;;; ;;;;;;;;;;;;;;;; ;;; This is defined as a function so that it dynamically loads each ;;; sub-readtable. (define readtable:top (lambda () (readtable:process (readtable:empty/default readtable:read-ident) (list readtable:update-list readtable:ASCII-whitespace readtable:skip) (list readtable:update #f (readtable:return 'eof)) (list readtable:update %bol readtable:read-list) (list readtable:update %eol (readtable:error "unbalanced list")) (list readtable:update #\# (readtable:next/old-as-acc readtable:hash)) (list readtable:update #\; (readtable:jump-discard readtable:read-to-newline))))) (define intset:insert (set:insert (set:update integer<=>))) (define intset:in (set:in integer<=>)) (define uncycle (lambda (value) (let ((cntr 0) (used-counters '()) (pointers '())) (letrec ((uncycle (lambda (value) (cond ((pair? value) (let ((pair (assq value pointers))) (if (pair? pair) (begin (set! used-counters (car (intset:insert used-counters (cdr pair)))) (list 'ref (cdr pair))) (begin (set! pointers (cons (cons value cntr) pointers)) (let ((cur-cntr cntr)) (set! cntr (+ 1 cntr)) (let ((returned (cons (uncycle (car value)) (uncycle (cdr value))))) (if (not (null? (intset:in used-counters cur-cntr))) (list 'def cur-cntr '= returned) returned))))))) (else value))))) (uncycle value))))) ;;; ;;;;;;;;;;; ;;; Test reader ;;; ;;;;;;;;;;; (define %list->read (lambda (seq) (port->read (lambda () (if (null? seq) #f (let ((ch (car seq))) (set! seq (cdr seq)) ch))) "test"))) (define read-all (lambda (str) (let ((reader (%list->read (string->list str)))) (letrec ((loop (lambda () (if (not (reader 'peek)) #t (let ((value (readtable:act (readtable:top) (reader 'read) #f reader))) (display (list "return" (uncycle value))) (newline) (loop)))))) (loop))))) (read-all "x yy zz ; this is a comment\nx call/cc ") (read-all "(a b c def (ghi j) k )") (read-all "( a . b )") (read-all "( a .b . c)") (read-all "#( a b y)") (read-all "(x y #| this is a block\n comment\n |# z w)") (read-all "#( a b #| this is a #| nested block |# comment|# z w)") (read-all "#(a b #(c #|close#|comment|#|#y))") (read-all "(this has a #;(call with (current continuation)) datum comment)") (read-all "#0=(#0# not unix)")