;;; Copyright (C) Peter McGoron 2024 ;;; This program is free software: you can redistribute it and/or modify ;;; it under the terms of the GNU General Public License as published by ;;; the Free Software Foundation, version 3 of the License. ;;; ;;; This program is distributed in the hope that it will be useful, ;;; but WITHOUT ANY WARRANTY; without even the implied warranty of ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ;;; GNU General Public License for more details. ;;; ;;; You should have received a copy of the GNU General Public License ;;; along with this program. If not, see . ;;; R7RS reader. This is the lexer-parser end, so it returns tokens and ;;; not concrete objects. ;;; ;;; Notes: ;;; ;;; Port stores datum labels. Datum labels are stored for the entirety of ;;; a READ: this is to emulate MIT Scheme, which allows for datum labels ;;; outside of the datum that the label appears in. ;;; ;;; The reader does not return Scheme data: it returns annotated data ;;; containing the source location, datum label number, resolved datum ;;; label pointer. This is for advanced syntax systems. ;;; ;;; How datum labels could work: ;;; ;;; When encountering #[number]=, allocate a datum label and assign it ;;; nothing. Then call READ after "=", and destructively update the ;;; datum label with the resulting datum. A pass over the new read ;;; structure to convert it to regular Scheme data will resolve the ;;; indirection. ;;; ;;; All tokens are procedure-encapsulated objects, since the reader should ;;; never return a literal procedure. Each procedure has a TYPE message. (load "chez-compat.scm") (load "util.scm") (load "set.scm") (load "linked-list.scm") ;;; My text editor cannot parse Scheme's character syntax. (define %bol #\() (define %eol #\)) ;;; ;;;;;;;;;;;;;;;;;;;;;;;; ;;; Port reader wrapper ;;; ;;;;;;;;;;;;;;;;;;;;;;;; (define port->read-function (lambda (port) (lambda () (read-char port)))) (define string->read-function (lambda (str) (let ((i -1) (length (string-length str))) (lambda () (set! i (+ i 1)) (if (>= i length) #f (string-ref str i)))))) ;;; READ: ;;; ;;; Stream readers contain mutable state. This is the case-folding mode ;;; and the current list of datum labels. ;;; ;;; (POS): Return (LIST FILENAME LINE-NUMBER OFFSET). ;;; (READ): Read the next character in the stream. Returns #F on EOF. ;;; (PUSH CHAR): Push CHAR such that it will be the next character read ;;; when (READ) is called. ;;; (PEEK): Read character, push it back, and return it. ;;; (FOLD-CASE?): Returns a boolean if case folding is enabled. ;;; (FOLD-CASE! BOOL): Sets the case folding to BOOL. (define port->read (lambda (read-function filename . location) (let ((line-number (if (null? location) 1 (car location))) (offset (if (null? location) 0 (cadr location))) (pushback-buffer '()) (datum-labels '()) (fold-case? #f)) (letrec ((update-position! (lambda (ch) (cond ((eqv? ch #\newline) (set! line-number (+ 1 line-number)) (set! offset 0)) ;; OFFSET is sometimes set to #F to denote an unknown ;; offset. (offset (set! offset (+ 1 offset)))))) (location (lambda () (list filename line-number offset))) (set-datum-label! (lambda (label value) (set! datum-labels (car (smap:insert datum-labels label value))))) (get-datum-label (lambda (label) (smap:search datum-labels label))) (clear-datum-labels! (lambda () (set! datum-labels '()))) (dump-mutable (lambda () (list datum-labels fold-case?))) (restore-mutable! (lambda (state) (set! datum-labels (car state)) (set! fold-case? (cadr state)))) (process (lambda (ch) (update-position! ch) (cond ((or (eof-object? ch) (not ch)) ch) (fold-case? (char-downcase ch)) (else ch)))) (port (lambda (op . args) ;; TODO: turn into string map? (cond ((eq? op 'location) (location)) ((eq? op 'read) (process (if (null? pushback-buffer) (read-function) (let ((ch (car pushback-buffer))) (set! pushback-buffer (cdr pushback-buffer)) ch)))) ((eq? op 'peek) (let ((ch (port 'read))) (port 'push ch) ch)) ((eq? op 'push) (let ((ch (car args))) (if (eqv? ch #\newline) (begin (set! line-number (- line-number 1)) (set! offset #f)) (set! offset (- offset 1))) (set! pushback-buffer (cons ch pushback-buffer)))) ((eq? op 'fold-case?) fold-case?) ((eq? op 'fold-case!) (set! fold-case? (car args))) ((eq? op 'set-datum-label!) (apply set-datum-label! args)) ((eq? op 'get-datum-label) (apply get-datum-label args)) ((eq? op 'clear-datum-labels!) (apply clear-datum-labels! args)) ((eq? op 'dump-mutable) (apply dump-mutable args)) ((eq? op 'restore-mutable!) (apply restore-mutable! args)) (else (error 'read->port 'invalid (cons op args))))))) port)))) (define ident->read (lambda (location ident) (apply port->read (string->read-function (ident 'value)) location))) ;;; ;;;;;;;;;;;;;; ;;; Character maps ;;; ;;;;;;;;;;;;;; (define integer<=> (lambda (x y) (cond ((< x y) '<) ((= x y) '=) (else '>)))) ;;; Comparison on characters extended to #F, which is less than all ;;; characters. (define char*<=> (lambda (x y) (cond ((and (not x) y) '<) ((and x (not y)) '>) ((and (not x) (not y) '=)) (else (integer<=> (char->integer x) (char->integer y)))))) (define %charmap:<=> (set:<=>-to-map char*<=>)) (define %charmap:update (set:update %charmap:<=>)) (define charmap:update (map:update %charmap:update)) (define charmap:insert (map:insert %charmap:update)) (define charmap:search (map:search %charmap:<=>)) ;;; ;;;;;;;;;;;;;;;;;;;;;; ;;; Readtable constructors ;;; ;;; Readtables are composed of a CHARMAP, which is a map from characters ;;; to actions, and a DEFAULT-ACTION, which is taken when there is no ;;; match in CHARMAP. ;;; ;;; An "action" is a procedure that takes four arguments: ;;; ;;; TABLE: The current table. ;;; CHAR: The character that was matched against the CHARMAP in TABLE. ;;; ACC: An arbitrary "accumulator" value that is different depending ;;; on the readtable in question. ;;; PORT: A port reader object. ;;; ;;;;;;;;;;;;;;;;;;;;;; ;;; (READTABLE:NEW DEFAULT-ACTION CHARMAP) (define readtable:new cons) (define %readtable:default-action car) (define %readtable:charmap cdr) ;;; Run the action in TABLE assigned to CHAR, or the default action of ;;; TABLE if there is no entry for CHAR. (define readtable:act (lambda (table char acc port) (let ((node (charmap:search (%readtable:charmap table) char))) (let ((action (if (null? node) (%readtable:default-action table) (map:val node)))) (action table char acc port))))) ;;; Run the action in TABLE with the next character from PORT. (define readtable:next (lambda (table acc port) (readtable:act table (port 'read) acc port))) ;;; Return a new readtable where CHAR is bound to ACTION. (define readtable:update (lambda (table chars action) (let ((chars (if (pair? chars) chars (list chars)))) (fold (lambda (char table) (readtable:new (%readtable:default-action table) (car (charmap:insert (%readtable:charmap table) char action)))) table chars)))) ;;; Construct new readtable with no characters in its map and ;;; DEFAULT-ACTION as the default action. (define readtable:empty/default (lambda (default-action) (readtable:new default-action '()))) ;;; Each value in FUNCTIONS is a list (PROCEDURE ARGS...) which is called ;;; like (PROCEDURE TABLE ARGS...) and returns a table. (define readtable:process (lambda (table . functions) (fold (lambda (function table) (apply (car function) table (cdr function))) table functions))) ;;; ;;;;;;;;;;;;;;;;;; ;;; Default actions ;;; ;;;;;;;;;;;;;;;;;; ;;; Return an error. (define readtable:error (lambda emsg (lambda (table char acc port) (error emsg char acc table port)))) ;;; Discard the current character and continue reading the readtable. (define readtable:skip (lambda (table char acc port) (readtable:act table (port 'read) acc port))) ;;; Discard char and return constant. (define readtable:return (lambda (return) (lambda (table char acc port) return))) ;;; Jump to a new readtable, discard it's return, and continue reading ;;; in the table. (define readtable:jump-discard (lambda (newtable) (lambda (oldtable char acc port) (readtable:act newtable (port 'read) '() port) (readtable:act oldtable (port 'read) acc port)))) ;;; Jump to a new readtable with the same characters. (define readtable:jump (lambda (newtable) (lambda (oldtable char acc port) (readtable:act newtable char acc port)))) ;;; Jump to a new readtable, reading the new character, with the old ;;; readtable as ACC. (define readtable:next/old-as-acc (lambda (newtable) (lambda (oldtable __ _ port) (readtable:next newtable oldtable port)))) ;;; Jump to a new readtable, reading the new character. (define readtable:jump/next (lambda (newtable) (lambda (oldtable _ acc port) (readtable:next newtable acc port)))) ;;; ;;;;;;;;;;;;;;;;; ;;; Identifier reader ;;; ;;;;;;;;;;;;;;;;; (define read:ident (lambda (name location) (lambda (op . args) (cond ((eq? op 'type) 'ident) ((eq? op 'value) name) (else (error 'read:ident "invalid operation" op args)))))) (define read:ident-builder (lambda (location . start-char) (let ((char-list (linked-list:new))) (if (not (null? start-char)) (char-list 'push (car start-char))) (lambda (op . args) (cond ((eq? op 'finalize->ident) (read:ident (list->string (char-list 'to-list)) location)) (else (apply char-list op args))))))) ;;; Push back CHAR and return ACC. (define readtable:return-acc-as-ident (lambda (table char acc port) (port 'push char) (acc 'finalize->ident))) ;;; Push CHAR to ACC and continue reading from TABLE. (define readtable:push-char (lambda (table char acc port) (acc 'push-tail char) (readtable:act table (port 'read) acc port))) ;;; Define a readtable that constructs an identifier by accepting all ;;; characters that are not listed. (define readtable:exclude-from-identifiers (lambda (table excluded) (fold (lambda (char table) (readtable:update table char readtable:return-acc-as-ident)) table excluded))) ;;; ASCII whitespace. (define readtable:ASCII-whitespace (list #\newline #\space (integer->char #x09) (integer->char #x0B) (integer->char #x0C) (integer->char #x0D))) ;;; Readtable for identifiers. (define readtable:identifier (readtable:process (readtable:empty/default readtable:push-char) (list readtable:exclude-from-identifiers readtable:ASCII-whitespace) (list readtable:exclude-from-identifiers (list #\| %bol %eol #\' #\; #f)))) ;;; Read an identifier starting with CHAR. (define readtable:read-ident (lambda (table char acc port) (readtable:act readtable:identifier (port 'read) (read:ident-builder (port 'location) char) port))) ;;; ;;;;;;;;;;;;;;;;;;;; ;;; Comments and whitespace reader ;;; ;;;;;;;;;;;;;;;;;;;; ;;; Readtable for a line comment. (define readtable:read-to-newline (readtable:process (readtable:empty/default readtable:skip) (list readtable:update #\newline (readtable:return #f)))) ;;; ;;;;;;;;;;; ;;; List reader ;;; ;;; The reader updates the previous readtable to handle ). This means ;;; that this read table does not have to handle end-of-line, whitespace, ;;; etc. ;;; ;;;;;;;;;;; ;;; Read the end of an improper list. (define readtable:read-improper-cdr (lambda (table acc port) (let ((val (readtable:act (readtable:update table %eol (readtable:error 'read-improper-cdr "proper list must have cdr")) (port 'read) #f port))) (acc 'set-cdr! val) (let ((table (readtable:process (readtable:empty/default (readtable:error 'read-improper-cdr "improper list has 1 cdr")) (list readtable:update readtable:ASCII-whitespace readtable:skip) (list readtable:update %eol (lambda dummy 'end-of-list))))) (readtable:act table (port 'read) acc port))))) ;;; Generic reader loop for a list. It takes as input the table that has ;;; already been updated with end of list and improper list handlers. (define readtable:read-list-loop (lambda (table port) (let ((acc (linked-list:new))) (letrec ((loop (lambda () (let ((value (readtable:act table (port 'read) acc port))) (cond ((eqv? value 'end-of-list) (acc 'to-list)) (else (acc 'push-tail value) (loop))))))) (loop))))) ;;; Readtable for a list, generic to proper and improper list ;;; readers. (define readtable:table-for-list (lambda (oldtable on-dot) (readtable:process oldtable (list readtable:update %eol (readtable:return 'end-of-list)) (list readtable:update #\. (lambda (table char acc port) (let ((entire-identifier (readtable:read-ident table char #f port))) (if (equal? entire-identifier ".") (on-dot table acc port) entire-identifier))))))) ;;; Read a proper or improper list. (define readtable:read-list (lambda (oldtable _ __ port) (readtable:read-list-loop (readtable:table-for-list oldtable readtable:read-improper-cdr) port))) ;;; Read strictly a proper list. This assumes that BOL has been read. (define readtable:read-proper-list (lambda (table port) (readtable:read-list-loop (readtable:table-for-list table (readtable:error 'read-proper-list "expected proper list")) port))) (define readtable:expect-proper-list (lambda (table port) (if (not (eqv? (port 'read) %bol)) (error 'expect-port-list 'no-list-found) (readtable:read-proper-list table port)))) ;;; ;;;;;;;;;;;;;;;;;;;;;;;;; ;;; Implementation of a Pratt parser. In a Pratt parser, all tokens are ;;; "operators" with "precedence". ;;; ;;; Since Pratt parsers are implemented in terms of procedures, they can ;;; parse any grammar. ;;; ;;; This file implements the algorithm found in: ;;; ;;; van de Vanter, Michael L. "A Formalization and Correctness Proof of ;;; the CGOL Language System." (Master's Thesis). MIT Laboratory for ;;; Computer Science Technical Report MIT-LCS-TR-147 (Cambridge, ;;; Massachusetts). 1975. https://hdl.handle.net/1721.1/149442. ;;; (PRATT:OPERATOR PRECEDENCE PROCEDURE) ;;; constructs a description of an operator. ;;; ;;; PRECEDENCE is an exact number describing the precedence of the ;;; operator, and for portability should only be an integer. ;;; ;;; (The special EOF operator, which always exists, has negative ;;; infinity precedence.) ;;; ;;; PROCEDURE is of the form (PROCEDURE OPERATORS LEFT TOKEN READER). ;;; OPERATORS is the operators map. ;;; LEFT is a PRATT:TOKEN, which was to the left of the operator. ;;; TOKEN is the token read from the reader wit the same type. ;;; READER is a reader object. ;;; ;;; PROCEDURE consumes tokens to obtain the ;;; right hand side of its expression. PROCEDURE produces a PRATT:TOKEN. (define pratt:operator cons) (define pratt:precedence car) (define pratt:procedure cdr) (define pratt:search-token (lambda (operators token) (smap:search operators (symbol->string (token 'type))))) ;;; Parse a stream from READER, where the precedence limit is specified as ;;; BINDING-POWER. If BINDING-POWER is #F, then there is no precedence ;;; limit. Operators are read from OPERATORS, which is a SMAP. (define pratt (lambda (operators binding-power reader) (letrec ((loop (lambda (left) (let ((token (reader 'peek))) (let ((operator (pratt:search-token operators token))) (if (null? next-operator) (error 'pratt 'unknown-operator token operators binding-power reader)) (if (and binding-power (>= (pratt:precedence operator) binding-power)) left (parse operator left (reader 'read) reader)))))) (parse (lambda (operator left token reader) (loop ((pratt:procedure operator) operators left token reader))))) ;; Read the first token. (let ((token (reader 'read))) (let ((operator (pratt:search-token operators token))) (if (null? operator) (error 'pratt 'unknown-operator token operators binding-power reader)) (parse operator #f token reader)))))) ;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; Math reader. ;;; ;;; Scheme numbers are a kludge. A proper LISP would either represent ;;; numbers as ;;; ;;; (+ 4 (*i 6)) or (@ (. 12 34) 2) or (e 12 3) etc. ;;; ;;; or just bite the bullet and include a full infix expression parser. ;;; ;;; This is a full infix expression parser. It will parse expressions ;;; such as SIN(X) or X * 5I/6. Identifiers and functions are any Scheme ;;; object. ;;; ;;; The math reader can be invoked using #MATH(expr). EXPR is an infix ;;; mathematical expression. #PREFIX(expr) reads EXPR in the normal ;;; reader. All other hash identifiers are unsupported in #MATH. ;;; The basic number reader. The basic syntax is ;;; ;;; PREFIXES* SIGN? (RADIX+ ("." RADIX+)? | "." RADIX+) ("e" RADIX+)? "i"? ;;; Prefixes are handled by the "#" reader. ;;; Incremental builder for a number, driven by the lexer. ;;; TODO: how will this be turned into a number, in the end? (define read:number-builder (lambda (radix-table) (let ((main '()) (decimal '()) (exp-char #f) (exp-digits '()) (imaginary? #f) (current-part 'main) (exactness #f)) (let ((append-part (lambda (char) (cond ((eq? current-part 'main) (set! main (cons char main))) ((eq? current-part 'decimal) (set! decimal (cons char decimal))) ((eq? current-part 'exp) (set! exp-digits (cons char exp-digits))))))) (lambda (op . args) (cond ((eq? op 'finalize) (if (not exactness) (set! exactness 'exact)) (list 'number-builder main exactness decimal exp-char exp-digits imaginary?)) ((eq? op 'exactness!) (set! exactness (car args))) ((eq? op 'radix-table!) (set! radix-table (car args))) ((eq? op 'set-imaginary!) (set! imaginary? #t)) ((eq? op 'set-decimal!) (if (not exactness) (set! exactness 'inexact)) ;; (if (not (eq? current-part 'main)) (error 'number-builder "cannot set to decimal" current-part) (set! current-part 'decimal))) ((eq? op 'set-exponential!) (if (not exactness) (set! exactness 'inexact)) ;; (if (eq? current-part 'exp) (error 'number-builder "cannot set to exponential")) (set! current-part 'exp) (set! exp-char (car args))) ;; ((eq? op 'radix-table) radix-table) (else (append-part op)))))))) (define readtable:return-number (lambda (_ char acc port) (port 'push char) (acc 'finalize))) ;;; Main lexer for number table. (define readtable:number-main-table (readtable:process (readtable:empty/default readtable:return-number) (list readtable:update #\. (lambda (table char number-builder port) (number-builder 'set-decimal!) (readtable:next table number-builder port))) (list readtable:update (list #\e #\s #\f #\d #\l) (lambda (table char number-builder port) (number-builder 'set-exponential! char) (readtable:next table number-builder port))) (list readtable:update #\_ readtable:skip) (list readtable:update #\i (lambda (table char number-builder port) (number-builder 'set-imaginary!) (number-builder 'finalize))))) ;;; Generate a lexer given a list of numbers in the radix. (define readtable:for-radix (lambda (radix-list) (readtable:process readtable:number-main-table (list readtable:update radix-list (lambda (table char number-builder port) (number-builder char) (readtable:next table number-builder port)))))) (define readtable:for-bin (readtable:for-radix '(#\0 #\1))) (define readtable:for-oct (readtable:for-radix '(#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7))) (define readtable:for-dec (readtable:for-radix '(#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9))) (define readtable:for-hex (readtable:for-radix '(#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9 #\a #\b #\c #\d #\e #\f))) ;;; Read the sign at the start of a number. (define readtable:read-sign (readtable:process (readtable:empty/default (lambda (_ char number-builder port) (readtable:act (number-builder 'radix-table) char number-builder port))) (list readtable:update (list #\+ #\-) (lambda (_ char number-builder port) (number-builder 'sign! char) (readtable:next (number-builder 'radix-table) number-builder port))) (list readtable:update #\_ readtable:skip))) (define readtable:read-prefix-char (let ((act (lambda args (lambda (_ char number-builder port) (apply number-builder args) (readtable:next readtable:read-number-prefix number-builder port))))) (readtable:process (readtable:empty/default (readtable:error "invalid prefix")) (list readtable:update #\i (act 'exactness! 'inexact)) (list readtable:update #\e (act 'exactness! 'exact)) (list readtable:update #\b (act 'radix-table! readtable:for-bin)) (list readtable:update #\o (act 'radix-table! readtable:for-oct)) (list readtable:update #\d (act 'radix-table! readtable:for-dec)) (list readtable:update #\x (act 'radix-table! readtable:for-hex))))) (define readtable:read-number-prefix (readtable:process (readtable:empty/default (readtable:jump readtable:read-sign)) (list readtable:update #\# (readtable:jump/next readtable:read-prefix-char)))) ;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; Reader for stuff that start with "#" ;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (define readtable:digits (list #\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9)) (define readtable:vector (lambda (_ __ toplevel port) (list 'vector (readtable:read-proper-list toplevel port)))) ;;; Block comment reader. ;;; ;;; The outermost block comment reader is passed the toplevel reader as ;;; ACC. When the outermost block is finished, it will tail-call ACC. ;;; (It is basically the continuation of the reader.) ;;; ;;; When a nested block comment is found, it is passed #F as ACC, which ;;; it will not call. It will return an unspecified value. ;;; ;;; Since the read tables are not procedures, references to other tables ;;; in the same LETREC declaration must be protected with explicit LAMBDAs. ;;; Macros could make this much easier to read. (define readtable:block-comment (letrec ((potential-end (readtable:process (readtable:empty/default (lambda (this char acc port) (readtable:act loop char acc port))) (list readtable:update #\# (lambda (this char acc port) (if acc (readtable:next acc #f port)))))) (potential-start (readtable:process (readtable:empty/default (lambda (this char acc port) (readtable:act loop char acc port))) (list readtable:update #\| (lambda (this char acc port) (readtable:next loop #f port) (readtable:next loop acc port))))) (loop (readtable:process (readtable:empty/default readtable:skip) (list readtable:update #\# (lambda (this char acc port) (readtable:next potential-start acc port))) (list readtable:update #\| (lambda (this char acc port) (readtable:next potential-end acc port)))))) loop)) (define read:datum-label (lambda (location toplevel) (let ((finalized? #f) (value '()) (name (read:ident-builder location))) (lambda (op . args) (cond ((eq? op 'finalize-value) (if finalized? (error 'datum-label "cannot finalize twice")) (set! finalized? #t) (set! value (car args)) (set! toplevel #f)) ((eq? op 'finalize->ident) (set! name (name 'finalize->ident))) ((eq? op 'value) (if (not finalized?) (error 'datum-label "value called before finalize")) value) ((eq? op 'toplevel) toplevel) ((eq? op 'type) 'datum-label) ((eq? op 'as-string) (name 'value)) (else (apply name op args))))))) ;;; Readtable for the number part of a datum label / reference. A label ;;; looks like "#[NUMBER]=" and a reference looks like "#[NUMBER]#". ;;; ;;; Datum label assignments #XXX=[DATUM] return DATUM. ;;; Datum label references #XXX# return a DATUM-LABEL object, because ;;; circular references must be resolved later. (With some trickery it ;;; cold be resolved as soon as possible, but that requires knowledge of ;;; what is complex data and what is an atom. This requires knowing all ;;; possible values that the toplevel could read, which is not possible ;;; because new items can be added to the toplevel.) (define readtable:datum-label-next (readtable:process (readtable:empty/default (readtable:error 'datum-label-next "invalid datum label/ref")) (list readtable:update readtable:digits readtable:push-char) (list readtable:update #\= (lambda (_ __ acc port) (acc 'finalize->ident) (port 'set-datum-label! (acc 'as-string) acc) (let ((next-value (readtable:next (acc 'toplevel) #f port))) (if (eqv? acc next-value) (error 'datum-label-next "datum label cannot be itself")) (acc 'finalize-value next-value) (acc 'value)))) (list readtable:update #\# (lambda (_ __ acc port) (acc 'finalize->ident) (let ((datum-label-container (port 'get-datum-label (acc 'as-string)))) (if (null? datum-label-container) (error 'datum-label-next "unknown reference to datum label" label) (map:val datum-label-container))))))) ;;; Reads the next toplevel datum, discards it, and then continues at the ;;; toplevel. ;;; ;;; The R7RS reader can cause side-effects due to #!FOLD-CASE and datum ;;; labels. This must be supressed in datum comments. (define readtable:datum-comment (lambda (_ __ toplevel port) (let ((mutable (port 'dump-mutable))) (readtable:next toplevel #f port) (port 'restore-mutable! mutable)) (readtable:next toplevel #f port))) ;;; ;;;;;;;;;;;;;;;;;;;;;;;; ;;; Handling hash messages ;;; ;;; A "hash message" is any identifier that immediately follows a "#". ;;; This can be a constant, a part of a constant (like for numbers), or ;;; a directive (like #!FOLD-CASE). ;;; String map from constants to procedures with formal arguments ;;; (TOPLEVEL PORT) ;;; with TOPLEVEL being the current toplevel table and PORT being the ;;; current port being read from. ;;; ;;; Each one must return something. Directives that return nothing must ;;; call the toplevel again. (define read:hash-messages (smap:insert-many '() (cons "true" (lambda unused #t)) (cons "false" (lambda unused #f)) (cons "t" (lambda unused #t)) (cons "f" (lambda unused #f)) (cons "u8" (lambda (toplevel port) (list 'bytevector (readtable:expect-proper-list toplevel port)))) (cons "!fold-case" (lambda (toplevel port) (port 'fold-case! #t) (readtable:act toplevel (port 'read) #f port))) (cons "!no-fold-case" (lambda (toplevel port) (port 'fold-case! #f) (readtable:act toplevel (port 'read) #f port))))) (define read:read-hash-messages (lambda (_ char acc port) (let ((ident (readtable:read-ident #f char #f port))) (let ((container (smap:search read:hash-messages (ident 'value)))) (if (null? container) (let ((number-port (ident->read (port 'location) ident))) (number-port 'fold-case! #t) (readtable:next readtable:read-prefix-char (read:number-builder readtable:for-dec) number-port)) ((map:val container) acc port)))))) ;;; ;;;;;;;;;;;;;;;;;;;;;; ;;; Characters ;;; ;;; For completeness, this adds a few more newline characters that are C ;;; escape sequences but are not standard Scheme. (define read:named-characters (smap:insert-many '() (cons "null" (integer->char 0)) (cons "alarm" (integer->char #x7)) (cons "backspace" (integer->char #x8)) (cons "tab" (integer->char #x9)) (cons "newline" (integer->char #xA)) (cons "vertical-tab" (integer->char #xB)) (cons "form-feed" (integer->char #xC)) (cons "return" (integer->char #xD)) (cons "escape" (integer->char #x1B)) (cons "space" (integer->char #x20)) (cons "delete" (integer->char #x7F)))) (define readtable:character (lambda (_ char __ port) (let ((char (port 'read))) (let ((ident (readtable:read-ident #f char #f port))) (let ((container (smap:search read:named-characters (ident 'value)))) (display (ident 'value)) (newline) (cond ((and (null? container) (= (string-length (ident 'value)) 1)) (string-ref (ident 'value) 0)) ((null? container) (error 'character 'unknown-character ident)) (else (map:val container)))))))) (define readtable:hash (readtable:process (readtable:empty/default read:read-hash-messages) (list readtable:update readtable:ASCII-whitespace (readtable:error 'hash 'whitespace-after-hash)) (list readtable:update #\| (readtable:jump/next readtable:block-comment)) (list readtable:update #\; readtable:datum-comment) (list readtable:update #\\ readtable:character) (list readtable:update readtable:digits ; Datum labels (lambda (_ char toplevel port) (readtable:act readtable:datum-label-next char (read:datum-label (port 'location) toplevel) port))) (list readtable:update %bol readtable:vector))) ;;; ;;;;;;;;;;;;;;;; ;;; Toplevel reader. ;;; ;;;;;;;;;;;;;;;; ;;; This is defined as a function so that it dynamically loads each ;;; sub-readtable. (define readtable:top (lambda () (readtable:process (readtable:empty/default readtable:read-ident) (list readtable:update readtable:ASCII-whitespace readtable:skip) (list readtable:update #f (readtable:return 'eof)) (list readtable:update %bol readtable:read-list) (list readtable:update %eol (readtable:error 'top "unbalanced list")) (list readtable:update #\# (readtable:next/old-as-acc readtable:hash)) (list readtable:update #\; (readtable:jump-discard readtable:read-to-newline))))) (define read/toplevel (lambda (port) ;; Does each invocation of READ keep datum labels from previous reads? ;; (port 'clear-datum-labels!) (readtable:next (readtable:top) #f port))) (define intset:insert (set:insert (set:update integer<=>))) (define intset:in (set:in integer<=>)) (define uncycle (lambda (value) (let ((cntr 0) (used-counters '()) (pointers '())) (letrec ((uncycle (lambda (value) (cond ((pair? value) (let ((pair (assq value pointers))) (if (pair? pair) (begin (set! used-counters (car (intset:insert used-counters (cdr pair)))) (list 'ref (cdr pair))) (begin (set! pointers (cons (cons value cntr) pointers)) (let ((cur-cntr cntr)) (set! cntr (+ 1 cntr)) (let ((returned (cons (uncycle (car value)) (uncycle (cdr value))))) (if (not (null? (intset:in used-counters cur-cntr))) (list 'def cur-cntr '= returned) returned))))))) ((procedure? value) (let ((type (value 'type))) (cond ((eq? type 'ident) (value 'value)) ((eq? type 'datum-label) (uncycle (value 'value))) (else (vector 'unrepresentable type))))) (else value))))) (uncycle value))))) ;;; ;;;;;;;;;;; ;;; Test reader ;;; ;;;;;;;;;;; (define %list->read (lambda (seq) (port->read (lambda () (if (null? seq) #f (let ((ch (car seq))) (set! seq (cdr seq)) ch))) "test"))) (define read-all (lambda (str) (let ((reader (%list->read (string->list str)))) (letrec ((loop (lambda () (if (not (reader 'peek)) #t (let ((value (read/toplevel reader))) (display (list "return" (uncycle value))) (newline) (loop)))))) (loop))))) (read-all "x yy zz ; this is a comment\nx call/cc ") (read-all "(a b c def (ghi j) k )") (read-all "( a . b )") (read-all "( a .b . c)") (read-all "#( a b y)") (read-all "(x y #| this is a block\n comment\n |# z w)") (read-all "#( a b #| this is a #| nested block |# comment|# z w)") (read-all "#(a b #(c #|close#|comment|#|#y))") (read-all "(this has a #;(call with (current continuation)) datum comment)") (read-all "#0=(#0# not unix)") (read-all "#!no-fold-case #!fold-case #!NO-FOLD-CASE #false") (read-all "#!fold-case #TRUE") (read-all "#u8(x y z w)") (read-all "#\\newline") (read-all "#\\a") (read-all "#b11") (read-all "#XFF") (read-all "#i#x10") (read-all "#d1234.5678") (read-all "#e10.5") (read-all "#d10.24f12")