UNSLISP/read.scm

1136 lines
41 KiB
Scheme

;;; Copyright (C) Peter McGoron 2024
;;; This program is free software: you can redistribute it and/or modify
;;; it under the terms of the GNU General Public License as published by
;;; the Free Software Foundation, version 3 of the License.
;;;
;;; This program is distributed in the hope that it will be useful,
;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;;; GNU General Public License for more details.
;;;
;;; You should have received a copy of the GNU General Public License
;;; along with this program. If not, see <https://www.gnu.org/licenses/>.
;;; R7RS reader. This is the lexer-parser end, so it returns tokens and
;;; not concrete objects.
;;;
;;; Notes:
;;;
;;; Port stores datum labels. Datum labels are stored for the entirety of
;;; a READ: this is to emulate MIT Scheme, which allows for datum labels
;;; outside of the datum that the label appears in.
;;;
;;; The reader does not return Scheme data: it returns annotated data
;;; containing the source location, datum label number, resolved datum
;;; label pointer. This is for advanced syntax systems.
;;;
;;; How datum labels could work:
;;;
;;; When encountering #[number]=, allocate a datum label and assign it
;;; nothing. Then call READ after "=", and destructively update the
;;; datum label with the resulting datum. A pass over the new read
;;; structure to convert it to regular Scheme data will resolve the
;;; indirection.
;;;
;;; All tokens are procedure-encapsulated objects, since the reader should
;;; never return a literal procedure. Each procedure has a TYPE message.
;;;
;;; ;;;;;;;;;;;;;;;
;;; Possible Improvements
;;; ;;;;;;;;;;;;;;;
;;;
;;; * A coherent API.
;;; * regex-based lexer.
;;; * A way to load readtables and read functions when the function is run,
;;; and not when functions are defined, without sacrificing performance.
;;; * Better Unicode handling.
(load "chez-compat.scm")
(load "util.scm")
(load "set.scm")
(load "linked-list.scm")
;;; My text editor cannot parse Scheme's character syntax.
(define %bol #\()
(define %eol #\))
;;; ;;;;;;;;;;;;;;;;;;;;;;;;
;;; Port reader wrapper
;;; ;;;;;;;;;;;;;;;;;;;;;;;;
(define port->read-function
(lambda (port)
(lambda ()
(read-char port))))
(define string->read-function
(lambda (str)
(let ((i -1)
(length (string-length str)))
(lambda ()
(set! i (+ i 1))
(if (>= i length)
#f
(string-ref str i))))))
;;; READ:
;;;
;;; Stream readers contain mutable state. This is the case-folding mode
;;; and the current list of datum labels.
;;;
;;; (POS): Return (LIST FILENAME LINE-NUMBER OFFSET).
;;; (READ): Read the next character in the stream. Returns #F on EOF.
;;; (PUSH CHAR): Push CHAR such that it will be the next character read
;;; when (READ) is called.
;;; (PEEK): Read character, push it back, and return it.
;;; (FOLD-CASE?): Returns a boolean if case folding is enabled.
;;; (FOLD-CASE! BOOL): Sets the case folding to BOOL.
(define port->read
(lambda (read-function filename . location)
(let ((line-number (if (null? location)
1
(car location)))
(offset (if (null? location)
0
(cadr location)))
(pushback-buffer '())
(datum-labels '())
(fold-case? #f))
(letrec ((update-position!
(lambda (ch)
(cond
((eqv? ch #\newline)
(set! line-number (+ 1 line-number)) (set! offset 0))
;; OFFSET is sometimes set to #F to denote an unknown
;; offset.
(offset (set! offset (+ 1 offset))))))
(location
(lambda () (list filename line-number offset)))
(set-datum-label!
(lambda (label value)
(set! datum-labels
(car (smap:insert datum-labels label value)))))
(get-datum-label
(lambda (label)
(smap:search datum-labels label)))
(clear-datum-labels!
(lambda ()
(set! datum-labels '())))
(dump-mutable
(lambda ()
(list datum-labels fold-case?)))
(restore-mutable!
(lambda (state)
(set! datum-labels (car state))
(set! fold-case? (cadr state))))
(process
(lambda (ch)
(update-position! ch)
(cond
((or (eof-object? ch) (not ch)) ch)
(fold-case? (char-downcase ch))
(else ch))))
(port
(lambda (op . args)
;; TODO: turn into string map?
(cond
((eq? op 'location) (location))
((eq? op 'read)
(process
(if (null? pushback-buffer)
(read-function)
(let ((ch (car pushback-buffer)))
(set! pushback-buffer (cdr pushback-buffer))
ch))))
((eq? op 'peek)
(let ((ch (port 'read)))
(port 'push ch)
ch))
((eq? op 'push)
(let ((ch (car args)))
(if (eqv? ch #\newline)
(begin
(set! line-number (- line-number 1))
(set! offset #f))
(set! offset (- offset 1)))
(set! pushback-buffer (cons ch pushback-buffer))))
((eq? op 'fold-case?) fold-case?)
((eq? op 'fold-case!) (set! fold-case? (car args)))
((eq? op 'set-datum-label!) (apply set-datum-label! args))
((eq? op 'get-datum-label) (apply get-datum-label args))
((eq? op 'clear-datum-labels!) (apply clear-datum-labels! args))
((eq? op 'dump-mutable) (apply dump-mutable args))
((eq? op 'restore-mutable!) (apply restore-mutable! args))
(else (error 'read->port 'invalid (cons op args)))))))
port))))
(define ident->read
(lambda (location ident)
(apply port->read (string->read-function (ident 'value))
location)))
;;; ;;;;;;;;;;;;;;
;;; Character maps
;;; ;;;;;;;;;;;;;;
(define integer<=>
(lambda (x y)
(cond
((< x y) '<)
((= x y) '=)
(else '>))))
;;; Comparison on characters extended to #F, which is less than all
;;; characters.
(define char*<=>
(lambda (x y)
(cond
((and (not x) y) '<)
((and x (not y)) '>)
((and (not x) (not y) '=))
(else (integer<=> (char->integer x)
(char->integer y))))))
(define %charmap:<=> (set:<=>-to-map char*<=>))
(define %charmap:update (set:update %charmap:<=>))
(define charmap:update (map:update %charmap:update))
(define charmap:insert (map:insert %charmap:update))
(define charmap:search (map:search %charmap:<=>))
;;; ;;;;;;;;;;;;;;;;;;;;;;
;;; Readtable constructors
;;;
;;; Readtables are composed of a CHARMAP, which is a map from characters
;;; to actions, and a DEFAULT-ACTION, which is taken when there is no
;;; match in CHARMAP.
;;;
;;; An "action" is a procedure that takes four arguments:
;;;
;;; TABLE: The current table.
;;; CHAR: The character that was matched against the CHARMAP in TABLE.
;;; ACC: An arbitrary "accumulator" value that is different depending
;;; on the readtable in question.
;;; PORT: A port reader object.
;;; ;;;;;;;;;;;;;;;;;;;;;;
;;; (READTABLE:NEW DEFAULT-ACTION CHARMAP)
(define readtable:new cons)
(define %readtable:default-action car)
(define %readtable:charmap cdr)
;;; Run the action in TABLE assigned to CHAR, or the default action of
;;; TABLE if there is no entry for CHAR.
(define readtable:act
(lambda (table char acc port)
(let ((node (charmap:search (%readtable:charmap table)
char)))
(let ((action (if (null? node)
(%readtable:default-action table)
(map:val node))))
(action table char acc port)))))
;;; Run the action in TABLE with the next character from PORT.
(define readtable:next
(lambda (table acc port)
(readtable:act table (port 'read) acc port)))
;;; Return a new readtable where CHAR is bound to ACTION.
(define readtable:update
(lambda (table chars action)
(let ((chars (if (pair? chars)
chars
(list chars))))
(fold (lambda (char table)
(readtable:new (%readtable:default-action table)
(car (charmap:insert
(%readtable:charmap table) char action))))
table
chars))))
;;; Construct new readtable with no characters in its map and
;;; DEFAULT-ACTION as the default action.
(define readtable:empty/default
(lambda (default-action)
(readtable:new default-action '())))
;;; Each value in FUNCTIONS is a list (PROCEDURE ARGS...) which is called
;;; like (PROCEDURE TABLE ARGS...) and returns a table.
(define readtable:process
(lambda (table . functions)
(fold (lambda (function table)
(apply (car function) table (cdr function)))
table
functions)))
;;; ;;;;;;;;;;;;;;;;;;
;;; Default actions
;;; ;;;;;;;;;;;;;;;;;;
;;; Return an error.
(define readtable:error
(lambda emsg
(lambda (table char acc port)
(error emsg char acc table port))))
;;; Discard the current character and continue reading the readtable.
(define readtable:skip
(lambda (table char acc port)
(readtable:act table (port 'read) acc port)))
;;; Discard char and return constant.
(define readtable:return
(lambda (return)
(lambda (table char acc port)
return)))
;;; Jump to a new readtable, discard it's return, and continue reading
;;; in the table.
(define readtable:jump-discard
(lambda (newtable)
(lambda (oldtable char acc port)
(readtable:act newtable (port 'read) '() port)
(readtable:act oldtable (port 'read) acc port))))
;;; Jump to a new readtable with the same characters.
(define readtable:jump
(lambda (newtable)
(lambda (oldtable char acc port)
(readtable:act newtable char acc port))))
;;; Jump to a new readtable, reading the new character, with the old
;;; readtable as ACC.
(define readtable:next/old-as-acc
(lambda (newtable)
(lambda (oldtable __ _ port)
(readtable:next newtable oldtable port))))
;;; Jump to a new readtable, reading the new character.
(define readtable:jump/next
(lambda (newtable)
(lambda (oldtable _ acc port)
(readtable:next newtable acc port))))
;;; ;;;;;;;;;;;;;;;;;
;;; Identifier reader
;;; ;;;;;;;;;;;;;;;;;
(define read:ident
(lambda (name location)
(lambda (op . args)
(cond
((eq? op 'type) 'ident)
((eq? op 'location) location)
((eq? op 'value) name)
(else (error 'read:ident "invalid operation" op args))))))
(define read:ident-builder
(lambda (location . start-char)
(let ((char-list (linked-list:new)))
(if (not (null? start-char))
(char-list 'push (car start-char)))
(lambda (op . args)
(cond
((eq? op 'location) location)
((eq? op 'finalize->ident)
(read:ident (list->string (char-list 'to-list)) location))
(else (apply char-list op args)))))))
;;; Push back CHAR and return ACC.
(define readtable:return-acc-as-ident
(lambda (table char acc port)
(port 'push char)
(acc 'finalize->ident)))
;;; Push CHAR to ACC and continue reading from TABLE.
(define readtable:push-char
(lambda (table char acc port)
(acc 'push-tail char)
(readtable:act table (port 'read) acc port)))
;;; Define a readtable that constructs an identifier by accepting all
;;; characters that are not listed.
(define readtable:exclude-from-identifiers
(lambda (table excluded)
(fold (lambda (char table)
(readtable:update table char readtable:return-acc-as-ident))
table
excluded)))
;;; ASCII whitespace.
(define readtable:ASCII-whitespace
(list #\newline
#\space
(integer->char #x09)
(integer->char #x0B)
(integer->char #x0C)
(integer->char #x0D)))
;;; Readtable for identifiers.
(define readtable:identifier
(readtable:process
(readtable:empty/default readtable:push-char)
(list readtable:exclude-from-identifiers
readtable:ASCII-whitespace)
(list readtable:exclude-from-identifiers
(list #\| %bol %eol #\' #\; #f))))
;;; Read an identifier starting with CHAR.
(define read:read-ident
(lambda (table char acc port)
(readtable:act readtable:identifier
(port 'read)
(read:ident-builder (port 'location) char)
port)))
;;; ;;;;;;;;;;;;;;;;;;;;
;;; Comments and whitespace reader
;;; ;;;;;;;;;;;;;;;;;;;;
;;; Readtable for a line comment.
(define readtable:read-to-newline
(readtable:process
(readtable:empty/default readtable:skip)
(list readtable:update #\newline (readtable:return #f))))
;;; ;;;;;;;;;;;
;;; List reader
;;;
;;; The reader updates the previous readtable to handle ). This means
;;; that this read table does not have to handle end-of-line, whitespace,
;;; etc.
;;; ;;;;;;;;;;;
;;; Read the end of an improper list.
(define readtable:read-improper-cdr
(lambda (table acc port)
(let ((val
(readtable:act (readtable:update table
%eol
(readtable:error
'read-improper-cdr
"proper list must have cdr"))
(port 'read)
#f
port)))
(acc 'set-cdr! val)
(let ((table (readtable:process
(readtable:empty/default (readtable:error
'read-improper-cdr
"improper list has 1 cdr"))
(list readtable:update
readtable:ASCII-whitespace
readtable:skip)
(list readtable:update %eol
(lambda dummy 'end-of-list)))))
(readtable:act table (port 'read) acc port)))))
;;; Generic reader loop for a list. It takes as input the table that has
;;; already been updated with end of list and improper list handlers.
(define readtable:read-list-loop
(lambda (table port)
(let ((acc (linked-list:new)))
(letrec ((loop
(lambda ()
(let ((value (readtable:act table
(port 'read)
acc
port)))
(cond
((eqv? value 'end-of-list) (acc 'to-list))
(else (acc 'push-tail value)
(loop)))))))
(loop)))))
;;; Readtable for a list, generic to proper and improper list
;;; readers.
(define readtable:table-for-list
(lambda (oldtable on-dot)
(readtable:process
oldtable
(list readtable:update %eol (readtable:return 'end-of-list))
(list readtable:update #\.
(lambda (table char acc port)
(let ((entire-identifier (readtable:read-ident
table
char
#f
port)))
(if (equal? entire-identifier ".")
(on-dot table acc port)
entire-identifier)))))))
;;; Read a proper or improper list.
(define readtable:read-list
(lambda (oldtable _ __ port)
(readtable:read-list-loop (readtable:table-for-list
oldtable
readtable:read-improper-cdr)
port)))
;;; Read strictly a proper list. This assumes that BOL has been read.
(define readtable:read-proper-list
(lambda (table port)
(readtable:read-list-loop (readtable:table-for-list
table
(readtable:error
'read-proper-list
"expected proper list"))
port)))
(define readtable:expect-proper-list
(lambda (table port)
(if (not (eqv? (port 'read) %bol))
(error 'expect-port-list 'no-list-found)
(readtable:read-proper-list table port))))
;;; ;;;;;;;;;;;;;;;;;;;;;;;;;
;;; Implementation of a Pratt parser. In a Pratt parser, all tokens are
;;; "operators" with "precedence".
;;;
;;; Since Pratt parsers are implemented in terms of procedures, they can
;;; parse any grammar.
;;;
;;; This file implements the algorithm found in:
;;;
;;; van de Vanter, Michael L. "A Formalization and Correctness Proof of
;;; the CGOL Language System." (Master's Thesis). MIT Laboratory for
;;; Computer Science Technical Report MIT-LCS-TR-147 (Cambridge,
;;; Massachusetts). 1975. https://hdl.handle.net/1721.1/149442.
;;; (PRATT:OPERATOR PRECEDENCE PROCEDURE)
;;; constructs a description of an operator.
;;;
;;; PRECEDENCE is an exact number describing the precedence of the
;;; operator, and for portability should only be an integer.
;;;
;;; (The special EOF operator, which always exists, has negative
;;; infinity precedence.)
;;;
;;; PROCEDURE is of the form (PROCEDURE OPERATORS LEFT TOKEN READER).
;;; OPERATORS is the operators map.
;;; LEFT is a PRATT:TOKEN, which was to the left of the operator.
;;; TOKEN is the token read from the reader wit the same type.
;;; READER is a reader object.
;;;
;;; PROCEDURE consumes tokens to obtain the
;;; right hand side of its expression. PROCEDURE produces a PRATT:TOKEN.
(define pratt:operator cons)
(define pratt:precedence car)
(define pratt:procedure cdr)
(define pratt:search-token
(lambda (operators token)
(smap:search operators (symbol->string (token 'type)))))
;;; Parse a stream from READER, where the precedence limit is specified as
;;; BINDING-POWER. If BINDING-POWER is #F, then there is no precedence
;;; limit. Operators are read from OPERATORS, which is a SMAP.
(define pratt
(lambda (operators binding-power reader)
(letrec
((loop
(lambda (left)
(let ((token (reader 'peek)))
(let ((operator (pratt:search-token operators token)))
(if (null? next-operator)
(error 'pratt 'unknown-operator token
operators binding-power reader))
(if (and binding-power
(>= (pratt:precedence operator)
binding-power))
left
(parse operator left (reader 'read) reader))))))
(parse
(lambda (operator left token reader)
(loop ((pratt:procedure operator)
operators
left
token
reader)))))
;; Read the first token.
(let ((token (reader 'read)))
(let ((operator (pratt:search-token operators token)))
(if (null? operator)
(error 'pratt 'unknown-operator token
operators binding-power reader))
(parse operator #f token reader))))))
;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; Math reader.
;;;
;;; Scheme numbers are a kludge. A proper LISP would either represent
;;; numbers as
;;;
;;; (+ 4 (*i 6)) or (@ (. 12 34) 2) or (e 12 3) etc.
;;;
;;; or just bite the bullet and include a full infix expression parser.
;;;
;;; This is a full infix expression parser. It will parse expressions
;;; such as SIN(X) or X * 5I/6. Identifiers and functions are any Scheme
;;; object.
;;;
;;; The math reader can be invoked using #MATH(expr). EXPR is an infix
;;; mathematical expression. #PREFIX(expr) reads EXPR in the normal
;;; reader. All other hash identifiers are unsupported in #MATH.
;;; The basic number reader. The basic syntax is
;;;
;;; PREFIXES* SIGN? (RADIX+ ("." RADIX+)? | "." RADIX+) ("e" RADIX+)? "i"?
;;; Prefixes are handled by the "#" reader.
;;; Incremental builder for a number, driven by the lexer.
;;; TODO: how will this be turned into a number, in the end?
(define read:number-builder
(lambda (radix-table location)
(let ((main '())
(sign #\+)
(decimal '())
(exp-char #\e)
(exp-digits '())
(imaginary? #f)
(current-part 'main)
(exactness #f))
(let ((append-part
(lambda (char)
(cond
((eq? current-part 'main)
(set! main (cons char main)))
((eq? current-part 'decimal)
(set! decimal (cons char decimal)))
((eq? current-part 'exp)
(set! exp-digits (cons char exp-digits)))))))
(lambda (op . args)
(cond
((eq? op 'push!) (apply append-part args))
((eq? op 'sign!) (set! sign (car args)))
((eq? op 'finalize)
(if (not exactness)
(set! exactness 'exact))
(if (null? main)
(set! main '(0)))
(if (null? decimal)
(set! decimal '(0)))
(if (null? exp-digits)
(set! exp-digits '(0)))
(list 'basic-number
main sign exactness decimal exp-char exp-digits imaginary?))
((eq? op 'exactness!) (set! exactness (car args)))
((eq? op 'radix-table!) (set! radix-table (car args)))
((eq? op 'set-imaginary!)
(set! imaginary? #t))
((eq? op 'set-decimal!)
(if (not exactness)
(set! exactness 'inexact))
;;
(if (not (eq? current-part 'main))
(error 'number-builder "cannot set to decimal" current-part)
(set! current-part 'decimal)))
((eq? op 'set-exponential!)
(if (not exactness)
(set! exactness 'inexact))
;;
(if (eq? current-part 'exp)
(error 'number-builder "cannot set to exponential"))
(set! current-part 'exp)
(set! exp-char (car args)))
;;
((eq? op 'radix-table) radix-table)
(else (error 'number-builder 'invalid op args))))))))
(define read:special-number
(lambda (type sign location)
(lambda (op)
(cond
((eq? op 'type) type)
((eq? op 'sign) sign)
((eq? op 'location) location)
(else (error 'special-numbe 'invalid op))))))
(define readtable:return-number
(lambda (_ char acc port)
(port 'push char)
(acc 'finalize)))
;;; Main lexer for number table.
(define readtable:number-main-table
(readtable:process
(readtable:empty/default readtable:return-number)
(list readtable:update #\.
(lambda (table char number-builder port)
(number-builder 'set-decimal!)
(readtable:next table
number-builder
port)))
(list readtable:update (list #\e #\s #\f #\d #\l)
(lambda (table char number-builder port)
(number-builder 'set-exponential! char)
(readtable:next table
number-builder
port)))
(list readtable:update #\_ readtable:skip)
(list readtable:update #\i
(lambda (table char number-builder port)
(number-builder 'set-imaginary!)
(number-builder 'finalize)))))
;;; Generate a lexer given a list of numbers in the radix.
(define readtable:for-radix
(lambda (radix-list)
(readtable:process
readtable:number-main-table
(list readtable:update radix-list
(lambda (table char number-builder port)
(number-builder 'push! char)
(readtable:next table number-builder port))))))
(define readtable:for-bin
(readtable:for-radix '(#\0 #\1)))
(define readtable:for-oct
(readtable:for-radix '(#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7)))
(define readtable:for-dec
(readtable:for-radix '(#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9)))
(define readtable:for-hex
(readtable:for-radix '(#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9
#\a #\b #\c #\d #\e #\f)))
;;; Read the sign at the start of a number.
(define readtable:read-sign
(readtable:process
(readtable:empty/default
(lambda (_ char number-builder port)
(readtable:act (number-builder 'radix-table)
char
number-builder
port)))
(list readtable:update (list #\+ #\-)
(lambda (_ char number-builder port)
(number-builder 'sign! char)
(readtable:next (number-builder 'radix-table)
number-builder
port)))
(list readtable:update #\_ readtable:skip)))
(define readtable:read-prefix-char
(let ((act (lambda args
(lambda (_ char number-builder port)
(apply number-builder args)
(readtable:next readtable:read-number-prefix
number-builder
port)))))
(readtable:process
(readtable:empty/default (readtable:error "invalid prefix"))
(list readtable:update #\i (act 'exactness! 'inexact))
(list readtable:update #\e (act 'exactness! 'exact))
(list readtable:update #\b (act 'radix-table! readtable:for-bin))
(list readtable:update #\o (act 'radix-table! readtable:for-oct))
(list readtable:update #\d (act 'radix-table! readtable:for-dec))
(list readtable:update #\x (act 'radix-table! readtable:for-hex)))))
(define readtable:read-number-prefix
(readtable:process
(readtable:empty/default (readtable:jump readtable:read-sign))
(list readtable:update #\# (readtable:jump/next readtable:read-prefix-char))))
;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; Reader for stuff that start with "#"
;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(define readtable:digits
(list #\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9))
(define readtable:vector
(lambda (_ __ toplevel port)
(list 'vector (readtable:read-proper-list toplevel port))))
;;; Block comment reader.
;;;
;;; The outermost block comment reader is passed the toplevel reader as
;;; ACC. When the outermost block is finished, it will tail-call ACC.
;;; (It is basically the continuation of the reader.)
;;;
;;; When a nested block comment is found, it is passed #F as ACC, which
;;; it will not call. It will return an unspecified value.
;;;
;;; Since the read tables are not procedures, references to other tables
;;; in the same LETREC declaration must be protected with explicit LAMBDAs.
;;; Macros could make this much easier to read.
(define readtable:block-comment
(letrec ((potential-end
(readtable:process
(readtable:empty/default
(lambda (this char acc port) (readtable:act
loop
char
acc
port)))
(list readtable:update #\#
(lambda (this char acc port)
(if acc
(readtable:next acc #f port))))))
(potential-start
(readtable:process
(readtable:empty/default
(lambda (this char acc port) (readtable:act
loop
char
acc
port)))
(list readtable:update #\|
(lambda (this char acc port)
(readtable:next loop #f port)
(readtable:next loop acc port)))))
(loop
(readtable:process
(readtable:empty/default readtable:skip)
(list readtable:update #\#
(lambda (this char acc port)
(readtable:next potential-start
acc
port)))
(list readtable:update #\|
(lambda (this char acc port)
(readtable:next potential-end
acc
port))))))
loop))
(define read:datum-label
(lambda (location toplevel)
(let ((finalized? #f)
(value '())
(name (read:ident-builder location)))
(lambda (op . args)
(cond
((eq? op 'finalize-value)
(if finalized?
(error 'datum-label "cannot finalize twice"))
(set! finalized? #t)
(set! value (car args))
(set! toplevel #f))
((eq? op 'finalize->ident)
(set! name (name 'finalize->ident)))
((eq? op 'value)
(if (not finalized?)
(error 'datum-label "value called before finalize"))
value)
((eq? op 'toplevel) toplevel)
((eq? op 'type) 'datum-label)
((eq? op 'as-string) (name 'value))
(else (apply name op args)))))))
;;; Readtable for the number part of a datum label / reference. A label
;;; looks like "#[NUMBER]=" and a reference looks like "#[NUMBER]#".
;;;
;;; Datum label assignments #XXX=[DATUM] return DATUM.
;;; Datum label references #XXX# return a DATUM-LABEL object, because
;;; circular references must be resolved later. (With some trickery it
;;; cold be resolved as soon as possible, but that requires knowledge of
;;; what is complex data and what is an atom. This requires knowing all
;;; possible values that the toplevel could read, which is not possible
;;; because new items can be added to the toplevel.)
(define readtable:datum-label-next
(readtable:process
(readtable:empty/default (readtable:error 'datum-label-next
"invalid datum label/ref"))
(list readtable:update readtable:digits readtable:push-char)
(list readtable:update #\=
(lambda (_ __ acc port)
(acc 'finalize->ident)
(port 'set-datum-label! (acc 'as-string) acc)
(let ((next-value (readtable:next (acc 'toplevel)
#f
port)))
(if (eqv? acc next-value)
(error 'datum-label-next "datum label cannot be itself"))
(acc 'finalize-value next-value)
(acc 'value))))
(list readtable:update #\#
(lambda (_ __ acc port)
(acc 'finalize->ident)
(let ((datum-label-container (port 'get-datum-label
(acc 'as-string))))
(if (null? datum-label-container)
(error 'datum-label-next
"unknown reference to datum label" label)
(map:val datum-label-container)))))))
;;; Reads the next toplevel datum, discards it, and then continues at the
;;; toplevel.
;;;
;;; The R7RS reader can cause side-effects due to #!FOLD-CASE and datum
;;; labels. This must be supressed in datum comments.
(define readtable:datum-comment
(lambda (_ __ toplevel port)
(let ((mutable (port 'dump-mutable)))
(readtable:next toplevel #f port)
(port 'restore-mutable! mutable))
(readtable:next toplevel #f port)))
;;; ;;;;;;;;;;;;;;;;;;;;;;;;
;;; Handling hash messages
;;;
;;; A "hash message" is any identifier that immediately follows a "#".
;;; This can be a constant, a part of a constant (like for numbers), or
;;; a directive (like #!FOLD-CASE).
;;; String map from constants to procedures with formal arguments
;;; (TOPLEVEL PORT)
;;; with TOPLEVEL being the current toplevel table and PORT being the
;;; current port being read from.
;;;
;;; Each one must return something. Directives that return nothing must
;;; call the toplevel again.
(define read:hash-messages
(smap:insert-many
'()
(cons "true" (lambda unused #t))
(cons "false" (lambda unused #f))
(cons "t" (lambda unused #t))
(cons "f" (lambda unused #f))
(cons "u8" (lambda (toplevel port)
(list 'bytevector
(readtable:expect-proper-list toplevel port))))
(cons "!fold-case"
(lambda (toplevel port)
(port 'fold-case! #t)
(readtable:act toplevel (port 'read) #f port)))
(cons "!no-fold-case"
(lambda (toplevel port)
(port 'fold-case! #f)
(readtable:act toplevel (port 'read) #f port)))))
(define read:read-hash-messages
(lambda (_ char acc port)
(let ((ident (readtable:read-ident #f char #f port)))
(let ((container (smap:search read:hash-messages
(ident 'value))))
(if (null? container)
(let ((number-port (ident->read (port 'location) ident)))
(number-port 'fold-case! #t)
(readtable:next readtable:read-prefix-char
(read:number-builder readtable:for-dec
(port 'location))
number-port))
((map:val container) acc port))))))
;;; ;;;;;;;;;;;;;;;;;;;;;;
;;; Characters
;;;
;;; For completeness, this adds a few more newline characters that are C
;;; escape sequences but are not standard Scheme.
(define read:named-characters
(smap:insert-many
'()
(cons "null" (integer->char 0))
(cons "alarm" (integer->char #x7))
(cons "backspace" (integer->char #x8))
(cons "tab" (integer->char #x9))
(cons "newline" (integer->char #xA))
(cons "vertical-tab" (integer->char #xB))
(cons "form-feed" (integer->char #xC))
(cons "return" (integer->char #xD))
(cons "escape" (integer->char #x1B))
(cons "space" (integer->char #x20))
(cons "delete" (integer->char #x7F))))
(define readtable:character
(lambda (_ char __ port)
(let ((char (port 'read)))
(let ((ident (readtable:read-ident #f char #f port)))
(let ((container (smap:search read:named-characters
(ident 'value))))
(display (ident 'value))
(newline)
(cond
((and (null? container)
(= (string-length (ident 'value)) 1))
(string-ref (ident 'value) 0))
((null? container) (error 'character 'unknown-character ident))
(else (map:val container))))))))
(define readtable:hash
(readtable:process
(readtable:empty/default read:read-hash-messages)
(list readtable:update readtable:ASCII-whitespace
(readtable:error 'hash 'whitespace-after-hash))
(list readtable:update #\| (readtable:jump/next readtable:block-comment))
(list readtable:update #\; readtable:datum-comment)
(list readtable:update #\\ readtable:character)
(list readtable:update readtable:digits ; Datum labels
(lambda (_ char toplevel port)
(readtable:act readtable:datum-label-next
char
(read:datum-label (port 'location) toplevel)
port)))
(list readtable:update %bol readtable:vector)))
;;; Read a decimal number without prefixes.
(define readtable:create-number
(lambda (toplevel number-char _ port)
(readtable:act readtable:read-sign
number-char
(read:number-builder readtable:for-dec
(port 'location))
port)))
(define readtable:check-for-special-ident
(readtable:process
(readtable:empty/default
(lambda (_ char ident-builder port)
(let ((return-ident (readtable:act readtable:identifier
char
ident-builder
port)))
(let ((val (return-ident 'value))
(loc (return-ident 'location)))
(cond
((equal? val "+inf.0") (read:special-number 'inf #\+ loc))
((equal? val "-inf.0") (read:special-number 'inf #\- loc))
((equal? val "+nan.0") (read:special-number 'nan #\+ loc))
((equal? val "-nan.0") (read:special-number 'nan #\- loc))
(else val))))))
(list readtable:update '(#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9)
(lambda (_ char ident-builder port)
(let ((ident (ident-builder 'finalize->ident)))
(let ((number-builder (read:number-builder
readtable:for-dec
(ident 'location))))
(number-builder 'sign! (string-ref (ident 'value) 0))
(readtable:act readtable:for-dec char number-builder port)))))))
(define read:ident-or-number
(lambda (_ sign __ port)
(readtable:next readtable:check-for-special-ident
(read:ident-builder (port 'location) sign)
port)))
;;; ;;;;;;;;;;;;;;;;
;;; Toplevel reader.
;;; ;;;;;;;;;;;;;;;;
;;; This is defined as a function so that it dynamically loads each
;;; sub-readtable.
(define readtable:top
(lambda ()
(readtable:process
(readtable:empty/default read:read-ident)
(list readtable:update '(#\+ #\-) read:ident-or-number)
(list readtable:update readtable:ASCII-whitespace readtable:skip)
(list readtable:update #f (readtable:return 'eof))
(list readtable:update %bol readtable:read-list)
(list readtable:update %eol (readtable:error 'top "unbalanced list"))
(list readtable:update #\# (readtable:next/old-as-acc
readtable:hash))
(list readtable:update '(#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9)
readtable:create-number)
(list readtable:update #\;
(readtable:jump-discard readtable:read-to-newline)))))
(define read/toplevel
(lambda (port)
;; Does each invocation of READ keep datum labels from previous reads?
;; (port 'clear-datum-labels!)
(readtable:next (readtable:top)
#f
port)))
(define intset:insert (set:insert (set:update integer<=>)))
(define intset:in (set:in integer<=>))
(define uncycle
(lambda (value)
(let ((cntr 0)
(used-counters '())
(pointers '()))
(letrec ((uncycle
(lambda (value)
(cond
((pair? value)
(let ((pair (assq value pointers)))
(if (pair? pair)
(begin
(set! used-counters
(car (intset:insert used-counters (cdr pair))))
(list 'ref (cdr pair)))
(begin
(set! pointers (cons (cons value cntr)
pointers))
(let ((cur-cntr cntr))
(set! cntr (+ 1 cntr))
(let ((returned (cons (uncycle (car value))
(uncycle (cdr value)))))
(if (not (null? (intset:in used-counters cur-cntr)))
(list 'def cur-cntr '= returned)
returned)))))))
((procedure? value)
(let ((type (value 'type)))
(cond
((eq? type 'ident) (value 'value))
((eq? type 'datum-label) (uncycle (value 'value)))
(else (vector 'unrepresentable type)))))
(else value)))))
(uncycle value)))))
;;; ;;;;;;;;;;;
;;; Test reader
;;; ;;;;;;;;;;;
(define %list->read
(lambda (seq)
(port->read
(lambda ()
(if (null? seq)
#f
(let ((ch (car seq)))
(set! seq (cdr seq))
ch)))
"test")))
(define read-all
(lambda (str)
(let ((reader (%list->read (string->list str))))
(letrec ((loop
(lambda ()
(if (not (reader 'peek))
#t
(let ((value (read/toplevel reader)))
(display (list "return" (uncycle value)))
(newline)
(loop))))))
(loop)))))
(read-all "x yy zz ; this is a comment\nx call/cc ")
(read-all "(a b c def (ghi j) k )")
(read-all "( a . b )")
(read-all "( a .b . c)")
(read-all "#( a b y)")
(read-all "(x y #| this is a block\n comment\n |# z w)")
(read-all "#( a b #| this is a #| nested block |# comment|# z w)")
(read-all "#(a b #(c #|close#|comment|#|#y))")
(read-all "(this has a #;(call with (current continuation)) datum comment)")
(read-all "#0=(#0# not unix)")
(read-all "#!no-fold-case #!fold-case #!NO-FOLD-CASE #false")
(read-all "#!fold-case #TRUE")
(read-all "#u8(x y z w)")
(read-all "#\\newline")
(read-all "#\\a")
(read-all "#b11")
(read-all "#XFF")
(read-all "#i#x10")
(read-all "#d1234.5678")
(read-all "#e10.5")
(read-all "#d10.24f12")
(read-all "#d-i")
(read-all "100i")
(read-all "+5")
(read-all "++1+")
(read-all "+inf.0")
(read-all "-inf.0")
(read-all "+nan.0")
(read-all "-100.5e5")