1136 lines
41 KiB
Scheme
1136 lines
41 KiB
Scheme
;;; Copyright (C) Peter McGoron 2024
|
|
;;; This program is free software: you can redistribute it and/or modify
|
|
;;; it under the terms of the GNU General Public License as published by
|
|
;;; the Free Software Foundation, version 3 of the License.
|
|
;;;
|
|
;;; This program is distributed in the hope that it will be useful,
|
|
;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
;;; GNU General Public License for more details.
|
|
;;;
|
|
;;; You should have received a copy of the GNU General Public License
|
|
;;; along with this program. If not, see <https://www.gnu.org/licenses/>.
|
|
|
|
;;; R7RS reader. This is the lexer-parser end, so it returns tokens and
|
|
;;; not concrete objects.
|
|
;;;
|
|
;;; Notes:
|
|
;;;
|
|
;;; Port stores datum labels. Datum labels are stored for the entirety of
|
|
;;; a READ: this is to emulate MIT Scheme, which allows for datum labels
|
|
;;; outside of the datum that the label appears in.
|
|
;;;
|
|
;;; The reader does not return Scheme data: it returns annotated data
|
|
;;; containing the source location, datum label number, resolved datum
|
|
;;; label pointer. This is for advanced syntax systems.
|
|
;;;
|
|
;;; How datum labels could work:
|
|
;;;
|
|
;;; When encountering #[number]=, allocate a datum label and assign it
|
|
;;; nothing. Then call READ after "=", and destructively update the
|
|
;;; datum label with the resulting datum. A pass over the new read
|
|
;;; structure to convert it to regular Scheme data will resolve the
|
|
;;; indirection.
|
|
;;;
|
|
;;; All tokens are procedure-encapsulated objects, since the reader should
|
|
;;; never return a literal procedure. Each procedure has a TYPE message.
|
|
;;;
|
|
;;; ;;;;;;;;;;;;;;;
|
|
;;; Possible Improvements
|
|
;;; ;;;;;;;;;;;;;;;
|
|
;;;
|
|
;;; * A coherent API.
|
|
;;; * regex-based lexer.
|
|
;;; * A way to load readtables and read functions when the function is run,
|
|
;;; and not when functions are defined, without sacrificing performance.
|
|
;;; * Better Unicode handling.
|
|
|
|
(load "chez-compat.scm")
|
|
(load "util.scm")
|
|
(load "set.scm")
|
|
(load "linked-list.scm")
|
|
|
|
;;; My text editor cannot parse Scheme's character syntax.
|
|
|
|
(define %bol #\()
|
|
(define %eol #\))
|
|
|
|
;;; ;;;;;;;;;;;;;;;;;;;;;;;;
|
|
;;; Port reader wrapper
|
|
;;; ;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
(define port->read-function
|
|
(lambda (port)
|
|
(lambda ()
|
|
(read-char port))))
|
|
|
|
(define string->read-function
|
|
(lambda (str)
|
|
(let ((i -1)
|
|
(length (string-length str)))
|
|
(lambda ()
|
|
(set! i (+ i 1))
|
|
(if (>= i length)
|
|
#f
|
|
(string-ref str i))))))
|
|
|
|
;;; READ:
|
|
;;;
|
|
;;; Stream readers contain mutable state. This is the case-folding mode
|
|
;;; and the current list of datum labels.
|
|
;;;
|
|
;;; (POS): Return (LIST FILENAME LINE-NUMBER OFFSET).
|
|
;;; (READ): Read the next character in the stream. Returns #F on EOF.
|
|
;;; (PUSH CHAR): Push CHAR such that it will be the next character read
|
|
;;; when (READ) is called.
|
|
;;; (PEEK): Read character, push it back, and return it.
|
|
;;; (FOLD-CASE?): Returns a boolean if case folding is enabled.
|
|
;;; (FOLD-CASE! BOOL): Sets the case folding to BOOL.
|
|
(define port->read
|
|
(lambda (read-function filename . location)
|
|
(let ((line-number (if (null? location)
|
|
1
|
|
(car location)))
|
|
(offset (if (null? location)
|
|
0
|
|
(cadr location)))
|
|
(pushback-buffer '())
|
|
(datum-labels '())
|
|
(fold-case? #f))
|
|
(letrec ((update-position!
|
|
(lambda (ch)
|
|
(cond
|
|
((eqv? ch #\newline)
|
|
(set! line-number (+ 1 line-number)) (set! offset 0))
|
|
;; OFFSET is sometimes set to #F to denote an unknown
|
|
;; offset.
|
|
(offset (set! offset (+ 1 offset))))))
|
|
(location
|
|
(lambda () (list filename line-number offset)))
|
|
(set-datum-label!
|
|
(lambda (label value)
|
|
(set! datum-labels
|
|
(car (smap:insert datum-labels label value)))))
|
|
(get-datum-label
|
|
(lambda (label)
|
|
(smap:search datum-labels label)))
|
|
(clear-datum-labels!
|
|
(lambda ()
|
|
(set! datum-labels '())))
|
|
(dump-mutable
|
|
(lambda ()
|
|
(list datum-labels fold-case?)))
|
|
(restore-mutable!
|
|
(lambda (state)
|
|
(set! datum-labels (car state))
|
|
(set! fold-case? (cadr state))))
|
|
(process
|
|
(lambda (ch)
|
|
(update-position! ch)
|
|
(cond
|
|
((or (eof-object? ch) (not ch)) ch)
|
|
(fold-case? (char-downcase ch))
|
|
(else ch))))
|
|
(port
|
|
(lambda (op . args)
|
|
;; TODO: turn into string map?
|
|
(cond
|
|
((eq? op 'location) (location))
|
|
((eq? op 'read)
|
|
(process
|
|
(if (null? pushback-buffer)
|
|
(read-function)
|
|
(let ((ch (car pushback-buffer)))
|
|
(set! pushback-buffer (cdr pushback-buffer))
|
|
ch))))
|
|
((eq? op 'peek)
|
|
(let ((ch (port 'read)))
|
|
(port 'push ch)
|
|
ch))
|
|
((eq? op 'push)
|
|
(let ((ch (car args)))
|
|
(if (eqv? ch #\newline)
|
|
(begin
|
|
(set! line-number (- line-number 1))
|
|
(set! offset #f))
|
|
(set! offset (- offset 1)))
|
|
(set! pushback-buffer (cons ch pushback-buffer))))
|
|
((eq? op 'fold-case?) fold-case?)
|
|
((eq? op 'fold-case!) (set! fold-case? (car args)))
|
|
((eq? op 'set-datum-label!) (apply set-datum-label! args))
|
|
((eq? op 'get-datum-label) (apply get-datum-label args))
|
|
((eq? op 'clear-datum-labels!) (apply clear-datum-labels! args))
|
|
((eq? op 'dump-mutable) (apply dump-mutable args))
|
|
((eq? op 'restore-mutable!) (apply restore-mutable! args))
|
|
(else (error 'read->port 'invalid (cons op args)))))))
|
|
port))))
|
|
|
|
(define ident->read
|
|
(lambda (location ident)
|
|
(apply port->read (string->read-function (ident 'value))
|
|
location)))
|
|
|
|
;;; ;;;;;;;;;;;;;;
|
|
;;; Character maps
|
|
;;; ;;;;;;;;;;;;;;
|
|
|
|
(define integer<=>
|
|
(lambda (x y)
|
|
(cond
|
|
((< x y) '<)
|
|
((= x y) '=)
|
|
(else '>))))
|
|
|
|
;;; Comparison on characters extended to #F, which is less than all
|
|
;;; characters.
|
|
(define char*<=>
|
|
(lambda (x y)
|
|
(cond
|
|
((and (not x) y) '<)
|
|
((and x (not y)) '>)
|
|
((and (not x) (not y) '=))
|
|
(else (integer<=> (char->integer x)
|
|
(char->integer y))))))
|
|
|
|
(define %charmap:<=> (set:<=>-to-map char*<=>))
|
|
(define %charmap:update (set:update %charmap:<=>))
|
|
|
|
(define charmap:update (map:update %charmap:update))
|
|
(define charmap:insert (map:insert %charmap:update))
|
|
(define charmap:search (map:search %charmap:<=>))
|
|
|
|
;;; ;;;;;;;;;;;;;;;;;;;;;;
|
|
;;; Readtable constructors
|
|
;;;
|
|
;;; Readtables are composed of a CHARMAP, which is a map from characters
|
|
;;; to actions, and a DEFAULT-ACTION, which is taken when there is no
|
|
;;; match in CHARMAP.
|
|
;;;
|
|
;;; An "action" is a procedure that takes four arguments:
|
|
;;;
|
|
;;; TABLE: The current table.
|
|
|
|
;;; CHAR: The character that was matched against the CHARMAP in TABLE.
|
|
;;; ACC: An arbitrary "accumulator" value that is different depending
|
|
;;; on the readtable in question.
|
|
;;; PORT: A port reader object.
|
|
;;; ;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
;;; (READTABLE:NEW DEFAULT-ACTION CHARMAP)
|
|
(define readtable:new cons)
|
|
|
|
(define %readtable:default-action car)
|
|
(define %readtable:charmap cdr)
|
|
|
|
;;; Run the action in TABLE assigned to CHAR, or the default action of
|
|
;;; TABLE if there is no entry for CHAR.
|
|
(define readtable:act
|
|
(lambda (table char acc port)
|
|
(let ((node (charmap:search (%readtable:charmap table)
|
|
char)))
|
|
(let ((action (if (null? node)
|
|
(%readtable:default-action table)
|
|
(map:val node))))
|
|
(action table char acc port)))))
|
|
|
|
;;; Run the action in TABLE with the next character from PORT.
|
|
(define readtable:next
|
|
(lambda (table acc port)
|
|
(readtable:act table (port 'read) acc port)))
|
|
|
|
;;; Return a new readtable where CHAR is bound to ACTION.
|
|
(define readtable:update
|
|
(lambda (table chars action)
|
|
(let ((chars (if (pair? chars)
|
|
chars
|
|
(list chars))))
|
|
(fold (lambda (char table)
|
|
(readtable:new (%readtable:default-action table)
|
|
(car (charmap:insert
|
|
(%readtable:charmap table) char action))))
|
|
table
|
|
chars))))
|
|
|
|
;;; Construct new readtable with no characters in its map and
|
|
;;; DEFAULT-ACTION as the default action.
|
|
(define readtable:empty/default
|
|
(lambda (default-action)
|
|
(readtable:new default-action '())))
|
|
|
|
;;; Each value in FUNCTIONS is a list (PROCEDURE ARGS...) which is called
|
|
;;; like (PROCEDURE TABLE ARGS...) and returns a table.
|
|
(define readtable:process
|
|
(lambda (table . functions)
|
|
(fold (lambda (function table)
|
|
(apply (car function) table (cdr function)))
|
|
table
|
|
functions)))
|
|
|
|
;;; ;;;;;;;;;;;;;;;;;;
|
|
;;; Default actions
|
|
;;; ;;;;;;;;;;;;;;;;;;
|
|
|
|
;;; Return an error.
|
|
(define readtable:error
|
|
(lambda emsg
|
|
(lambda (table char acc port)
|
|
(error emsg char acc table port))))
|
|
|
|
;;; Discard the current character and continue reading the readtable.
|
|
(define readtable:skip
|
|
(lambda (table char acc port)
|
|
(readtable:act table (port 'read) acc port)))
|
|
|
|
;;; Discard char and return constant.
|
|
(define readtable:return
|
|
(lambda (return)
|
|
(lambda (table char acc port)
|
|
return)))
|
|
|
|
;;; Jump to a new readtable, discard it's return, and continue reading
|
|
;;; in the table.
|
|
(define readtable:jump-discard
|
|
(lambda (newtable)
|
|
(lambda (oldtable char acc port)
|
|
(readtable:act newtable (port 'read) '() port)
|
|
(readtable:act oldtable (port 'read) acc port))))
|
|
|
|
;;; Jump to a new readtable with the same characters.
|
|
(define readtable:jump
|
|
(lambda (newtable)
|
|
(lambda (oldtable char acc port)
|
|
(readtable:act newtable char acc port))))
|
|
|
|
;;; Jump to a new readtable, reading the new character, with the old
|
|
;;; readtable as ACC.
|
|
(define readtable:next/old-as-acc
|
|
(lambda (newtable)
|
|
(lambda (oldtable __ _ port)
|
|
(readtable:next newtable oldtable port))))
|
|
|
|
;;; Jump to a new readtable, reading the new character.
|
|
(define readtable:jump/next
|
|
(lambda (newtable)
|
|
(lambda (oldtable _ acc port)
|
|
(readtable:next newtable acc port))))
|
|
|
|
;;; ;;;;;;;;;;;;;;;;;
|
|
;;; Identifier reader
|
|
;;; ;;;;;;;;;;;;;;;;;
|
|
|
|
(define read:ident
|
|
(lambda (name location)
|
|
(lambda (op . args)
|
|
(cond
|
|
((eq? op 'type) 'ident)
|
|
((eq? op 'location) location)
|
|
((eq? op 'value) name)
|
|
(else (error 'read:ident "invalid operation" op args))))))
|
|
|
|
(define read:ident-builder
|
|
(lambda (location . start-char)
|
|
(let ((char-list (linked-list:new)))
|
|
(if (not (null? start-char))
|
|
(char-list 'push (car start-char)))
|
|
(lambda (op . args)
|
|
(cond
|
|
((eq? op 'location) location)
|
|
((eq? op 'finalize->ident)
|
|
(read:ident (list->string (char-list 'to-list)) location))
|
|
(else (apply char-list op args)))))))
|
|
|
|
;;; Push back CHAR and return ACC.
|
|
(define readtable:return-acc-as-ident
|
|
(lambda (table char acc port)
|
|
(port 'push char)
|
|
(acc 'finalize->ident)))
|
|
|
|
;;; Push CHAR to ACC and continue reading from TABLE.
|
|
(define readtable:push-char
|
|
(lambda (table char acc port)
|
|
(acc 'push-tail char)
|
|
(readtable:act table (port 'read) acc port)))
|
|
|
|
;;; Define a readtable that constructs an identifier by accepting all
|
|
;;; characters that are not listed.
|
|
(define readtable:exclude-from-identifiers
|
|
(lambda (table excluded)
|
|
(fold (lambda (char table)
|
|
(readtable:update table char readtable:return-acc-as-ident))
|
|
table
|
|
excluded)))
|
|
|
|
;;; ASCII whitespace.
|
|
(define readtable:ASCII-whitespace
|
|
(list #\newline
|
|
#\space
|
|
(integer->char #x09)
|
|
(integer->char #x0B)
|
|
(integer->char #x0C)
|
|
(integer->char #x0D)))
|
|
|
|
;;; Readtable for identifiers.
|
|
(define readtable:identifier
|
|
(readtable:process
|
|
(readtable:empty/default readtable:push-char)
|
|
(list readtable:exclude-from-identifiers
|
|
readtable:ASCII-whitespace)
|
|
(list readtable:exclude-from-identifiers
|
|
(list #\| %bol %eol #\' #\; #f))))
|
|
|
|
;;; Read an identifier starting with CHAR.
|
|
(define read:read-ident
|
|
(lambda (table char acc port)
|
|
(readtable:act readtable:identifier
|
|
(port 'read)
|
|
(read:ident-builder (port 'location) char)
|
|
port)))
|
|
|
|
;;; ;;;;;;;;;;;;;;;;;;;;
|
|
;;; Comments and whitespace reader
|
|
;;; ;;;;;;;;;;;;;;;;;;;;
|
|
|
|
;;; Readtable for a line comment.
|
|
(define readtable:read-to-newline
|
|
(readtable:process
|
|
(readtable:empty/default readtable:skip)
|
|
(list readtable:update #\newline (readtable:return #f))))
|
|
|
|
;;; ;;;;;;;;;;;
|
|
;;; List reader
|
|
;;;
|
|
;;; The reader updates the previous readtable to handle ). This means
|
|
;;; that this read table does not have to handle end-of-line, whitespace,
|
|
;;; etc.
|
|
;;; ;;;;;;;;;;;
|
|
|
|
;;; Read the end of an improper list.
|
|
(define readtable:read-improper-cdr
|
|
(lambda (table acc port)
|
|
(let ((val
|
|
(readtable:act (readtable:update table
|
|
%eol
|
|
(readtable:error
|
|
'read-improper-cdr
|
|
"proper list must have cdr"))
|
|
(port 'read)
|
|
#f
|
|
port)))
|
|
(acc 'set-cdr! val)
|
|
(let ((table (readtable:process
|
|
(readtable:empty/default (readtable:error
|
|
'read-improper-cdr
|
|
"improper list has 1 cdr"))
|
|
(list readtable:update
|
|
readtable:ASCII-whitespace
|
|
readtable:skip)
|
|
(list readtable:update %eol
|
|
(lambda dummy 'end-of-list)))))
|
|
(readtable:act table (port 'read) acc port)))))
|
|
|
|
;;; Generic reader loop for a list. It takes as input the table that has
|
|
;;; already been updated with end of list and improper list handlers.
|
|
(define readtable:read-list-loop
|
|
(lambda (table port)
|
|
(let ((acc (linked-list:new)))
|
|
(letrec ((loop
|
|
(lambda ()
|
|
(let ((value (readtable:act table
|
|
(port 'read)
|
|
acc
|
|
port)))
|
|
(cond
|
|
((eqv? value 'end-of-list) (acc 'to-list))
|
|
(else (acc 'push-tail value)
|
|
(loop)))))))
|
|
(loop)))))
|
|
|
|
;;; Readtable for a list, generic to proper and improper list
|
|
;;; readers.
|
|
(define readtable:table-for-list
|
|
(lambda (oldtable on-dot)
|
|
(readtable:process
|
|
oldtable
|
|
(list readtable:update %eol (readtable:return 'end-of-list))
|
|
(list readtable:update #\.
|
|
(lambda (table char acc port)
|
|
(let ((entire-identifier (readtable:read-ident
|
|
table
|
|
char
|
|
#f
|
|
port)))
|
|
(if (equal? entire-identifier ".")
|
|
(on-dot table acc port)
|
|
entire-identifier)))))))
|
|
|
|
;;; Read a proper or improper list.
|
|
(define readtable:read-list
|
|
(lambda (oldtable _ __ port)
|
|
(readtable:read-list-loop (readtable:table-for-list
|
|
oldtable
|
|
readtable:read-improper-cdr)
|
|
port)))
|
|
|
|
;;; Read strictly a proper list. This assumes that BOL has been read.
|
|
(define readtable:read-proper-list
|
|
(lambda (table port)
|
|
(readtable:read-list-loop (readtable:table-for-list
|
|
table
|
|
(readtable:error
|
|
'read-proper-list
|
|
"expected proper list"))
|
|
port)))
|
|
|
|
(define readtable:expect-proper-list
|
|
(lambda (table port)
|
|
(if (not (eqv? (port 'read) %bol))
|
|
(error 'expect-port-list 'no-list-found)
|
|
(readtable:read-proper-list table port))))
|
|
|
|
;;; ;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
;;; Implementation of a Pratt parser. In a Pratt parser, all tokens are
|
|
;;; "operators" with "precedence".
|
|
;;;
|
|
;;; Since Pratt parsers are implemented in terms of procedures, they can
|
|
;;; parse any grammar.
|
|
;;;
|
|
;;; This file implements the algorithm found in:
|
|
;;;
|
|
;;; van de Vanter, Michael L. "A Formalization and Correctness Proof of
|
|
;;; the CGOL Language System." (Master's Thesis). MIT Laboratory for
|
|
;;; Computer Science Technical Report MIT-LCS-TR-147 (Cambridge,
|
|
;;; Massachusetts). 1975. https://hdl.handle.net/1721.1/149442.
|
|
|
|
;;; (PRATT:OPERATOR PRECEDENCE PROCEDURE)
|
|
;;; constructs a description of an operator.
|
|
;;;
|
|
;;; PRECEDENCE is an exact number describing the precedence of the
|
|
;;; operator, and for portability should only be an integer.
|
|
;;;
|
|
;;; (The special EOF operator, which always exists, has negative
|
|
;;; infinity precedence.)
|
|
;;;
|
|
;;; PROCEDURE is of the form (PROCEDURE OPERATORS LEFT TOKEN READER).
|
|
;;; OPERATORS is the operators map.
|
|
;;; LEFT is a PRATT:TOKEN, which was to the left of the operator.
|
|
;;; TOKEN is the token read from the reader wit the same type.
|
|
;;; READER is a reader object.
|
|
;;;
|
|
;;; PROCEDURE consumes tokens to obtain the
|
|
;;; right hand side of its expression. PROCEDURE produces a PRATT:TOKEN.
|
|
(define pratt:operator cons)
|
|
(define pratt:precedence car)
|
|
(define pratt:procedure cdr)
|
|
|
|
(define pratt:search-token
|
|
(lambda (operators token)
|
|
(smap:search operators (symbol->string (token 'type)))))
|
|
|
|
;;; Parse a stream from READER, where the precedence limit is specified as
|
|
;;; BINDING-POWER. If BINDING-POWER is #F, then there is no precedence
|
|
;;; limit. Operators are read from OPERATORS, which is a SMAP.
|
|
(define pratt
|
|
(lambda (operators binding-power reader)
|
|
(letrec
|
|
((loop
|
|
(lambda (left)
|
|
(let ((token (reader 'peek)))
|
|
(let ((operator (pratt:search-token operators token)))
|
|
(if (null? next-operator)
|
|
(error 'pratt 'unknown-operator token
|
|
operators binding-power reader))
|
|
(if (and binding-power
|
|
(>= (pratt:precedence operator)
|
|
binding-power))
|
|
left
|
|
(parse operator left (reader 'read) reader))))))
|
|
(parse
|
|
(lambda (operator left token reader)
|
|
(loop ((pratt:procedure operator)
|
|
operators
|
|
left
|
|
token
|
|
reader)))))
|
|
;; Read the first token.
|
|
(let ((token (reader 'read)))
|
|
(let ((operator (pratt:search-token operators token)))
|
|
(if (null? operator)
|
|
(error 'pratt 'unknown-operator token
|
|
operators binding-power reader))
|
|
(parse operator #f token reader))))))
|
|
|
|
;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
;;; Math reader.
|
|
;;;
|
|
;;; Scheme numbers are a kludge. A proper LISP would either represent
|
|
;;; numbers as
|
|
;;;
|
|
;;; (+ 4 (*i 6)) or (@ (. 12 34) 2) or (e 12 3) etc.
|
|
;;;
|
|
;;; or just bite the bullet and include a full infix expression parser.
|
|
;;;
|
|
;;; This is a full infix expression parser. It will parse expressions
|
|
;;; such as SIN(X) or X * 5I/6. Identifiers and functions are any Scheme
|
|
;;; object.
|
|
;;;
|
|
;;; The math reader can be invoked using #MATH(expr). EXPR is an infix
|
|
;;; mathematical expression. #PREFIX(expr) reads EXPR in the normal
|
|
;;; reader. All other hash identifiers are unsupported in #MATH.
|
|
|
|
;;; The basic number reader. The basic syntax is
|
|
;;;
|
|
;;; PREFIXES* SIGN? (RADIX+ ("." RADIX+)? | "." RADIX+) ("e" RADIX+)? "i"?
|
|
;;; Prefixes are handled by the "#" reader.
|
|
|
|
;;; Incremental builder for a number, driven by the lexer.
|
|
;;; TODO: how will this be turned into a number, in the end?
|
|
(define read:number-builder
|
|
(lambda (radix-table location)
|
|
(let ((main '())
|
|
(sign #\+)
|
|
(decimal '())
|
|
(exp-char #\e)
|
|
(exp-digits '())
|
|
(imaginary? #f)
|
|
(current-part 'main)
|
|
(exactness #f))
|
|
(let ((append-part
|
|
(lambda (char)
|
|
(cond
|
|
((eq? current-part 'main)
|
|
(set! main (cons char main)))
|
|
((eq? current-part 'decimal)
|
|
(set! decimal (cons char decimal)))
|
|
((eq? current-part 'exp)
|
|
(set! exp-digits (cons char exp-digits)))))))
|
|
(lambda (op . args)
|
|
(cond
|
|
((eq? op 'push!) (apply append-part args))
|
|
((eq? op 'sign!) (set! sign (car args)))
|
|
((eq? op 'finalize)
|
|
(if (not exactness)
|
|
(set! exactness 'exact))
|
|
(if (null? main)
|
|
(set! main '(0)))
|
|
(if (null? decimal)
|
|
(set! decimal '(0)))
|
|
(if (null? exp-digits)
|
|
(set! exp-digits '(0)))
|
|
(list 'basic-number
|
|
main sign exactness decimal exp-char exp-digits imaginary?))
|
|
((eq? op 'exactness!) (set! exactness (car args)))
|
|
((eq? op 'radix-table!) (set! radix-table (car args)))
|
|
((eq? op 'set-imaginary!)
|
|
(set! imaginary? #t))
|
|
((eq? op 'set-decimal!)
|
|
(if (not exactness)
|
|
(set! exactness 'inexact))
|
|
;;
|
|
(if (not (eq? current-part 'main))
|
|
(error 'number-builder "cannot set to decimal" current-part)
|
|
(set! current-part 'decimal)))
|
|
((eq? op 'set-exponential!)
|
|
(if (not exactness)
|
|
(set! exactness 'inexact))
|
|
;;
|
|
(if (eq? current-part 'exp)
|
|
(error 'number-builder "cannot set to exponential"))
|
|
(set! current-part 'exp)
|
|
(set! exp-char (car args)))
|
|
;;
|
|
((eq? op 'radix-table) radix-table)
|
|
(else (error 'number-builder 'invalid op args))))))))
|
|
|
|
(define read:special-number
|
|
(lambda (type sign location)
|
|
(lambda (op)
|
|
(cond
|
|
((eq? op 'type) type)
|
|
((eq? op 'sign) sign)
|
|
((eq? op 'location) location)
|
|
(else (error 'special-numbe 'invalid op))))))
|
|
|
|
(define readtable:return-number
|
|
(lambda (_ char acc port)
|
|
(port 'push char)
|
|
(acc 'finalize)))
|
|
|
|
;;; Main lexer for number table.
|
|
(define readtable:number-main-table
|
|
(readtable:process
|
|
(readtable:empty/default readtable:return-number)
|
|
(list readtable:update #\.
|
|
(lambda (table char number-builder port)
|
|
(number-builder 'set-decimal!)
|
|
(readtable:next table
|
|
number-builder
|
|
port)))
|
|
(list readtable:update (list #\e #\s #\f #\d #\l)
|
|
(lambda (table char number-builder port)
|
|
(number-builder 'set-exponential! char)
|
|
(readtable:next table
|
|
number-builder
|
|
port)))
|
|
(list readtable:update #\_ readtable:skip)
|
|
(list readtable:update #\i
|
|
(lambda (table char number-builder port)
|
|
(number-builder 'set-imaginary!)
|
|
(number-builder 'finalize)))))
|
|
|
|
;;; Generate a lexer given a list of numbers in the radix.
|
|
(define readtable:for-radix
|
|
(lambda (radix-list)
|
|
(readtable:process
|
|
readtable:number-main-table
|
|
(list readtable:update radix-list
|
|
(lambda (table char number-builder port)
|
|
(number-builder 'push! char)
|
|
(readtable:next table number-builder port))))))
|
|
|
|
(define readtable:for-bin
|
|
(readtable:for-radix '(#\0 #\1)))
|
|
(define readtable:for-oct
|
|
(readtable:for-radix '(#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7)))
|
|
(define readtable:for-dec
|
|
(readtable:for-radix '(#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9)))
|
|
(define readtable:for-hex
|
|
(readtable:for-radix '(#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9
|
|
#\a #\b #\c #\d #\e #\f)))
|
|
|
|
;;; Read the sign at the start of a number.
|
|
(define readtable:read-sign
|
|
(readtable:process
|
|
(readtable:empty/default
|
|
(lambda (_ char number-builder port)
|
|
(readtable:act (number-builder 'radix-table)
|
|
char
|
|
number-builder
|
|
port)))
|
|
(list readtable:update (list #\+ #\-)
|
|
(lambda (_ char number-builder port)
|
|
(number-builder 'sign! char)
|
|
(readtable:next (number-builder 'radix-table)
|
|
number-builder
|
|
port)))
|
|
(list readtable:update #\_ readtable:skip)))
|
|
|
|
(define readtable:read-prefix-char
|
|
(let ((act (lambda args
|
|
(lambda (_ char number-builder port)
|
|
(apply number-builder args)
|
|
(readtable:next readtable:read-number-prefix
|
|
number-builder
|
|
port)))))
|
|
(readtable:process
|
|
(readtable:empty/default (readtable:error "invalid prefix"))
|
|
(list readtable:update #\i (act 'exactness! 'inexact))
|
|
(list readtable:update #\e (act 'exactness! 'exact))
|
|
(list readtable:update #\b (act 'radix-table! readtable:for-bin))
|
|
(list readtable:update #\o (act 'radix-table! readtable:for-oct))
|
|
(list readtable:update #\d (act 'radix-table! readtable:for-dec))
|
|
(list readtable:update #\x (act 'radix-table! readtable:for-hex)))))
|
|
|
|
(define readtable:read-number-prefix
|
|
(readtable:process
|
|
(readtable:empty/default (readtable:jump readtable:read-sign))
|
|
(list readtable:update #\# (readtable:jump/next readtable:read-prefix-char))))
|
|
|
|
;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
;;; Reader for stuff that start with "#"
|
|
;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
(define readtable:digits
|
|
(list #\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9))
|
|
|
|
(define readtable:vector
|
|
(lambda (_ __ toplevel port)
|
|
(list 'vector (readtable:read-proper-list toplevel port))))
|
|
|
|
;;; Block comment reader.
|
|
;;;
|
|
;;; The outermost block comment reader is passed the toplevel reader as
|
|
;;; ACC. When the outermost block is finished, it will tail-call ACC.
|
|
;;; (It is basically the continuation of the reader.)
|
|
;;;
|
|
;;; When a nested block comment is found, it is passed #F as ACC, which
|
|
;;; it will not call. It will return an unspecified value.
|
|
;;;
|
|
;;; Since the read tables are not procedures, references to other tables
|
|
;;; in the same LETREC declaration must be protected with explicit LAMBDAs.
|
|
;;; Macros could make this much easier to read.
|
|
(define readtable:block-comment
|
|
(letrec ((potential-end
|
|
(readtable:process
|
|
(readtable:empty/default
|
|
(lambda (this char acc port) (readtable:act
|
|
loop
|
|
char
|
|
acc
|
|
port)))
|
|
(list readtable:update #\#
|
|
(lambda (this char acc port)
|
|
(if acc
|
|
(readtable:next acc #f port))))))
|
|
(potential-start
|
|
(readtable:process
|
|
(readtable:empty/default
|
|
(lambda (this char acc port) (readtable:act
|
|
loop
|
|
char
|
|
acc
|
|
port)))
|
|
(list readtable:update #\|
|
|
(lambda (this char acc port)
|
|
(readtable:next loop #f port)
|
|
(readtable:next loop acc port)))))
|
|
(loop
|
|
(readtable:process
|
|
(readtable:empty/default readtable:skip)
|
|
(list readtable:update #\#
|
|
(lambda (this char acc port)
|
|
(readtable:next potential-start
|
|
acc
|
|
port)))
|
|
(list readtable:update #\|
|
|
(lambda (this char acc port)
|
|
(readtable:next potential-end
|
|
acc
|
|
port))))))
|
|
loop))
|
|
|
|
(define read:datum-label
|
|
(lambda (location toplevel)
|
|
(let ((finalized? #f)
|
|
(value '())
|
|
(name (read:ident-builder location)))
|
|
(lambda (op . args)
|
|
(cond
|
|
((eq? op 'finalize-value)
|
|
(if finalized?
|
|
(error 'datum-label "cannot finalize twice"))
|
|
(set! finalized? #t)
|
|
(set! value (car args))
|
|
(set! toplevel #f))
|
|
((eq? op 'finalize->ident)
|
|
(set! name (name 'finalize->ident)))
|
|
((eq? op 'value)
|
|
(if (not finalized?)
|
|
(error 'datum-label "value called before finalize"))
|
|
value)
|
|
((eq? op 'toplevel) toplevel)
|
|
((eq? op 'type) 'datum-label)
|
|
((eq? op 'as-string) (name 'value))
|
|
(else (apply name op args)))))))
|
|
|
|
;;; Readtable for the number part of a datum label / reference. A label
|
|
;;; looks like "#[NUMBER]=" and a reference looks like "#[NUMBER]#".
|
|
;;;
|
|
;;; Datum label assignments #XXX=[DATUM] return DATUM.
|
|
;;; Datum label references #XXX# return a DATUM-LABEL object, because
|
|
;;; circular references must be resolved later. (With some trickery it
|
|
;;; cold be resolved as soon as possible, but that requires knowledge of
|
|
;;; what is complex data and what is an atom. This requires knowing all
|
|
;;; possible values that the toplevel could read, which is not possible
|
|
;;; because new items can be added to the toplevel.)
|
|
(define readtable:datum-label-next
|
|
(readtable:process
|
|
(readtable:empty/default (readtable:error 'datum-label-next
|
|
"invalid datum label/ref"))
|
|
(list readtable:update readtable:digits readtable:push-char)
|
|
(list readtable:update #\=
|
|
(lambda (_ __ acc port)
|
|
(acc 'finalize->ident)
|
|
(port 'set-datum-label! (acc 'as-string) acc)
|
|
(let ((next-value (readtable:next (acc 'toplevel)
|
|
#f
|
|
port)))
|
|
(if (eqv? acc next-value)
|
|
(error 'datum-label-next "datum label cannot be itself"))
|
|
(acc 'finalize-value next-value)
|
|
(acc 'value))))
|
|
(list readtable:update #\#
|
|
(lambda (_ __ acc port)
|
|
(acc 'finalize->ident)
|
|
(let ((datum-label-container (port 'get-datum-label
|
|
(acc 'as-string))))
|
|
(if (null? datum-label-container)
|
|
(error 'datum-label-next
|
|
"unknown reference to datum label" label)
|
|
(map:val datum-label-container)))))))
|
|
|
|
;;; Reads the next toplevel datum, discards it, and then continues at the
|
|
;;; toplevel.
|
|
;;;
|
|
;;; The R7RS reader can cause side-effects due to #!FOLD-CASE and datum
|
|
;;; labels. This must be supressed in datum comments.
|
|
(define readtable:datum-comment
|
|
(lambda (_ __ toplevel port)
|
|
(let ((mutable (port 'dump-mutable)))
|
|
(readtable:next toplevel #f port)
|
|
(port 'restore-mutable! mutable))
|
|
(readtable:next toplevel #f port)))
|
|
|
|
;;; ;;;;;;;;;;;;;;;;;;;;;;;;
|
|
;;; Handling hash messages
|
|
;;;
|
|
;;; A "hash message" is any identifier that immediately follows a "#".
|
|
;;; This can be a constant, a part of a constant (like for numbers), or
|
|
;;; a directive (like #!FOLD-CASE).
|
|
|
|
;;; String map from constants to procedures with formal arguments
|
|
;;; (TOPLEVEL PORT)
|
|
;;; with TOPLEVEL being the current toplevel table and PORT being the
|
|
;;; current port being read from.
|
|
;;;
|
|
;;; Each one must return something. Directives that return nothing must
|
|
;;; call the toplevel again.
|
|
(define read:hash-messages
|
|
(smap:insert-many
|
|
'()
|
|
(cons "true" (lambda unused #t))
|
|
(cons "false" (lambda unused #f))
|
|
(cons "t" (lambda unused #t))
|
|
(cons "f" (lambda unused #f))
|
|
(cons "u8" (lambda (toplevel port)
|
|
(list 'bytevector
|
|
(readtable:expect-proper-list toplevel port))))
|
|
(cons "!fold-case"
|
|
(lambda (toplevel port)
|
|
(port 'fold-case! #t)
|
|
(readtable:act toplevel (port 'read) #f port)))
|
|
(cons "!no-fold-case"
|
|
(lambda (toplevel port)
|
|
(port 'fold-case! #f)
|
|
(readtable:act toplevel (port 'read) #f port)))))
|
|
|
|
(define read:read-hash-messages
|
|
(lambda (_ char acc port)
|
|
(let ((ident (readtable:read-ident #f char #f port)))
|
|
(let ((container (smap:search read:hash-messages
|
|
(ident 'value))))
|
|
(if (null? container)
|
|
(let ((number-port (ident->read (port 'location) ident)))
|
|
(number-port 'fold-case! #t)
|
|
(readtable:next readtable:read-prefix-char
|
|
(read:number-builder readtable:for-dec
|
|
(port 'location))
|
|
number-port))
|
|
((map:val container) acc port))))))
|
|
|
|
;;; ;;;;;;;;;;;;;;;;;;;;;;
|
|
;;; Characters
|
|
;;;
|
|
;;; For completeness, this adds a few more newline characters that are C
|
|
;;; escape sequences but are not standard Scheme.
|
|
|
|
(define read:named-characters
|
|
(smap:insert-many
|
|
'()
|
|
(cons "null" (integer->char 0))
|
|
(cons "alarm" (integer->char #x7))
|
|
(cons "backspace" (integer->char #x8))
|
|
(cons "tab" (integer->char #x9))
|
|
(cons "newline" (integer->char #xA))
|
|
(cons "vertical-tab" (integer->char #xB))
|
|
(cons "form-feed" (integer->char #xC))
|
|
(cons "return" (integer->char #xD))
|
|
(cons "escape" (integer->char #x1B))
|
|
(cons "space" (integer->char #x20))
|
|
(cons "delete" (integer->char #x7F))))
|
|
|
|
(define readtable:character
|
|
(lambda (_ char __ port)
|
|
(let ((char (port 'read)))
|
|
(let ((ident (readtable:read-ident #f char #f port)))
|
|
(let ((container (smap:search read:named-characters
|
|
(ident 'value))))
|
|
(display (ident 'value))
|
|
(newline)
|
|
(cond
|
|
((and (null? container)
|
|
(= (string-length (ident 'value)) 1))
|
|
(string-ref (ident 'value) 0))
|
|
((null? container) (error 'character 'unknown-character ident))
|
|
(else (map:val container))))))))
|
|
|
|
(define readtable:hash
|
|
(readtable:process
|
|
(readtable:empty/default read:read-hash-messages)
|
|
(list readtable:update readtable:ASCII-whitespace
|
|
(readtable:error 'hash 'whitespace-after-hash))
|
|
(list readtable:update #\| (readtable:jump/next readtable:block-comment))
|
|
(list readtable:update #\; readtable:datum-comment)
|
|
(list readtable:update #\\ readtable:character)
|
|
(list readtable:update readtable:digits ; Datum labels
|
|
(lambda (_ char toplevel port)
|
|
(readtable:act readtable:datum-label-next
|
|
char
|
|
(read:datum-label (port 'location) toplevel)
|
|
port)))
|
|
(list readtable:update %bol readtable:vector)))
|
|
|
|
;;; Read a decimal number without prefixes.
|
|
(define readtable:create-number
|
|
(lambda (toplevel number-char _ port)
|
|
(readtable:act readtable:read-sign
|
|
number-char
|
|
(read:number-builder readtable:for-dec
|
|
(port 'location))
|
|
port)))
|
|
|
|
(define readtable:check-for-special-ident
|
|
(readtable:process
|
|
(readtable:empty/default
|
|
(lambda (_ char ident-builder port)
|
|
(let ((return-ident (readtable:act readtable:identifier
|
|
char
|
|
ident-builder
|
|
port)))
|
|
(let ((val (return-ident 'value))
|
|
(loc (return-ident 'location)))
|
|
(cond
|
|
((equal? val "+inf.0") (read:special-number 'inf #\+ loc))
|
|
((equal? val "-inf.0") (read:special-number 'inf #\- loc))
|
|
((equal? val "+nan.0") (read:special-number 'nan #\+ loc))
|
|
((equal? val "-nan.0") (read:special-number 'nan #\- loc))
|
|
(else val))))))
|
|
(list readtable:update '(#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9)
|
|
(lambda (_ char ident-builder port)
|
|
(let ((ident (ident-builder 'finalize->ident)))
|
|
(let ((number-builder (read:number-builder
|
|
readtable:for-dec
|
|
(ident 'location))))
|
|
(number-builder 'sign! (string-ref (ident 'value) 0))
|
|
(readtable:act readtable:for-dec char number-builder port)))))))
|
|
|
|
(define read:ident-or-number
|
|
(lambda (_ sign __ port)
|
|
(readtable:next readtable:check-for-special-ident
|
|
(read:ident-builder (port 'location) sign)
|
|
port)))
|
|
|
|
;;; ;;;;;;;;;;;;;;;;
|
|
;;; Toplevel reader.
|
|
;;; ;;;;;;;;;;;;;;;;
|
|
;;; This is defined as a function so that it dynamically loads each
|
|
;;; sub-readtable.
|
|
(define readtable:top
|
|
(lambda ()
|
|
(readtable:process
|
|
(readtable:empty/default read:read-ident)
|
|
(list readtable:update '(#\+ #\-) read:ident-or-number)
|
|
(list readtable:update readtable:ASCII-whitespace readtable:skip)
|
|
(list readtable:update #f (readtable:return 'eof))
|
|
(list readtable:update %bol readtable:read-list)
|
|
(list readtable:update %eol (readtable:error 'top "unbalanced list"))
|
|
(list readtable:update #\# (readtable:next/old-as-acc
|
|
readtable:hash))
|
|
(list readtable:update '(#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9)
|
|
readtable:create-number)
|
|
(list readtable:update #\;
|
|
(readtable:jump-discard readtable:read-to-newline)))))
|
|
|
|
(define read/toplevel
|
|
(lambda (port)
|
|
;; Does each invocation of READ keep datum labels from previous reads?
|
|
;; (port 'clear-datum-labels!)
|
|
(readtable:next (readtable:top)
|
|
#f
|
|
port)))
|
|
|
|
(define intset:insert (set:insert (set:update integer<=>)))
|
|
(define intset:in (set:in integer<=>))
|
|
|
|
(define uncycle
|
|
(lambda (value)
|
|
(let ((cntr 0)
|
|
(used-counters '())
|
|
(pointers '()))
|
|
(letrec ((uncycle
|
|
(lambda (value)
|
|
(cond
|
|
((pair? value)
|
|
(let ((pair (assq value pointers)))
|
|
(if (pair? pair)
|
|
(begin
|
|
(set! used-counters
|
|
(car (intset:insert used-counters (cdr pair))))
|
|
(list 'ref (cdr pair)))
|
|
(begin
|
|
(set! pointers (cons (cons value cntr)
|
|
pointers))
|
|
(let ((cur-cntr cntr))
|
|
(set! cntr (+ 1 cntr))
|
|
(let ((returned (cons (uncycle (car value))
|
|
(uncycle (cdr value)))))
|
|
(if (not (null? (intset:in used-counters cur-cntr)))
|
|
(list 'def cur-cntr '= returned)
|
|
returned)))))))
|
|
((procedure? value)
|
|
(let ((type (value 'type)))
|
|
(cond
|
|
((eq? type 'ident) (value 'value))
|
|
((eq? type 'datum-label) (uncycle (value 'value)))
|
|
(else (vector 'unrepresentable type)))))
|
|
(else value)))))
|
|
(uncycle value)))))
|
|
|
|
;;; ;;;;;;;;;;;
|
|
;;; Test reader
|
|
;;; ;;;;;;;;;;;
|
|
(define %list->read
|
|
(lambda (seq)
|
|
(port->read
|
|
(lambda ()
|
|
(if (null? seq)
|
|
#f
|
|
(let ((ch (car seq)))
|
|
(set! seq (cdr seq))
|
|
ch)))
|
|
"test")))
|
|
|
|
(define read-all
|
|
(lambda (str)
|
|
(let ((reader (%list->read (string->list str))))
|
|
(letrec ((loop
|
|
(lambda ()
|
|
(if (not (reader 'peek))
|
|
#t
|
|
(let ((value (read/toplevel reader)))
|
|
(display (list "return" (uncycle value)))
|
|
(newline)
|
|
(loop))))))
|
|
(loop)))))
|
|
|
|
(read-all "x yy zz ; this is a comment\nx call/cc ")
|
|
(read-all "(a b c def (ghi j) k )")
|
|
(read-all "( a . b )")
|
|
(read-all "( a .b . c)")
|
|
(read-all "#( a b y)")
|
|
(read-all "(x y #| this is a block\n comment\n |# z w)")
|
|
(read-all "#( a b #| this is a #| nested block |# comment|# z w)")
|
|
(read-all "#(a b #(c #|close#|comment|#|#y))")
|
|
(read-all "(this has a #;(call with (current continuation)) datum comment)")
|
|
(read-all "#0=(#0# not unix)")
|
|
(read-all "#!no-fold-case #!fold-case #!NO-FOLD-CASE #false")
|
|
(read-all "#!fold-case #TRUE")
|
|
(read-all "#u8(x y z w)")
|
|
(read-all "#\\newline")
|
|
(read-all "#\\a")
|
|
|
|
(read-all "#b11")
|
|
(read-all "#XFF")
|
|
(read-all "#i#x10")
|
|
(read-all "#d1234.5678")
|
|
(read-all "#e10.5")
|
|
(read-all "#d10.24f12")
|
|
(read-all "#d-i")
|
|
(read-all "100i")
|
|
(read-all "+5")
|
|
(read-all "++1+")
|
|
(read-all "+inf.0")
|
|
(read-all "-inf.0")
|
|
(read-all "+nan.0")
|
|
(read-all "-100.5e5")
|
|
|