diff --git a/read.scm b/read.scm index 1007c00..fb60fdf 100644 --- a/read.scm +++ b/read.scm @@ -54,6 +54,16 @@ (lambda () (read-char port)))) +(define string->read-function + (lambda (str) + (let ((i -1) + (length (string-length str))) + (lambda () + (set! i (+ i 1)) + (if (>= i length) + #f + (string-ref str i)))))) + ;;; READ: ;;; ;;; Stream readers contain mutable state. This is the case-folding mode @@ -67,9 +77,13 @@ ;;; (FOLD-CASE?): Returns a boolean if case folding is enabled. ;;; (FOLD-CASE! BOOL): Sets the case folding to BOOL. (define port->read - (lambda (read-function filename) - (let ((line-number 1) - (offset 0) + (lambda (read-function filename . location) + (let ((line-number (if (null? location) + 1 + (car location))) + (offset (if (null? location) + 0 + (cadr location))) (pushback-buffer '()) (datum-labels '()) (fold-case? #f)) @@ -141,6 +155,11 @@ (else (error 'read->port 'invalid (cons op args))))))) port)))) +(define ident->read + (lambda (location ident) + (apply port->read (string->read-function (ident 'value)) + location))) + ;;; ;;;;;;;;;;;;;; ;;; Character maps ;;; ;;;;;;;;;;;;;; @@ -180,6 +199,7 @@ ;;; An "action" is a procedure that takes four arguments: ;;; ;;; TABLE: The current table. + ;;; CHAR: The character that was matched against the CHARMAP in TABLE. ;;; ACC: An arbitrary "accumulator" value that is different depending ;;; on the readtable in question. @@ -210,18 +230,16 @@ ;;; Return a new readtable where CHAR is bound to ACTION. (define readtable:update - (lambda (table char action) - (readtable:new (%readtable:default-action table) - (car (charmap:insert - (%readtable:charmap table) char action))))) - -;;; Update TABLE to act on all characters in LST with ACTION. -(define readtable:update-list - (lambda (table lst action) - (fold (lambda (char table) - (readtable:update table char action)) - table - lst))) + (lambda (table chars action) + (let ((chars (if (pair? chars) + chars + (list chars)))) + (fold (lambda (char table) + (readtable:new (%readtable:default-action table) + (car (charmap:insert + (%readtable:charmap table) char action)))) + table + chars)))) ;;; Construct new readtable with no characters in its map and ;;; DEFAULT-ACTION as the default action. @@ -392,7 +410,7 @@ (readtable:empty/default (readtable:error 'read-improper-cdr "improper list has 1 cdr")) - (list readtable:update-list + (list readtable:update readtable:ASCII-whitespace readtable:skip) (list readtable:update %eol @@ -458,6 +476,236 @@ (error 'expect-port-list 'no-list-found) (readtable:read-proper-list table port)))) +;;; ;;;;;;;;;;;;;;;;;;;;;;;;; +;;; Implementation of a Pratt parser. In a Pratt parser, all tokens are +;;; "operators" with "precedence". +;;; +;;; Since Pratt parsers are implemented in terms of procedures, they can +;;; parse any grammar. +;;; +;;; This file implements the algorithm found in: +;;; +;;; van de Vanter, Michael L. "A Formalization and Correctness Proof of +;;; the CGOL Language System." (Master's Thesis). MIT Laboratory for +;;; Computer Science Technical Report MIT-LCS-TR-147 (Cambridge, +;;; Massachusetts). 1975. https://hdl.handle.net/1721.1/149442. + +;;; (PRATT:OPERATOR PRECEDENCE PROCEDURE) +;;; constructs a description of an operator. +;;; +;;; PRECEDENCE is an exact number describing the precedence of the +;;; operator, and for portability should only be an integer. +;;; +;;; (The special EOF operator, which always exists, has negative +;;; infinity precedence.) +;;; +;;; PROCEDURE is of the form (PROCEDURE OPERATORS LEFT TOKEN READER). +;;; OPERATORS is the operators map. +;;; LEFT is a PRATT:TOKEN, which was to the left of the operator. +;;; TOKEN is the token read from the reader wit the same type. +;;; READER is a reader object. +;;; +;;; PROCEDURE consumes tokens to obtain the +;;; right hand side of its expression. PROCEDURE produces a PRATT:TOKEN. +(define pratt:operator cons) +(define pratt:precedence car) +(define pratt:procedure cdr) + +(define pratt:search-token + (lambda (operators token) + (smap:search operators (symbol->string (token 'type))))) + +;;; Parse a stream from READER, where the precedence limit is specified as +;;; BINDING-POWER. If BINDING-POWER is #F, then there is no precedence +;;; limit. Operators are read from OPERATORS, which is a SMAP. +(define pratt + (lambda (operators binding-power reader) + (letrec + ((loop + (lambda (left) + (let ((token (reader 'peek))) + (let ((operator (pratt:search-token operators token))) + (if (null? next-operator) + (error 'pratt 'unknown-operator token + operators binding-power reader)) + (if (and binding-power + (>= (pratt:precedence operator) + binding-power)) + left + (parse operator left (reader 'read) reader)))))) + (parse + (lambda (operator left token reader) + (loop ((pratt:procedure operator) + operators + left + token + reader))))) + ;; Read the first token. + (let ((token (reader 'read))) + (let ((operator (pratt:search-token operators token))) + (if (null? operator) + (error 'pratt 'unknown-operator token + operators binding-power reader)) + (parse operator #f token reader)))))) + +;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; Math reader. +;;; +;;; Scheme numbers are a kludge. A proper LISP would either represent +;;; numbers as +;;; +;;; (+ 4 (*i 6)) or (@ (. 12 34) 2) or (e 12 3) etc. +;;; +;;; or just bite the bullet and include a full infix expression parser. +;;; +;;; This is a full infix expression parser. It will parse expressions +;;; such as SIN(X) or X * 5I/6. Identifiers and functions are any Scheme +;;; object. +;;; +;;; The math reader can be invoked using #MATH(expr). EXPR is an infix +;;; mathematical expression. #PREFIX(expr) reads EXPR in the normal +;;; reader. All other hash identifiers are unsupported in #MATH. + +;;; The basic number reader. The basic syntax is +;;; +;;; PREFIXES* SIGN? (RADIX+ ("." RADIX+)? | "." RADIX+) ("e" RADIX+)? "i"? +;;; Prefixes are handled by the "#" reader. + +;;; Incremental builder for a number, driven by the lexer. +;;; TODO: how will this be turned into a number, in the end? +(define read:number-builder + (lambda (radix-table) + (let ((main '()) + (decimal '()) + (exp-char #f) + (exp-digits '()) + (imaginary? #f) + (current-part 'main) + (exactness #f)) + (let ((append-part + (lambda (char) + (cond + ((eq? current-part 'main) + (set! main (cons char main))) + ((eq? current-part 'decimal) + (set! decimal (cons char decimal))) + ((eq? current-part 'exp) + (set! exp-digits (cons char exp-digits))))))) + (lambda (op . args) + (cond + ((eq? op 'finalize) + (if (not exactness) + (set! exactness 'exact)) + (list 'number-builder + main exactness decimal exp-char exp-digits imaginary?)) + ((eq? op 'exactness!) (set! exactness (car args))) + ((eq? op 'radix-table!) (set! radix-table (car args))) + ((eq? op 'set-imaginary!) + (set! imaginary? #t)) + ((eq? op 'set-decimal!) + (if (not exactness) + (set! exactness 'inexact)) + ;; + (if (not (eq? current-part 'main)) + (error 'number-builder "cannot set to decimal" current-part) + (set! current-part 'decimal))) + ((eq? op 'set-exponential!) + (if (not exactness) + (set! exactness 'inexact)) + ;; + (if (eq? current-part 'exp) + (error 'number-builder "cannot set to exponential")) + (set! current-part 'exp) + (set! exp-char (car args))) + ;; + ((eq? op 'radix-table) radix-table) + (else (append-part op)))))))) + +(define readtable:return-number + (lambda (_ char acc port) + (port 'push char) + (acc 'finalize))) + +;;; Main lexer for number table. +(define readtable:number-main-table + (readtable:process + (readtable:empty/default readtable:return-number) + (list readtable:update #\. + (lambda (table char number-builder port) + (number-builder 'set-decimal!) + (readtable:next table + number-builder + port))) + (list readtable:update (list #\e #\s #\f #\d #\l) + (lambda (table char number-builder port) + (number-builder 'set-exponential! char) + (readtable:next table + number-builder + port))) + (list readtable:update #\_ readtable:skip) + (list readtable:update #\i + (lambda (table char number-builder port) + (number-builder 'set-imaginary!) + (number-builder 'finalize))))) + +;;; Generate a lexer given a list of numbers in the radix. +(define readtable:for-radix + (lambda (radix-list) + (readtable:process + readtable:number-main-table + (list readtable:update radix-list + (lambda (table char number-builder port) + (number-builder char) + (readtable:next table number-builder port)))))) + +(define readtable:for-bin + (readtable:for-radix '(#\0 #\1))) +(define readtable:for-oct + (readtable:for-radix '(#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7))) +(define readtable:for-dec + (readtable:for-radix '(#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9))) +(define readtable:for-hex + (readtable:for-radix '(#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9 + #\a #\b #\c #\d #\e #\f))) + +;;; Read the sign at the start of a number. +(define readtable:read-sign + (readtable:process + (readtable:empty/default + (lambda (_ char number-builder port) + (readtable:act (number-builder 'radix-table) + char + number-builder + port))) + (list readtable:update (list #\+ #\-) + (lambda (_ char number-builder port) + (number-builder 'sign! char) + (readtable:next (number-builder 'radix-table) + number-builder + port))) + (list readtable:update #\_ readtable:skip))) + +(define readtable:read-prefix-char + (let ((act (lambda args + (lambda (_ char number-builder port) + (apply number-builder args) + (readtable:next readtable:read-number-prefix + number-builder + port))))) + (readtable:process + (readtable:empty/default (readtable:error "invalid prefix")) + (list readtable:update #\i (act 'exactness! 'inexact)) + (list readtable:update #\e (act 'exactness! 'exact)) + (list readtable:update #\b (act 'radix-table! readtable:for-bin)) + (list readtable:update #\o (act 'radix-table! readtable:for-oct)) + (list readtable:update #\d (act 'radix-table! readtable:for-dec)) + (list readtable:update #\x (act 'radix-table! readtable:for-hex))))) + +(define readtable:read-number-prefix + (readtable:process + (readtable:empty/default (readtable:jump readtable:read-sign)) + (list readtable:update #\# (readtable:jump/next readtable:read-prefix-char)))) + ;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; Reader for stuff that start with "#" ;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -559,7 +807,7 @@ (readtable:process (readtable:empty/default (readtable:error 'datum-label-next "invalid datum label/ref")) - (list readtable:update-list readtable:digits readtable:push-char) + (list readtable:update readtable:digits readtable:push-char) (list readtable:update #\= (lambda (_ __ acc port) (acc 'finalize->ident) @@ -631,10 +879,12 @@ (let ((ident (readtable:read-ident #f char #f port))) (let ((container (smap:search read:hash-messages (ident 'value)))) - ;; TODO: check if the hash message starts a number. (if (null? container) - (error 'read-hash-messages 'unknown-constant - (ident 'value)) + (let ((number-port (ident->read (port 'location) ident))) + (number-port 'fold-case! #t) + (readtable:next readtable:read-prefix-char + (read:number-builder readtable:for-dec) + number-port)) ((map:val container) acc port)))))) ;;; ;;;;;;;;;;;;;;;;;;;;;; @@ -676,12 +926,12 @@ (define readtable:hash (readtable:process (readtable:empty/default read:read-hash-messages) - (list readtable:update-list readtable:ASCII-whitespace + (list readtable:update readtable:ASCII-whitespace (readtable:error 'hash 'whitespace-after-hash)) (list readtable:update #\| (readtable:jump/next readtable:block-comment)) (list readtable:update #\; readtable:datum-comment) (list readtable:update #\\ readtable:character) - (list readtable:update-list readtable:digits ; Datum labels + (list readtable:update readtable:digits ; Datum labels (lambda (_ char toplevel port) (readtable:act readtable:datum-label-next char @@ -698,7 +948,7 @@ (lambda () (readtable:process (readtable:empty/default readtable:read-ident) - (list readtable:update-list readtable:ASCII-whitespace readtable:skip) + (list readtable:update readtable:ASCII-whitespace readtable:skip) (list readtable:update #f (readtable:return 'eof)) (list readtable:update %bol readtable:read-list) (list readtable:update %eol (readtable:error 'top "unbalanced list")) @@ -794,3 +1044,10 @@ (read-all "#u8(x y z w)") (read-all "#\\newline") (read-all "#\\a") + +(read-all "#b11") +(read-all "#XFF") +(read-all "#i#x10") +(read-all "#d1234.5678") +(read-all "#e10.5") +(read-all "#d10.24f12")