aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorGravatar Peter McGoron 2024-10-13 10:32:01 -0400
committerGravatar Peter McGoron 2024-10-13 10:32:01 -0400
commit6950a2f4e585cfb04c805e9ea0c1beaa12f0b9e7 (patch)
tree0b34295e9d0b3c960e3f9d476d77405c0b577db3
parentread: make whitespace after "#" an error (diff)
add basic number reader
-rw-r--r--read.scm303
1 files changed, 280 insertions, 23 deletions
diff --git a/read.scm b/read.scm
index 1007c00..fb60fdf 100644
--- a/read.scm
+++ b/read.scm
@@ -54,6 +54,16 @@
(lambda ()
(read-char port))))
+(define string->read-function
+ (lambda (str)
+ (let ((i -1)
+ (length (string-length str)))
+ (lambda ()
+ (set! i (+ i 1))
+ (if (>= i length)
+ #f
+ (string-ref str i))))))
+
;;; READ:
;;;
;;; Stream readers contain mutable state. This is the case-folding mode
@@ -67,9 +77,13 @@
;;; (FOLD-CASE?): Returns a boolean if case folding is enabled.
;;; (FOLD-CASE! BOOL): Sets the case folding to BOOL.
(define port->read
- (lambda (read-function filename)
- (let ((line-number 1)
- (offset 0)
+ (lambda (read-function filename . location)
+ (let ((line-number (if (null? location)
+ 1
+ (car location)))
+ (offset (if (null? location)
+ 0
+ (cadr location)))
(pushback-buffer '())
(datum-labels '())
(fold-case? #f))
@@ -141,6 +155,11 @@
(else (error 'read->port 'invalid (cons op args)))))))
port))))
+(define ident->read
+ (lambda (location ident)
+ (apply port->read (string->read-function (ident 'value))
+ location)))
+
;;; ;;;;;;;;;;;;;;
;;; Character maps
;;; ;;;;;;;;;;;;;;
@@ -180,6 +199,7 @@
;;; An "action" is a procedure that takes four arguments:
;;;
;;; TABLE: The current table.
+
;;; CHAR: The character that was matched against the CHARMAP in TABLE.
;;; ACC: An arbitrary "accumulator" value that is different depending
;;; on the readtable in question.
@@ -210,18 +230,16 @@
;;; Return a new readtable where CHAR is bound to ACTION.
(define readtable:update
- (lambda (table char action)
- (readtable:new (%readtable:default-action table)
- (car (charmap:insert
- (%readtable:charmap table) char action)))))
-
-;;; Update TABLE to act on all characters in LST with ACTION.
-(define readtable:update-list
- (lambda (table lst action)
- (fold (lambda (char table)
- (readtable:update table char action))
- table
- lst)))
+ (lambda (table chars action)
+ (let ((chars (if (pair? chars)
+ chars
+ (list chars))))
+ (fold (lambda (char table)
+ (readtable:new (%readtable:default-action table)
+ (car (charmap:insert
+ (%readtable:charmap table) char action))))
+ table
+ chars))))
;;; Construct new readtable with no characters in its map and
;;; DEFAULT-ACTION as the default action.
@@ -392,7 +410,7 @@
(readtable:empty/default (readtable:error
'read-improper-cdr
"improper list has 1 cdr"))
- (list readtable:update-list
+ (list readtable:update
readtable:ASCII-whitespace
readtable:skip)
(list readtable:update %eol
@@ -458,6 +476,236 @@
(error 'expect-port-list 'no-list-found)
(readtable:read-proper-list table port))))
+;;; ;;;;;;;;;;;;;;;;;;;;;;;;;
+;;; Implementation of a Pratt parser. In a Pratt parser, all tokens are
+;;; "operators" with "precedence".
+;;;
+;;; Since Pratt parsers are implemented in terms of procedures, they can
+;;; parse any grammar.
+;;;
+;;; This file implements the algorithm found in:
+;;;
+;;; van de Vanter, Michael L. "A Formalization and Correctness Proof of
+;;; the CGOL Language System." (Master's Thesis). MIT Laboratory for
+;;; Computer Science Technical Report MIT-LCS-TR-147 (Cambridge,
+;;; Massachusetts). 1975. https://hdl.handle.net/1721.1/149442.
+
+;;; (PRATT:OPERATOR PRECEDENCE PROCEDURE)
+;;; constructs a description of an operator.
+;;;
+;;; PRECEDENCE is an exact number describing the precedence of the
+;;; operator, and for portability should only be an integer.
+;;;
+;;; (The special EOF operator, which always exists, has negative
+;;; infinity precedence.)
+;;;
+;;; PROCEDURE is of the form (PROCEDURE OPERATORS LEFT TOKEN READER).
+;;; OPERATORS is the operators map.
+;;; LEFT is a PRATT:TOKEN, which was to the left of the operator.
+;;; TOKEN is the token read from the reader wit the same type.
+;;; READER is a reader object.
+;;;
+;;; PROCEDURE consumes tokens to obtain the
+;;; right hand side of its expression. PROCEDURE produces a PRATT:TOKEN.
+(define pratt:operator cons)
+(define pratt:precedence car)
+(define pratt:procedure cdr)
+
+(define pratt:search-token
+ (lambda (operators token)
+ (smap:search operators (symbol->string (token 'type)))))
+
+;;; Parse a stream from READER, where the precedence limit is specified as
+;;; BINDING-POWER. If BINDING-POWER is #F, then there is no precedence
+;;; limit. Operators are read from OPERATORS, which is a SMAP.
+(define pratt
+ (lambda (operators binding-power reader)
+ (letrec
+ ((loop
+ (lambda (left)
+ (let ((token (reader 'peek)))
+ (let ((operator (pratt:search-token operators token)))
+ (if (null? next-operator)
+ (error 'pratt 'unknown-operator token
+ operators binding-power reader))
+ (if (and binding-power
+ (>= (pratt:precedence operator)
+ binding-power))
+ left
+ (parse operator left (reader 'read) reader))))))
+ (parse
+ (lambda (operator left token reader)
+ (loop ((pratt:procedure operator)
+ operators
+ left
+ token
+ reader)))))
+ ;; Read the first token.
+ (let ((token (reader 'read)))
+ (let ((operator (pratt:search-token operators token)))
+ (if (null? operator)
+ (error 'pratt 'unknown-operator token
+ operators binding-power reader))
+ (parse operator #f token reader))))))
+
+;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;; Math reader.
+;;;
+;;; Scheme numbers are a kludge. A proper LISP would either represent
+;;; numbers as
+;;;
+;;; (+ 4 (*i 6)) or (@ (. 12 34) 2) or (e 12 3) etc.
+;;;
+;;; or just bite the bullet and include a full infix expression parser.
+;;;
+;;; This is a full infix expression parser. It will parse expressions
+;;; such as SIN(X) or X * 5I/6. Identifiers and functions are any Scheme
+;;; object.
+;;;
+;;; The math reader can be invoked using #MATH(expr). EXPR is an infix
+;;; mathematical expression. #PREFIX(expr) reads EXPR in the normal
+;;; reader. All other hash identifiers are unsupported in #MATH.
+
+;;; The basic number reader. The basic syntax is
+;;;
+;;; PREFIXES* SIGN? (RADIX+ ("." RADIX+)? | "." RADIX+) ("e" RADIX+)? "i"?
+;;; Prefixes are handled by the "#" reader.
+
+;;; Incremental builder for a number, driven by the lexer.
+;;; TODO: how will this be turned into a number, in the end?
+(define read:number-builder
+ (lambda (radix-table)
+ (let ((main '())
+ (decimal '())
+ (exp-char #f)
+ (exp-digits '())
+ (imaginary? #f)
+ (current-part 'main)
+ (exactness #f))
+ (let ((append-part
+ (lambda (char)
+ (cond
+ ((eq? current-part 'main)
+ (set! main (cons char main)))
+ ((eq? current-part 'decimal)
+ (set! decimal (cons char decimal)))
+ ((eq? current-part 'exp)
+ (set! exp-digits (cons char exp-digits)))))))
+ (lambda (op . args)
+ (cond
+ ((eq? op 'finalize)
+ (if (not exactness)
+ (set! exactness 'exact))
+ (list 'number-builder
+ main exactness decimal exp-char exp-digits imaginary?))
+ ((eq? op 'exactness!) (set! exactness (car args)))
+ ((eq? op 'radix-table!) (set! radix-table (car args)))
+ ((eq? op 'set-imaginary!)
+ (set! imaginary? #t))
+ ((eq? op 'set-decimal!)
+ (if (not exactness)
+ (set! exactness 'inexact))
+ ;;
+ (if (not (eq? current-part 'main))
+ (error 'number-builder "cannot set to decimal" current-part)
+ (set! current-part 'decimal)))
+ ((eq? op 'set-exponential!)
+ (if (not exactness)
+ (set! exactness 'inexact))
+ ;;
+ (if (eq? current-part 'exp)
+ (error 'number-builder "cannot set to exponential"))
+ (set! current-part 'exp)
+ (set! exp-char (car args)))
+ ;;
+ ((eq? op 'radix-table) radix-table)
+ (else (append-part op))))))))
+
+(define readtable:return-number
+ (lambda (_ char acc port)
+ (port 'push char)
+ (acc 'finalize)))
+
+;;; Main lexer for number table.
+(define readtable:number-main-table
+ (readtable:process
+ (readtable:empty/default readtable:return-number)
+ (list readtable:update #\.
+ (lambda (table char number-builder port)
+ (number-builder 'set-decimal!)
+ (readtable:next table
+ number-builder
+ port)))
+ (list readtable:update (list #\e #\s #\f #\d #\l)
+ (lambda (table char number-builder port)
+ (number-builder 'set-exponential! char)
+ (readtable:next table
+ number-builder
+ port)))
+ (list readtable:update #\_ readtable:skip)
+ (list readtable:update #\i
+ (lambda (table char number-builder port)
+ (number-builder 'set-imaginary!)
+ (number-builder 'finalize)))))
+
+;;; Generate a lexer given a list of numbers in the radix.
+(define readtable:for-radix
+ (lambda (radix-list)
+ (readtable:process
+ readtable:number-main-table
+ (list readtable:update radix-list
+ (lambda (table char number-builder port)
+ (number-builder char)
+ (readtable:next table number-builder port))))))
+
+(define readtable:for-bin
+ (readtable:for-radix '(#\0 #\1)))
+(define readtable:for-oct
+ (readtable:for-radix '(#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7)))
+(define readtable:for-dec
+ (readtable:for-radix '(#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9)))
+(define readtable:for-hex
+ (readtable:for-radix '(#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9
+ #\a #\b #\c #\d #\e #\f)))
+
+;;; Read the sign at the start of a number.
+(define readtable:read-sign
+ (readtable:process
+ (readtable:empty/default
+ (lambda (_ char number-builder port)
+ (readtable:act (number-builder 'radix-table)
+ char
+ number-builder
+ port)))
+ (list readtable:update (list #\+ #\-)
+ (lambda (_ char number-builder port)
+ (number-builder 'sign! char)
+ (readtable:next (number-builder 'radix-table)
+ number-builder
+ port)))
+ (list readtable:update #\_ readtable:skip)))
+
+(define readtable:read-prefix-char
+ (let ((act (lambda args
+ (lambda (_ char number-builder port)
+ (apply number-builder args)
+ (readtable:next readtable:read-number-prefix
+ number-builder
+ port)))))
+ (readtable:process
+ (readtable:empty/default (readtable:error "invalid prefix"))
+ (list readtable:update #\i (act 'exactness! 'inexact))
+ (list readtable:update #\e (act 'exactness! 'exact))
+ (list readtable:update #\b (act 'radix-table! readtable:for-bin))
+ (list readtable:update #\o (act 'radix-table! readtable:for-oct))
+ (list readtable:update #\d (act 'radix-table! readtable:for-dec))
+ (list readtable:update #\x (act 'radix-table! readtable:for-hex)))))
+
+(define readtable:read-number-prefix
+ (readtable:process
+ (readtable:empty/default (readtable:jump readtable:read-sign))
+ (list readtable:update #\# (readtable:jump/next readtable:read-prefix-char))))
+
;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; Reader for stuff that start with "#"
;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -559,7 +807,7 @@
(readtable:process
(readtable:empty/default (readtable:error 'datum-label-next
"invalid datum label/ref"))
- (list readtable:update-list readtable:digits readtable:push-char)
+ (list readtable:update readtable:digits readtable:push-char)
(list readtable:update #\=
(lambda (_ __ acc port)
(acc 'finalize->ident)
@@ -631,10 +879,12 @@
(let ((ident (readtable:read-ident #f char #f port)))
(let ((container (smap:search read:hash-messages
(ident 'value))))
- ;; TODO: check if the hash message starts a number.
(if (null? container)
- (error 'read-hash-messages 'unknown-constant
- (ident 'value))
+ (let ((number-port (ident->read (port 'location) ident)))
+ (number-port 'fold-case! #t)
+ (readtable:next readtable:read-prefix-char
+ (read:number-builder readtable:for-dec)
+ number-port))
((map:val container) acc port))))))
;;; ;;;;;;;;;;;;;;;;;;;;;;
@@ -676,12 +926,12 @@
(define readtable:hash
(readtable:process
(readtable:empty/default read:read-hash-messages)
- (list readtable:update-list readtable:ASCII-whitespace
+ (list readtable:update readtable:ASCII-whitespace
(readtable:error 'hash 'whitespace-after-hash))
(list readtable:update #\| (readtable:jump/next readtable:block-comment))
(list readtable:update #\; readtable:datum-comment)
(list readtable:update #\\ readtable:character)
- (list readtable:update-list readtable:digits ; Datum labels
+ (list readtable:update readtable:digits ; Datum labels
(lambda (_ char toplevel port)
(readtable:act readtable:datum-label-next
char
@@ -698,7 +948,7 @@
(lambda ()
(readtable:process
(readtable:empty/default readtable:read-ident)
- (list readtable:update-list readtable:ASCII-whitespace readtable:skip)
+ (list readtable:update readtable:ASCII-whitespace readtable:skip)
(list readtable:update #f (readtable:return 'eof))
(list readtable:update %bol readtable:read-list)
(list readtable:update %eol (readtable:error 'top "unbalanced list"))
@@ -794,3 +1044,10 @@
(read-all "#u8(x y z w)")
(read-all "#\\newline")
(read-all "#\\a")
+
+(read-all "#b11")
+(read-all "#XFF")
+(read-all "#i#x10")
+(read-all "#d1234.5678")
+(read-all "#e10.5")
+(read-all "#d10.24f12")