add basic number reader

This commit is contained in:
Peter McGoron 2024-10-13 10:32:01 -04:00
parent 3a3da137cf
commit 6950a2f4e5
1 changed files with 280 additions and 23 deletions

303
read.scm
View File

@ -54,6 +54,16 @@
(lambda ()
(read-char port))))
(define string->read-function
(lambda (str)
(let ((i -1)
(length (string-length str)))
(lambda ()
(set! i (+ i 1))
(if (>= i length)
#f
(string-ref str i))))))
;;; READ:
;;;
;;; Stream readers contain mutable state. This is the case-folding mode
@ -67,9 +77,13 @@
;;; (FOLD-CASE?): Returns a boolean if case folding is enabled.
;;; (FOLD-CASE! BOOL): Sets the case folding to BOOL.
(define port->read
(lambda (read-function filename)
(let ((line-number 1)
(offset 0)
(lambda (read-function filename . location)
(let ((line-number (if (null? location)
1
(car location)))
(offset (if (null? location)
0
(cadr location)))
(pushback-buffer '())
(datum-labels '())
(fold-case? #f))
@ -141,6 +155,11 @@
(else (error 'read->port 'invalid (cons op args)))))))
port))))
(define ident->read
(lambda (location ident)
(apply port->read (string->read-function (ident 'value))
location)))
;;; ;;;;;;;;;;;;;;
;;; Character maps
;;; ;;;;;;;;;;;;;;
@ -180,6 +199,7 @@
;;; An "action" is a procedure that takes four arguments:
;;;
;;; TABLE: The current table.
;;; CHAR: The character that was matched against the CHARMAP in TABLE.
;;; ACC: An arbitrary "accumulator" value that is different depending
;;; on the readtable in question.
@ -210,18 +230,16 @@
;;; Return a new readtable where CHAR is bound to ACTION.
(define readtable:update
(lambda (table char action)
(readtable:new (%readtable:default-action table)
(car (charmap:insert
(%readtable:charmap table) char action)))))
;;; Update TABLE to act on all characters in LST with ACTION.
(define readtable:update-list
(lambda (table lst action)
(fold (lambda (char table)
(readtable:update table char action))
table
lst)))
(lambda (table chars action)
(let ((chars (if (pair? chars)
chars
(list chars))))
(fold (lambda (char table)
(readtable:new (%readtable:default-action table)
(car (charmap:insert
(%readtable:charmap table) char action))))
table
chars))))
;;; Construct new readtable with no characters in its map and
;;; DEFAULT-ACTION as the default action.
@ -392,7 +410,7 @@
(readtable:empty/default (readtable:error
'read-improper-cdr
"improper list has 1 cdr"))
(list readtable:update-list
(list readtable:update
readtable:ASCII-whitespace
readtable:skip)
(list readtable:update %eol
@ -458,6 +476,236 @@
(error 'expect-port-list 'no-list-found)
(readtable:read-proper-list table port))))
;;; ;;;;;;;;;;;;;;;;;;;;;;;;;
;;; Implementation of a Pratt parser. In a Pratt parser, all tokens are
;;; "operators" with "precedence".
;;;
;;; Since Pratt parsers are implemented in terms of procedures, they can
;;; parse any grammar.
;;;
;;; This file implements the algorithm found in:
;;;
;;; van de Vanter, Michael L. "A Formalization and Correctness Proof of
;;; the CGOL Language System." (Master's Thesis). MIT Laboratory for
;;; Computer Science Technical Report MIT-LCS-TR-147 (Cambridge,
;;; Massachusetts). 1975. https://hdl.handle.net/1721.1/149442.
;;; (PRATT:OPERATOR PRECEDENCE PROCEDURE)
;;; constructs a description of an operator.
;;;
;;; PRECEDENCE is an exact number describing the precedence of the
;;; operator, and for portability should only be an integer.
;;;
;;; (The special EOF operator, which always exists, has negative
;;; infinity precedence.)
;;;
;;; PROCEDURE is of the form (PROCEDURE OPERATORS LEFT TOKEN READER).
;;; OPERATORS is the operators map.
;;; LEFT is a PRATT:TOKEN, which was to the left of the operator.
;;; TOKEN is the token read from the reader wit the same type.
;;; READER is a reader object.
;;;
;;; PROCEDURE consumes tokens to obtain the
;;; right hand side of its expression. PROCEDURE produces a PRATT:TOKEN.
(define pratt:operator cons)
(define pratt:precedence car)
(define pratt:procedure cdr)
(define pratt:search-token
(lambda (operators token)
(smap:search operators (symbol->string (token 'type)))))
;;; Parse a stream from READER, where the precedence limit is specified as
;;; BINDING-POWER. If BINDING-POWER is #F, then there is no precedence
;;; limit. Operators are read from OPERATORS, which is a SMAP.
(define pratt
(lambda (operators binding-power reader)
(letrec
((loop
(lambda (left)
(let ((token (reader 'peek)))
(let ((operator (pratt:search-token operators token)))
(if (null? next-operator)
(error 'pratt 'unknown-operator token
operators binding-power reader))
(if (and binding-power
(>= (pratt:precedence operator)
binding-power))
left
(parse operator left (reader 'read) reader))))))
(parse
(lambda (operator left token reader)
(loop ((pratt:procedure operator)
operators
left
token
reader)))))
;; Read the first token.
(let ((token (reader 'read)))
(let ((operator (pratt:search-token operators token)))
(if (null? operator)
(error 'pratt 'unknown-operator token
operators binding-power reader))
(parse operator #f token reader))))))
;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; Math reader.
;;;
;;; Scheme numbers are a kludge. A proper LISP would either represent
;;; numbers as
;;;
;;; (+ 4 (*i 6)) or (@ (. 12 34) 2) or (e 12 3) etc.
;;;
;;; or just bite the bullet and include a full infix expression parser.
;;;
;;; This is a full infix expression parser. It will parse expressions
;;; such as SIN(X) or X * 5I/6. Identifiers and functions are any Scheme
;;; object.
;;;
;;; The math reader can be invoked using #MATH(expr). EXPR is an infix
;;; mathematical expression. #PREFIX(expr) reads EXPR in the normal
;;; reader. All other hash identifiers are unsupported in #MATH.
;;; The basic number reader. The basic syntax is
;;;
;;; PREFIXES* SIGN? (RADIX+ ("." RADIX+)? | "." RADIX+) ("e" RADIX+)? "i"?
;;; Prefixes are handled by the "#" reader.
;;; Incremental builder for a number, driven by the lexer.
;;; TODO: how will this be turned into a number, in the end?
(define read:number-builder
(lambda (radix-table)
(let ((main '())
(decimal '())
(exp-char #f)
(exp-digits '())
(imaginary? #f)
(current-part 'main)
(exactness #f))
(let ((append-part
(lambda (char)
(cond
((eq? current-part 'main)
(set! main (cons char main)))
((eq? current-part 'decimal)
(set! decimal (cons char decimal)))
((eq? current-part 'exp)
(set! exp-digits (cons char exp-digits)))))))
(lambda (op . args)
(cond
((eq? op 'finalize)
(if (not exactness)
(set! exactness 'exact))
(list 'number-builder
main exactness decimal exp-char exp-digits imaginary?))
((eq? op 'exactness!) (set! exactness (car args)))
((eq? op 'radix-table!) (set! radix-table (car args)))
((eq? op 'set-imaginary!)
(set! imaginary? #t))
((eq? op 'set-decimal!)
(if (not exactness)
(set! exactness 'inexact))
;;
(if (not (eq? current-part 'main))
(error 'number-builder "cannot set to decimal" current-part)
(set! current-part 'decimal)))
((eq? op 'set-exponential!)
(if (not exactness)
(set! exactness 'inexact))
;;
(if (eq? current-part 'exp)
(error 'number-builder "cannot set to exponential"))
(set! current-part 'exp)
(set! exp-char (car args)))
;;
((eq? op 'radix-table) radix-table)
(else (append-part op))))))))
(define readtable:return-number
(lambda (_ char acc port)
(port 'push char)
(acc 'finalize)))
;;; Main lexer for number table.
(define readtable:number-main-table
(readtable:process
(readtable:empty/default readtable:return-number)
(list readtable:update #\.
(lambda (table char number-builder port)
(number-builder 'set-decimal!)
(readtable:next table
number-builder
port)))
(list readtable:update (list #\e #\s #\f #\d #\l)
(lambda (table char number-builder port)
(number-builder 'set-exponential! char)
(readtable:next table
number-builder
port)))
(list readtable:update #\_ readtable:skip)
(list readtable:update #\i
(lambda (table char number-builder port)
(number-builder 'set-imaginary!)
(number-builder 'finalize)))))
;;; Generate a lexer given a list of numbers in the radix.
(define readtable:for-radix
(lambda (radix-list)
(readtable:process
readtable:number-main-table
(list readtable:update radix-list
(lambda (table char number-builder port)
(number-builder char)
(readtable:next table number-builder port))))))
(define readtable:for-bin
(readtable:for-radix '(#\0 #\1)))
(define readtable:for-oct
(readtable:for-radix '(#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7)))
(define readtable:for-dec
(readtable:for-radix '(#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9)))
(define readtable:for-hex
(readtable:for-radix '(#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9
#\a #\b #\c #\d #\e #\f)))
;;; Read the sign at the start of a number.
(define readtable:read-sign
(readtable:process
(readtable:empty/default
(lambda (_ char number-builder port)
(readtable:act (number-builder 'radix-table)
char
number-builder
port)))
(list readtable:update (list #\+ #\-)
(lambda (_ char number-builder port)
(number-builder 'sign! char)
(readtable:next (number-builder 'radix-table)
number-builder
port)))
(list readtable:update #\_ readtable:skip)))
(define readtable:read-prefix-char
(let ((act (lambda args
(lambda (_ char number-builder port)
(apply number-builder args)
(readtable:next readtable:read-number-prefix
number-builder
port)))))
(readtable:process
(readtable:empty/default (readtable:error "invalid prefix"))
(list readtable:update #\i (act 'exactness! 'inexact))
(list readtable:update #\e (act 'exactness! 'exact))
(list readtable:update #\b (act 'radix-table! readtable:for-bin))
(list readtable:update #\o (act 'radix-table! readtable:for-oct))
(list readtable:update #\d (act 'radix-table! readtable:for-dec))
(list readtable:update #\x (act 'radix-table! readtable:for-hex)))))
(define readtable:read-number-prefix
(readtable:process
(readtable:empty/default (readtable:jump readtable:read-sign))
(list readtable:update #\# (readtable:jump/next readtable:read-prefix-char))))
;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; Reader for stuff that start with "#"
;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@ -559,7 +807,7 @@
(readtable:process
(readtable:empty/default (readtable:error 'datum-label-next
"invalid datum label/ref"))
(list readtable:update-list readtable:digits readtable:push-char)
(list readtable:update readtable:digits readtable:push-char)
(list readtable:update #\=
(lambda (_ __ acc port)
(acc 'finalize->ident)
@ -631,10 +879,12 @@
(let ((ident (readtable:read-ident #f char #f port)))
(let ((container (smap:search read:hash-messages
(ident 'value))))
;; TODO: check if the hash message starts a number.
(if (null? container)
(error 'read-hash-messages 'unknown-constant
(ident 'value))
(let ((number-port (ident->read (port 'location) ident)))
(number-port 'fold-case! #t)
(readtable:next readtable:read-prefix-char
(read:number-builder readtable:for-dec)
number-port))
((map:val container) acc port))))))
;;; ;;;;;;;;;;;;;;;;;;;;;;
@ -676,12 +926,12 @@
(define readtable:hash
(readtable:process
(readtable:empty/default read:read-hash-messages)
(list readtable:update-list readtable:ASCII-whitespace
(list readtable:update readtable:ASCII-whitespace
(readtable:error 'hash 'whitespace-after-hash))
(list readtable:update #\| (readtable:jump/next readtable:block-comment))
(list readtable:update #\; readtable:datum-comment)
(list readtable:update #\\ readtable:character)
(list readtable:update-list readtable:digits ; Datum labels
(list readtable:update readtable:digits ; Datum labels
(lambda (_ char toplevel port)
(readtable:act readtable:datum-label-next
char
@ -698,7 +948,7 @@
(lambda ()
(readtable:process
(readtable:empty/default readtable:read-ident)
(list readtable:update-list readtable:ASCII-whitespace readtable:skip)
(list readtable:update readtable:ASCII-whitespace readtable:skip)
(list readtable:update #f (readtable:return 'eof))
(list readtable:update %bol readtable:read-list)
(list readtable:update %eol (readtable:error 'top "unbalanced list"))
@ -794,3 +1044,10 @@
(read-all "#u8(x y z w)")
(read-all "#\\newline")
(read-all "#\\a")
(read-all "#b11")
(read-all "#XFF")
(read-all "#i#x10")
(read-all "#d1234.5678")
(read-all "#e10.5")
(read-all "#d10.24f12")