add basic number reader
This commit is contained in:
parent
3a3da137cf
commit
6950a2f4e5
303
read.scm
303
read.scm
|
@ -54,6 +54,16 @@
|
||||||
(lambda ()
|
(lambda ()
|
||||||
(read-char port))))
|
(read-char port))))
|
||||||
|
|
||||||
|
(define string->read-function
|
||||||
|
(lambda (str)
|
||||||
|
(let ((i -1)
|
||||||
|
(length (string-length str)))
|
||||||
|
(lambda ()
|
||||||
|
(set! i (+ i 1))
|
||||||
|
(if (>= i length)
|
||||||
|
#f
|
||||||
|
(string-ref str i))))))
|
||||||
|
|
||||||
;;; READ:
|
;;; READ:
|
||||||
;;;
|
;;;
|
||||||
;;; Stream readers contain mutable state. This is the case-folding mode
|
;;; Stream readers contain mutable state. This is the case-folding mode
|
||||||
|
@ -67,9 +77,13 @@
|
||||||
;;; (FOLD-CASE?): Returns a boolean if case folding is enabled.
|
;;; (FOLD-CASE?): Returns a boolean if case folding is enabled.
|
||||||
;;; (FOLD-CASE! BOOL): Sets the case folding to BOOL.
|
;;; (FOLD-CASE! BOOL): Sets the case folding to BOOL.
|
||||||
(define port->read
|
(define port->read
|
||||||
(lambda (read-function filename)
|
(lambda (read-function filename . location)
|
||||||
(let ((line-number 1)
|
(let ((line-number (if (null? location)
|
||||||
(offset 0)
|
1
|
||||||
|
(car location)))
|
||||||
|
(offset (if (null? location)
|
||||||
|
0
|
||||||
|
(cadr location)))
|
||||||
(pushback-buffer '())
|
(pushback-buffer '())
|
||||||
(datum-labels '())
|
(datum-labels '())
|
||||||
(fold-case? #f))
|
(fold-case? #f))
|
||||||
|
@ -141,6 +155,11 @@
|
||||||
(else (error 'read->port 'invalid (cons op args)))))))
|
(else (error 'read->port 'invalid (cons op args)))))))
|
||||||
port))))
|
port))))
|
||||||
|
|
||||||
|
(define ident->read
|
||||||
|
(lambda (location ident)
|
||||||
|
(apply port->read (string->read-function (ident 'value))
|
||||||
|
location)))
|
||||||
|
|
||||||
;;; ;;;;;;;;;;;;;;
|
;;; ;;;;;;;;;;;;;;
|
||||||
;;; Character maps
|
;;; Character maps
|
||||||
;;; ;;;;;;;;;;;;;;
|
;;; ;;;;;;;;;;;;;;
|
||||||
|
@ -180,6 +199,7 @@
|
||||||
;;; An "action" is a procedure that takes four arguments:
|
;;; An "action" is a procedure that takes four arguments:
|
||||||
;;;
|
;;;
|
||||||
;;; TABLE: The current table.
|
;;; TABLE: The current table.
|
||||||
|
|
||||||
;;; CHAR: The character that was matched against the CHARMAP in TABLE.
|
;;; CHAR: The character that was matched against the CHARMAP in TABLE.
|
||||||
;;; ACC: An arbitrary "accumulator" value that is different depending
|
;;; ACC: An arbitrary "accumulator" value that is different depending
|
||||||
;;; on the readtable in question.
|
;;; on the readtable in question.
|
||||||
|
@ -210,18 +230,16 @@
|
||||||
|
|
||||||
;;; Return a new readtable where CHAR is bound to ACTION.
|
;;; Return a new readtable where CHAR is bound to ACTION.
|
||||||
(define readtable:update
|
(define readtable:update
|
||||||
(lambda (table char action)
|
(lambda (table chars action)
|
||||||
(readtable:new (%readtable:default-action table)
|
(let ((chars (if (pair? chars)
|
||||||
(car (charmap:insert
|
chars
|
||||||
(%readtable:charmap table) char action)))))
|
(list chars))))
|
||||||
|
(fold (lambda (char table)
|
||||||
;;; Update TABLE to act on all characters in LST with ACTION.
|
(readtable:new (%readtable:default-action table)
|
||||||
(define readtable:update-list
|
(car (charmap:insert
|
||||||
(lambda (table lst action)
|
(%readtable:charmap table) char action))))
|
||||||
(fold (lambda (char table)
|
table
|
||||||
(readtable:update table char action))
|
chars))))
|
||||||
table
|
|
||||||
lst)))
|
|
||||||
|
|
||||||
;;; Construct new readtable with no characters in its map and
|
;;; Construct new readtable with no characters in its map and
|
||||||
;;; DEFAULT-ACTION as the default action.
|
;;; DEFAULT-ACTION as the default action.
|
||||||
|
@ -392,7 +410,7 @@
|
||||||
(readtable:empty/default (readtable:error
|
(readtable:empty/default (readtable:error
|
||||||
'read-improper-cdr
|
'read-improper-cdr
|
||||||
"improper list has 1 cdr"))
|
"improper list has 1 cdr"))
|
||||||
(list readtable:update-list
|
(list readtable:update
|
||||||
readtable:ASCII-whitespace
|
readtable:ASCII-whitespace
|
||||||
readtable:skip)
|
readtable:skip)
|
||||||
(list readtable:update %eol
|
(list readtable:update %eol
|
||||||
|
@ -458,6 +476,236 @@
|
||||||
(error 'expect-port-list 'no-list-found)
|
(error 'expect-port-list 'no-list-found)
|
||||||
(readtable:read-proper-list table port))))
|
(readtable:read-proper-list table port))))
|
||||||
|
|
||||||
|
;;; ;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
;;; Implementation of a Pratt parser. In a Pratt parser, all tokens are
|
||||||
|
;;; "operators" with "precedence".
|
||||||
|
;;;
|
||||||
|
;;; Since Pratt parsers are implemented in terms of procedures, they can
|
||||||
|
;;; parse any grammar.
|
||||||
|
;;;
|
||||||
|
;;; This file implements the algorithm found in:
|
||||||
|
;;;
|
||||||
|
;;; van de Vanter, Michael L. "A Formalization and Correctness Proof of
|
||||||
|
;;; the CGOL Language System." (Master's Thesis). MIT Laboratory for
|
||||||
|
;;; Computer Science Technical Report MIT-LCS-TR-147 (Cambridge,
|
||||||
|
;;; Massachusetts). 1975. https://hdl.handle.net/1721.1/149442.
|
||||||
|
|
||||||
|
;;; (PRATT:OPERATOR PRECEDENCE PROCEDURE)
|
||||||
|
;;; constructs a description of an operator.
|
||||||
|
;;;
|
||||||
|
;;; PRECEDENCE is an exact number describing the precedence of the
|
||||||
|
;;; operator, and for portability should only be an integer.
|
||||||
|
;;;
|
||||||
|
;;; (The special EOF operator, which always exists, has negative
|
||||||
|
;;; infinity precedence.)
|
||||||
|
;;;
|
||||||
|
;;; PROCEDURE is of the form (PROCEDURE OPERATORS LEFT TOKEN READER).
|
||||||
|
;;; OPERATORS is the operators map.
|
||||||
|
;;; LEFT is a PRATT:TOKEN, which was to the left of the operator.
|
||||||
|
;;; TOKEN is the token read from the reader wit the same type.
|
||||||
|
;;; READER is a reader object.
|
||||||
|
;;;
|
||||||
|
;;; PROCEDURE consumes tokens to obtain the
|
||||||
|
;;; right hand side of its expression. PROCEDURE produces a PRATT:TOKEN.
|
||||||
|
(define pratt:operator cons)
|
||||||
|
(define pratt:precedence car)
|
||||||
|
(define pratt:procedure cdr)
|
||||||
|
|
||||||
|
(define pratt:search-token
|
||||||
|
(lambda (operators token)
|
||||||
|
(smap:search operators (symbol->string (token 'type)))))
|
||||||
|
|
||||||
|
;;; Parse a stream from READER, where the precedence limit is specified as
|
||||||
|
;;; BINDING-POWER. If BINDING-POWER is #F, then there is no precedence
|
||||||
|
;;; limit. Operators are read from OPERATORS, which is a SMAP.
|
||||||
|
(define pratt
|
||||||
|
(lambda (operators binding-power reader)
|
||||||
|
(letrec
|
||||||
|
((loop
|
||||||
|
(lambda (left)
|
||||||
|
(let ((token (reader 'peek)))
|
||||||
|
(let ((operator (pratt:search-token operators token)))
|
||||||
|
(if (null? next-operator)
|
||||||
|
(error 'pratt 'unknown-operator token
|
||||||
|
operators binding-power reader))
|
||||||
|
(if (and binding-power
|
||||||
|
(>= (pratt:precedence operator)
|
||||||
|
binding-power))
|
||||||
|
left
|
||||||
|
(parse operator left (reader 'read) reader))))))
|
||||||
|
(parse
|
||||||
|
(lambda (operator left token reader)
|
||||||
|
(loop ((pratt:procedure operator)
|
||||||
|
operators
|
||||||
|
left
|
||||||
|
token
|
||||||
|
reader)))))
|
||||||
|
;; Read the first token.
|
||||||
|
(let ((token (reader 'read)))
|
||||||
|
(let ((operator (pratt:search-token operators token)))
|
||||||
|
(if (null? operator)
|
||||||
|
(error 'pratt 'unknown-operator token
|
||||||
|
operators binding-power reader))
|
||||||
|
(parse operator #f token reader))))))
|
||||||
|
|
||||||
|
;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
;;; Math reader.
|
||||||
|
;;;
|
||||||
|
;;; Scheme numbers are a kludge. A proper LISP would either represent
|
||||||
|
;;; numbers as
|
||||||
|
;;;
|
||||||
|
;;; (+ 4 (*i 6)) or (@ (. 12 34) 2) or (e 12 3) etc.
|
||||||
|
;;;
|
||||||
|
;;; or just bite the bullet and include a full infix expression parser.
|
||||||
|
;;;
|
||||||
|
;;; This is a full infix expression parser. It will parse expressions
|
||||||
|
;;; such as SIN(X) or X * 5I/6. Identifiers and functions are any Scheme
|
||||||
|
;;; object.
|
||||||
|
;;;
|
||||||
|
;;; The math reader can be invoked using #MATH(expr). EXPR is an infix
|
||||||
|
;;; mathematical expression. #PREFIX(expr) reads EXPR in the normal
|
||||||
|
;;; reader. All other hash identifiers are unsupported in #MATH.
|
||||||
|
|
||||||
|
;;; The basic number reader. The basic syntax is
|
||||||
|
;;;
|
||||||
|
;;; PREFIXES* SIGN? (RADIX+ ("." RADIX+)? | "." RADIX+) ("e" RADIX+)? "i"?
|
||||||
|
;;; Prefixes are handled by the "#" reader.
|
||||||
|
|
||||||
|
;;; Incremental builder for a number, driven by the lexer.
|
||||||
|
;;; TODO: how will this be turned into a number, in the end?
|
||||||
|
(define read:number-builder
|
||||||
|
(lambda (radix-table)
|
||||||
|
(let ((main '())
|
||||||
|
(decimal '())
|
||||||
|
(exp-char #f)
|
||||||
|
(exp-digits '())
|
||||||
|
(imaginary? #f)
|
||||||
|
(current-part 'main)
|
||||||
|
(exactness #f))
|
||||||
|
(let ((append-part
|
||||||
|
(lambda (char)
|
||||||
|
(cond
|
||||||
|
((eq? current-part 'main)
|
||||||
|
(set! main (cons char main)))
|
||||||
|
((eq? current-part 'decimal)
|
||||||
|
(set! decimal (cons char decimal)))
|
||||||
|
((eq? current-part 'exp)
|
||||||
|
(set! exp-digits (cons char exp-digits)))))))
|
||||||
|
(lambda (op . args)
|
||||||
|
(cond
|
||||||
|
((eq? op 'finalize)
|
||||||
|
(if (not exactness)
|
||||||
|
(set! exactness 'exact))
|
||||||
|
(list 'number-builder
|
||||||
|
main exactness decimal exp-char exp-digits imaginary?))
|
||||||
|
((eq? op 'exactness!) (set! exactness (car args)))
|
||||||
|
((eq? op 'radix-table!) (set! radix-table (car args)))
|
||||||
|
((eq? op 'set-imaginary!)
|
||||||
|
(set! imaginary? #t))
|
||||||
|
((eq? op 'set-decimal!)
|
||||||
|
(if (not exactness)
|
||||||
|
(set! exactness 'inexact))
|
||||||
|
;;
|
||||||
|
(if (not (eq? current-part 'main))
|
||||||
|
(error 'number-builder "cannot set to decimal" current-part)
|
||||||
|
(set! current-part 'decimal)))
|
||||||
|
((eq? op 'set-exponential!)
|
||||||
|
(if (not exactness)
|
||||||
|
(set! exactness 'inexact))
|
||||||
|
;;
|
||||||
|
(if (eq? current-part 'exp)
|
||||||
|
(error 'number-builder "cannot set to exponential"))
|
||||||
|
(set! current-part 'exp)
|
||||||
|
(set! exp-char (car args)))
|
||||||
|
;;
|
||||||
|
((eq? op 'radix-table) radix-table)
|
||||||
|
(else (append-part op))))))))
|
||||||
|
|
||||||
|
(define readtable:return-number
|
||||||
|
(lambda (_ char acc port)
|
||||||
|
(port 'push char)
|
||||||
|
(acc 'finalize)))
|
||||||
|
|
||||||
|
;;; Main lexer for number table.
|
||||||
|
(define readtable:number-main-table
|
||||||
|
(readtable:process
|
||||||
|
(readtable:empty/default readtable:return-number)
|
||||||
|
(list readtable:update #\.
|
||||||
|
(lambda (table char number-builder port)
|
||||||
|
(number-builder 'set-decimal!)
|
||||||
|
(readtable:next table
|
||||||
|
number-builder
|
||||||
|
port)))
|
||||||
|
(list readtable:update (list #\e #\s #\f #\d #\l)
|
||||||
|
(lambda (table char number-builder port)
|
||||||
|
(number-builder 'set-exponential! char)
|
||||||
|
(readtable:next table
|
||||||
|
number-builder
|
||||||
|
port)))
|
||||||
|
(list readtable:update #\_ readtable:skip)
|
||||||
|
(list readtable:update #\i
|
||||||
|
(lambda (table char number-builder port)
|
||||||
|
(number-builder 'set-imaginary!)
|
||||||
|
(number-builder 'finalize)))))
|
||||||
|
|
||||||
|
;;; Generate a lexer given a list of numbers in the radix.
|
||||||
|
(define readtable:for-radix
|
||||||
|
(lambda (radix-list)
|
||||||
|
(readtable:process
|
||||||
|
readtable:number-main-table
|
||||||
|
(list readtable:update radix-list
|
||||||
|
(lambda (table char number-builder port)
|
||||||
|
(number-builder char)
|
||||||
|
(readtable:next table number-builder port))))))
|
||||||
|
|
||||||
|
(define readtable:for-bin
|
||||||
|
(readtable:for-radix '(#\0 #\1)))
|
||||||
|
(define readtable:for-oct
|
||||||
|
(readtable:for-radix '(#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7)))
|
||||||
|
(define readtable:for-dec
|
||||||
|
(readtable:for-radix '(#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9)))
|
||||||
|
(define readtable:for-hex
|
||||||
|
(readtable:for-radix '(#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9
|
||||||
|
#\a #\b #\c #\d #\e #\f)))
|
||||||
|
|
||||||
|
;;; Read the sign at the start of a number.
|
||||||
|
(define readtable:read-sign
|
||||||
|
(readtable:process
|
||||||
|
(readtable:empty/default
|
||||||
|
(lambda (_ char number-builder port)
|
||||||
|
(readtable:act (number-builder 'radix-table)
|
||||||
|
char
|
||||||
|
number-builder
|
||||||
|
port)))
|
||||||
|
(list readtable:update (list #\+ #\-)
|
||||||
|
(lambda (_ char number-builder port)
|
||||||
|
(number-builder 'sign! char)
|
||||||
|
(readtable:next (number-builder 'radix-table)
|
||||||
|
number-builder
|
||||||
|
port)))
|
||||||
|
(list readtable:update #\_ readtable:skip)))
|
||||||
|
|
||||||
|
(define readtable:read-prefix-char
|
||||||
|
(let ((act (lambda args
|
||||||
|
(lambda (_ char number-builder port)
|
||||||
|
(apply number-builder args)
|
||||||
|
(readtable:next readtable:read-number-prefix
|
||||||
|
number-builder
|
||||||
|
port)))))
|
||||||
|
(readtable:process
|
||||||
|
(readtable:empty/default (readtable:error "invalid prefix"))
|
||||||
|
(list readtable:update #\i (act 'exactness! 'inexact))
|
||||||
|
(list readtable:update #\e (act 'exactness! 'exact))
|
||||||
|
(list readtable:update #\b (act 'radix-table! readtable:for-bin))
|
||||||
|
(list readtable:update #\o (act 'radix-table! readtable:for-oct))
|
||||||
|
(list readtable:update #\d (act 'radix-table! readtable:for-dec))
|
||||||
|
(list readtable:update #\x (act 'radix-table! readtable:for-hex)))))
|
||||||
|
|
||||||
|
(define readtable:read-number-prefix
|
||||||
|
(readtable:process
|
||||||
|
(readtable:empty/default (readtable:jump readtable:read-sign))
|
||||||
|
(list readtable:update #\# (readtable:jump/next readtable:read-prefix-char))))
|
||||||
|
|
||||||
;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
;;; Reader for stuff that start with "#"
|
;;; Reader for stuff that start with "#"
|
||||||
;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
@ -559,7 +807,7 @@
|
||||||
(readtable:process
|
(readtable:process
|
||||||
(readtable:empty/default (readtable:error 'datum-label-next
|
(readtable:empty/default (readtable:error 'datum-label-next
|
||||||
"invalid datum label/ref"))
|
"invalid datum label/ref"))
|
||||||
(list readtable:update-list readtable:digits readtable:push-char)
|
(list readtable:update readtable:digits readtable:push-char)
|
||||||
(list readtable:update #\=
|
(list readtable:update #\=
|
||||||
(lambda (_ __ acc port)
|
(lambda (_ __ acc port)
|
||||||
(acc 'finalize->ident)
|
(acc 'finalize->ident)
|
||||||
|
@ -631,10 +879,12 @@
|
||||||
(let ((ident (readtable:read-ident #f char #f port)))
|
(let ((ident (readtable:read-ident #f char #f port)))
|
||||||
(let ((container (smap:search read:hash-messages
|
(let ((container (smap:search read:hash-messages
|
||||||
(ident 'value))))
|
(ident 'value))))
|
||||||
;; TODO: check if the hash message starts a number.
|
|
||||||
(if (null? container)
|
(if (null? container)
|
||||||
(error 'read-hash-messages 'unknown-constant
|
(let ((number-port (ident->read (port 'location) ident)))
|
||||||
(ident 'value))
|
(number-port 'fold-case! #t)
|
||||||
|
(readtable:next readtable:read-prefix-char
|
||||||
|
(read:number-builder readtable:for-dec)
|
||||||
|
number-port))
|
||||||
((map:val container) acc port))))))
|
((map:val container) acc port))))))
|
||||||
|
|
||||||
;;; ;;;;;;;;;;;;;;;;;;;;;;
|
;;; ;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
@ -676,12 +926,12 @@
|
||||||
(define readtable:hash
|
(define readtable:hash
|
||||||
(readtable:process
|
(readtable:process
|
||||||
(readtable:empty/default read:read-hash-messages)
|
(readtable:empty/default read:read-hash-messages)
|
||||||
(list readtable:update-list readtable:ASCII-whitespace
|
(list readtable:update readtable:ASCII-whitespace
|
||||||
(readtable:error 'hash 'whitespace-after-hash))
|
(readtable:error 'hash 'whitespace-after-hash))
|
||||||
(list readtable:update #\| (readtable:jump/next readtable:block-comment))
|
(list readtable:update #\| (readtable:jump/next readtable:block-comment))
|
||||||
(list readtable:update #\; readtable:datum-comment)
|
(list readtable:update #\; readtable:datum-comment)
|
||||||
(list readtable:update #\\ readtable:character)
|
(list readtable:update #\\ readtable:character)
|
||||||
(list readtable:update-list readtable:digits ; Datum labels
|
(list readtable:update readtable:digits ; Datum labels
|
||||||
(lambda (_ char toplevel port)
|
(lambda (_ char toplevel port)
|
||||||
(readtable:act readtable:datum-label-next
|
(readtable:act readtable:datum-label-next
|
||||||
char
|
char
|
||||||
|
@ -698,7 +948,7 @@
|
||||||
(lambda ()
|
(lambda ()
|
||||||
(readtable:process
|
(readtable:process
|
||||||
(readtable:empty/default readtable:read-ident)
|
(readtable:empty/default readtable:read-ident)
|
||||||
(list readtable:update-list readtable:ASCII-whitespace readtable:skip)
|
(list readtable:update readtable:ASCII-whitespace readtable:skip)
|
||||||
(list readtable:update #f (readtable:return 'eof))
|
(list readtable:update #f (readtable:return 'eof))
|
||||||
(list readtable:update %bol readtable:read-list)
|
(list readtable:update %bol readtable:read-list)
|
||||||
(list readtable:update %eol (readtable:error 'top "unbalanced list"))
|
(list readtable:update %eol (readtable:error 'top "unbalanced list"))
|
||||||
|
@ -794,3 +1044,10 @@
|
||||||
(read-all "#u8(x y z w)")
|
(read-all "#u8(x y z w)")
|
||||||
(read-all "#\\newline")
|
(read-all "#\\newline")
|
||||||
(read-all "#\\a")
|
(read-all "#\\a")
|
||||||
|
|
||||||
|
(read-all "#b11")
|
||||||
|
(read-all "#XFF")
|
||||||
|
(read-all "#i#x10")
|
||||||
|
(read-all "#d1234.5678")
|
||||||
|
(read-all "#e10.5")
|
||||||
|
(read-all "#d10.24f12")
|
||||||
|
|
Loading…
Reference in New Issue