UNSLISP/read.scm

391 lines
13 KiB
Scheme
Raw Normal View History

2024-09-07 17:47:10 -04:00
;;; Copyright (C) Peter McGoron 2024
;;; This program is free software: you can redistribute it and/or modify
;;; it under the terms of the GNU General Public License as published by
;;; the Free Software Foundation, version 3 of the License.
;;;
;;; This program is distributed in the hope that it will be useful,
;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;;; GNU General Public License for more details.
;;;
;;; You should have received a copy of the GNU General Public License
;;; along with this program. If not, see <https://www.gnu.org/licenses/>.
;;; R7RS reader. This is the lexer-parser end, so it returns tokens and
;;; not concrete objects.
(load "chez-compat.scm")
(load "util.scm")
(load "set.scm")
2024-09-20 19:11:36 -04:00
(load "linked-list.scm")
2024-09-07 17:47:10 -04:00
2024-09-20 22:27:17 -04:00
;;; My text editor cannot parse Scheme's character syntax.
(define %bol #\()
(define %eol #\))
2024-09-07 17:47:10 -04:00
;;; ;;;;;;;;;;;;;;;;;;;;;;;;
;;; Port reader wrapper
;;; ;;;;;;;;;;;;;;;;;;;;;;;;
(define port->read-function
(lambda (port)
(lambda ()
(read-char port))))
;;; READ:
;;;
;;; (POS): Return (LIST FILENAME LINE-NUMBER OFFSET).
;;; (READ): Read the next character in the stream. Returns #F on EOF.
;;; (PUSH CHAR): Push CHAR such that it will be the next character read
;;; when (READ) is called.
2024-09-08 08:44:42 -04:00
;;; (PEEK): Read character, push it back, and return it.
2024-09-07 21:10:40 -04:00
;;; (FOLD-CASE?): Returns a boolean if case folding is enabled.
;;; (FOLD-CASE! BOOL): Sets the case folding to BOOL.
2024-09-07 17:47:10 -04:00
(define port->read
(lambda (read-function filename)
(let ((line-number 1)
(offset 0)
2024-09-07 21:10:40 -04:00
(pushback-buffer '())
(fold-case? #f))
(letrec ((update-position!
(lambda (ch)
(cond
((eqv? ch #\newline)
(set! line-number (+ 1 line-number)) (set! offset 0))
(offset (set! offset (+ 1 offset))))))
(process
(lambda (ch)
(update-position! ch)
(cond
((or (eof-object? ch) (not ch)) ch)
(fold-case? (char-downcase ch))
(else ch))))
(port
(lambda (op . args)
(cond
((eq? op 'pos) (list filename line-number offset))
((eq? op 'read)
(process
(if (null? pushback-buffer)
(read-function)
(let ((ch (car pushback-buffer)))
(set! pushback-buffer (cdr pushback-buffer))
ch))))
2024-09-08 08:44:42 -04:00
((eq? op 'peek)
(let ((ch (port 'read)))
(port 'push ch)
ch))
2024-09-07 21:10:40 -04:00
((eq? op 'push)
(let ((ch (car args)))
(if (eqv? ch #\newline)
(begin
(set! line-number (- line-number 1))
(set! offset #f))
(set! offset (- offset 1)))
(set! pushback-buffer (cons ch pushback-buffer))))
((eq? op 'fold-case?) fold-case?)
((eq? op 'fold-case!) (set! fold-case? (car args)))
(else (error "read->port: invalid" (cons op args)))))))
port))))
2024-09-07 17:47:10 -04:00
;;; ;;;;;;;;;;;;;;
;;; Character maps
;;; ;;;;;;;;;;;;;;
(define integer<=>
(lambda (x y)
(cond
((< x y) '<)
((= x y) '=)
(else '>))))
;;; Comparison on characters extended to #F, which is less than all
;;; characters.
(define char*<=>
2024-09-07 17:47:10 -04:00
(lambda (x y)
(cond
((and (not x) y) '<)
((and x (not y)) '>)
((and (not x) (not y) '=))
(else (integer<=> (char->integer x)
(char->integer y))))))
2024-09-07 17:47:10 -04:00
(define %charmap:<=> (set:<=>-to-map char*<=>))
2024-09-07 17:47:10 -04:00
(define %charmap:update (set:update %charmap:<=>))
(define charmap:update (map:update %charmap:update))
(define charmap:insert (map:insert %charmap:update))
(define charmap:search (map:search %charmap:<=>))
;;; ;;;;;;;;;;;;;;;;;;;;;;
2024-09-07 17:47:10 -04:00
;;; Readtable constructors
;;; ;;;;;;;;;;;;;;;;;;;;;;
;;; (READTABLE:NEW DEFAULT-ACTION CHARMAP)
(define readtable:new cons)
(define %readtable:default-action car)
(define %readtable:charmap cdr)
;;; Run the action in TABLE assigned to CHAR, or the default action of
;;; TABLE if there is no entry for CHAR.
(define readtable:act
(lambda (table char acc port)
(let ((node (charmap:search (%readtable:charmap table)
char)))
(let ((action (if (null? node)
(%readtable:default-action table)
(map:val node))))
(action table char acc port)))))
;;; Return a new readtable where CHAR is bound to ACTION.
(define readtable:update
(lambda (table char action)
(readtable:new (%readtable:default-action table)
(car (charmap:insert
(%readtable:charmap table) char action)))))
;;; Construct new readtable with no characters in its map and
;;; DEFAULT-ACTION as the default action.
(define readtable:empty/default
(lambda (default-action)
(readtable:new default-action '())))
;;; Each value in FUNCTIONS is a list (PROCEDURE ARGS...) which is called
;;; like (PROCEDURE TABLE ARGS...) and returns a table.
(define readtable:process
(lambda (table . functions)
(fold (lambda (function table)
(apply (car function) table (cdr function)))
table
functions)))
2024-09-07 17:47:10 -04:00
;;; ;;;;;;;;;;;;;;;;;;
2024-09-07 17:47:10 -04:00
;;; Default readtables
;;; ;;;;;;;;;;;;;;;;;;
2024-09-07 17:47:10 -04:00
2024-09-20 19:11:36 -04:00
;;; Return an error.
(define readtable:error
(lambda emsg
(lambda tablemsg
(apply error tablemsg emsg))))
;;; Discard the current character and continue reading the readtable.
(define readtable:skip
(lambda (table char acc port)
(readtable:act table (port 'read) acc port)))
2024-09-08 08:44:42 -04:00
;;; Discard char and return constant.
(define readtable:return
(lambda (return)
(lambda (table char acc port)
return)))
;;; Jump to a new readtable, discard it's return, and continue reading
;;; in the table.
(define readtable:jump-discard
(lambda (newtable)
(lambda (oldtable char acc port)
(readtable:act newtable (port 'read) '() port)
(readtable:act oldtable (port 'read) acc port))))
2024-09-20 19:11:36 -04:00
;;; Jump to a new readtable with the same characters.
(define readtable:jump
(lambda (newtable)
(lambda (oldtable char acc port)
(readtable:act newtable char acc port))))
2024-09-21 23:52:55 -04:00
;;; ;;;;;;;;;;;;;;;;;
;;; Identifier reader
;;; ;;;;;;;;;;;;;;;;;
;;; Push back CHAR and return ACC.
(define readtable:return-acc-keep-char
(lambda (table char acc port)
(port 'push char)
acc))
;;; Push CHAR to ACC and continue reading from TABLE.
2024-09-20 19:11:36 -04:00
(define readtable:push-char
(lambda (table char acc port)
2024-09-20 19:11:36 -04:00
(acc 'push-tail char)
(readtable:act table (port 'read) acc port)))
;;; Define a readtable that constructs an identifier by accepting all
;;; characters that are not listed.
(define readtable:exclude-from-identifiers
(lambda (table excluded)
(fold (lambda (char table)
(readtable:update table char readtable:return-acc-keep-char))
table
excluded)))
;;; ASCII whitespace.
(define readtable:ASCII-whitespace
(list #\newline
#\space
(integer->char #x09)
(integer->char #x0B)
(integer->char #x0C)
(integer->char #x0D)))
;;; Readtable for identifiers.
(define readtable:identifier
(readtable:process
2024-09-20 19:11:36 -04:00
(readtable:empty/default readtable:push-char)
(list readtable:exclude-from-identifiers
readtable:ASCII-whitespace)
(list readtable:exclude-from-identifiers
2024-09-20 22:27:17 -04:00
(list #\| %bol %eol #\' #\; #f))))
;;; Read an identifier starting with CHAR.
(define readtable:read-ident
(lambda (table char acc port)
2024-09-20 19:11:36 -04:00
(let ((lst (linked-list:new)))
(lst 'push char)
(list->string
((readtable:act readtable:identifier
(port 'read) lst port)
'to-list)))))
2024-09-21 23:52:55 -04:00
;;; ;;;;;;;;;;;;;;;;;;;;
;;; Comments and whitespace reader
;;; ;;;;;;;;;;;;;;;;;;;;
;;; Add all chars in TO-SKIP to TABLE as skipped characters. Used for
;;; whitespace.
(define readtable:add-all-as-skip
(lambda (table to-skip)
(fold (lambda (char table)
(readtable:update table char readtable:skip))
table
to-skip)))
2024-09-07 17:47:10 -04:00
2024-09-08 08:44:42 -04:00
;;; Readtable for a line comment.
(define readtable:read-to-newline
(readtable:process
(readtable:empty/default readtable:skip)
(list readtable:update #\newline (readtable:return #f))))
2024-09-21 23:52:55 -04:00
;;; ;;;;;;;;;;;
;;; List reader
;;;
;;; The reader updates the previous readtable to handle ). This means
;;; that this read table does not have to handle end-of-line, whitespace,
;;; etc.
;;; ;;;;;;;;;;;
2024-09-20 22:27:17 -04:00
;;; Read the end of an improper list.
(define readtable:read-improper-cdr
(lambda (table acc port)
(let ((val
(readtable:act (readtable:update table
%eol
(readtable:error
"proper list must have cdr"))
(port 'read)
#f
port)))
(acc 'set-cdr! val)
(let ((table (readtable:process
(readtable:empty/default (readtable:error
"improper list has 1 cdr"))
(list readtable:add-all-as-skip readtable:ASCII-whitespace)
(list readtable:update %eol
2024-09-21 23:52:55 -04:00
(lambda dummy 'end-of-list)))))
2024-09-20 22:27:17 -04:00
(readtable:act table (port 'read) acc port)))))
2024-09-21 23:52:55 -04:00
;;; Generic reader loop for a list. It takes as input the table that has
;;; already been updated with end of list and improper list handlers.
(define readtable:read-list-loop
(lambda (table port)
(let ((acc (linked-list:new)))
2024-09-20 19:11:36 -04:00
(letrec ((loop
(lambda ()
(let ((value (readtable:act table
(port 'read)
2024-09-20 22:27:17 -04:00
acc
2024-09-20 19:11:36 -04:00
port)))
(cond
((eqv? value 'end-of-list) (acc 'to-list))
(else (acc 'push-tail value)
(loop)))))))
(loop)))))
2024-09-21 23:52:55 -04:00
;;; Readtable for a list, generic to proper and improper list
;;; readers.
(define readtable:table-for-list
(lambda (oldtable on-dot)
(readtable:process
oldtable
(list readtable:update %eol (readtable:return 'end-of-list))
(list readtable:update #\.
(lambda (table char acc port)
(let ((entire-identifier (readtable:read-ident
table
char
#f
port)))
(if (equal? entire-identifier ".")
(on-dot table acc port)
entire-identifier)))))))
;;; Read a proper or improper list.
(define readtable:read-list
(lambda (oldtable _ __ port)
(readtable:read-list-loop (readtable:table-for-list
oldtable
readtable:read-improper-cdr)
port)))
;;; Toplevel reader.
2024-09-20 19:11:36 -04:00
;;; This is defined as a function so that it dynamically loads each
;;; sub-readtable.
2024-09-07 17:47:10 -04:00
(define readtable:top
2024-09-20 19:11:36 -04:00
(lambda ()
(readtable:process
(readtable:empty/default readtable:read-ident)
(list readtable:add-all-as-skip readtable:ASCII-whitespace)
(list readtable:update #f (readtable:return 'eof))
2024-09-20 22:27:17 -04:00
(list readtable:update %bol readtable:read-list)
(list readtable:update %eol (readtable:error "unbalanced list"))
2024-09-20 19:11:36 -04:00
(list readtable:update #\;
(readtable:jump-discard readtable:read-to-newline)))))
2024-09-07 17:47:10 -04:00
;;; ;;;;;;;;;;;
;;; Test reader
;;; ;;;;;;;;;;;
(define %list->read
(lambda (seq)
(port->read
(lambda ()
(if (null? seq)
#f
(let ((ch (car seq)))
(set! seq (cdr seq))
ch)))
"test")))
2024-09-08 08:44:42 -04:00
(define read-all
(lambda (str)
(let ((reader (%list->read (string->list str))))
(letrec ((loop
(lambda ()
(if (not (reader 'peek))
#t
(let ((value (readtable:act
2024-09-20 19:11:36 -04:00
(readtable:top) (reader 'read)
2024-09-08 08:44:42 -04:00
#f
reader)))
(display (list "return" value))
(newline)
(loop))))))
(loop)))))
(read-all "x yy zz ; this is a comment\nx call/cc ")
2024-09-20 19:11:36 -04:00
(read-all "(a b c def (ghi j) k )")
2024-09-20 22:27:17 -04:00
(read-all "( a . b )")
(read-all "( a .b . c)")
2024-09-21 23:52:55 -04:00