aboutsummaryrefslogtreecommitdiffstats
path: root/read.scm
blob: d9ea7f7df675e5bbe3ad7cf2be97686dea7c9123 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
;;; Copyright (C) Peter McGoron 2024
;;; This program is free software: you can redistribute it and/or modify
;;; it under the terms of the GNU General Public License as published by
;;; the Free Software Foundation, version 3 of the License.
;;; 
;;; This program is distributed in the hope that it will be useful,
;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;;; GNU General Public License for more details.
;;; 
;;; You should have received a copy of the GNU General Public License
;;; along with this program.  If not, see <https://www.gnu.org/licenses/>.

;;; R7RS reader. This is the lexer-parser end, so it returns tokens and
;;; not concrete objects.

(load "chez-compat.scm")
(load "util.scm")
(load "set.scm")
(load "linked-list.scm")

;;; List delimiter characters. They are built from their code points so
;;; that a text editor which cannot parse Scheme's character syntax does
;;; not see an unbalanced parenthesis.

;; Opening parenthesis (beginning of list), U+0028.
(define %bol (integer->char #x28))
;; Closing parenthesis (end of list), U+0029.
(define %eol (integer->char #x29))

;;; ;;;;;;;;;;;;;;;;;;;;;;;;
;;; Port reader wrapper
;;; ;;;;;;;;;;;;;;;;;;;;;;;;

;;; Wrap PORT in a procedure of no arguments that returns the next
;;; character read from PORT, or #F once PORT is exhausted.
(define port->read-function
  (lambda (port)
    (lambda ()
      (let ((ch (read-char port)))
        ;; READ-CHAR signals end of input with an EOF object, but the
        ;; readtables in this file key end of input on #F.
        (if (eof-object? ch) #f ch)))))

;;; (PORT->READ READ-FUNCTION FILENAME)
;;;
;;; Wrap READ-FUNCTION, a procedure of no arguments that returns the next
;;; character of the input (or #F or an EOF object at end of input), in a
;;; message-passing reader that tracks its position and supports pushback.
;;; The returned procedure accepts the following messages:
;;;
;;; (POS): Return (LIST FILENAME LINE-NUMBER OFFSET). OFFSET is #F while
;;; the column is unknown, which happens after a newline is pushed back.
;;; (READ): Read the next character in the stream. Returns #F on EOF.
;;; (PUSH CHAR): Push CHAR such that it will be the next character read
;;; when (READ) is called.
;;; (PEEK): Read character, push it back, and return it.
;;; (FOLD-CASE?): Return the current case-folding setting.
;;; (FOLD-CASE! BOOL): Set the case-folding setting to BOOL.
;;;
;;; Any other message signals an error.
(define port->read
  (lambda (read-function filename)
    (let ((line-number 1)
          (offset 0)
          (pushback-buffer '())
          (fold-case? #f))
      (letrec ((update-position!
                (lambda (ch)
                  (cond
                    ((eqv? ch #\newline)
                     (set! line-number (+ 1 line-number))
                     (set! offset 0))
                    ;; OFFSET is #F while the column is unknown; leave it
                    ;; alone until the next newline resets it.
                    (offset (set! offset (+ 1 offset))))))
               (process
                (lambda (ch)
                  (update-position! ch)
                  (cond
                    ;; Report end of input as #F whichever way
                    ;; READ-FUNCTION signalled it.
                    ((or (eof-object? ch) (not ch)) #f)
                    (fold-case? (char-downcase ch))
                    (else ch))))
               (port
                (lambda (op . args)
                  (cond
                    ((eq? op 'pos) (list filename line-number offset))
                    ((eq? op 'read)
                     (process
                      (if (null? pushback-buffer)
                          (read-function)
                          (let ((ch (car pushback-buffer)))
                            (set! pushback-buffer (cdr pushback-buffer))
                            ch))))
                    ((eq? op 'peek)
                     (let ((ch (port 'read)))
                       (port 'push ch)
                       ch))
                    ((eq? op 'push)
                     (let ((ch (car args)))
                       (cond
                         ((eqv? ch #\newline)
                          (set! line-number (- line-number 1))
                          ;; The length of the previous line is not
                          ;; recorded, so the column becomes unknown.
                          (set! offset #f))
                         ;; Step back only while the column is known;
                         ;; (- #F 1) would be an error.
                         (offset (set! offset (- offset 1))))
                       (set! pushback-buffer (cons ch pushback-buffer))))
                    ((eq? op 'fold-case?) fold-case?)
                    ((eq? op 'fold-case!) (set! fold-case? (car args)))
                    (else (error "port->read: invalid" (cons op args)))))))
        port))))

;;; ;;;;;;;;;;;;;;
;;; Character maps
;;; ;;;;;;;;;;;;;;

;;; Three-way comparison of the numbers X and Y: returns the symbol <, =
;;; or > according to how X relates to Y.
(define (integer<=> x y)
  (if (= x y)
      '=
      (if (< x y) '< '>)))

;;; Three-way comparison on characters extended to #F, which sorts before
;;; every character. X and Y are each a character or #F. Returns the
;;; symbol <, = or >.
(define char*<=>
  (lambda (x y)
    (cond
      ;; Settle the #F cases first so the final clause only sees characters.
      ((and (not x) (not y)) '=)
      ((not x) '<)
      ((not y) '>)
      (else (integer<=> (char->integer x)
                        (char->integer y))))))

;;; Map keyed by characters (or #F), ordered by CHAR*<=>.
;;; NOTE(review): SET:<=>-TO-MAP, SET:UPDATE and the MAP: constructors are
;;; not defined in this file (presumably they come from set.scm) -- confirm
;;; their contracts there.
(define %charmap:<=> (set:<=>-to-map char*<=>))
(define %charmap:update (set:update %charmap:<=>))

;;; Operations on character maps. In this file CHARMAP:INSERT is called as
;;; (CHARMAP:INSERT MAP KEY VALUE) and the CAR of its result is used as the
;;; new map; CHARMAP:SEARCH is called as (CHARMAP:SEARCH MAP KEY) and its
;;; result is tested with NULL? before MAP:VAL is applied to it.
(define charmap:update (map:update %charmap:update))
(define charmap:insert (map:insert %charmap:update))
(define charmap:search (map:search %charmap:<=>))

;;; ;;;;;;;;;;;;;;;;;;;;;;
;;; Readtable constructors
;;; ;;;;;;;;;;;;;;;;;;;;;;

;;; A readtable is a pair: the default action in the CAR and the charmap
;;; of per-character actions in the CDR.

;;; (READTABLE:NEW DEFAULT-ACTION CHARMAP)
(define (readtable:new default-action charmap)
  (cons default-action charmap))

;;; Accessors for the two halves of a readtable.
(define (%readtable:default-action table) (car table))
(define (%readtable:charmap table) (cdr table))

;;; Look CHAR up in TABLE and call the action bound to it, falling back
;;; to TABLE's default action when CHAR has no entry. The action is
;;; called as (ACTION TABLE CHAR ACC PORT) and its result is returned.
(define (readtable:act table char acc port)
  (let* ((entry (charmap:search (%readtable:charmap table) char))
         (handler (if (null? entry)
                      (%readtable:default-action table)
                      (map:val entry))))
    (handler table char acc port)))

;;; Read one character from PORT and dispatch on it through TABLE,
;;; passing ACC along.
(define (readtable:next table acc port)
  (let ((char (port 'read)))
    (readtable:act table char acc port)))

;;; Return a new readtable with the same default action as TABLE and
;;; with CHAR bound to ACTION.
(define (readtable:update table char action)
  (let* ((old-map (%readtable:charmap table))
         (inserted (charmap:insert old-map char action)))
    ;; The updated map is the CAR of what CHARMAP:INSERT returns.
    (readtable:new (%readtable:default-action table)
                   (car inserted))))

;;; Build a readtable whose charmap is empty, so that every character is
;;; handled by DEFAULT-ACTION.
(define (readtable:empty/default default-action)
  (readtable:new default-action (list)))

;;; Thread TABLE through a sequence of table transformers. Each element
;;; of FUNCTIONS is a list (PROCEDURE ARGS...), which is applied as
;;; (PROCEDURE TABLE ARGS...) and must return a table. The table produced
;;; by folding over all of FUNCTIONS is returned.
(define (readtable:process table . functions)
  (let ((apply-step
         (lambda (step current)
           (let ((procedure (car step))
                 (extra-args (cdr step)))
             (apply procedure current extra-args)))))
    (fold apply-step table functions)))

;;; ;;;;;;;;;;;;;;;;;;
;;; Default readtables
;;; ;;;;;;;;;;;;;;;;;;

;;; Make an action that signals an error. EMSG is the error message
;;; followed by any extra irritants. Whatever arguments the action is
;;; eventually called with (table, char, acc, port, ...) are appended as
;;; further irritants. The action accepts any number of arguments.
(define readtable:error
  (lambda emsg
    (lambda tablemsg
      ;; ERROR takes the message first, so EMSG must precede the
      ;; arguments of the action.
      (apply error (append emsg tablemsg)))))

;;; Action: drop CHAR and dispatch the next character from PORT through
;;; the same TABLE, with ACC unchanged.
(define (readtable:skip table char acc port)
  (readtable:next table acc port))

;;; Make an action that ignores its four arguments (thereby consuming the
;;; current character) and yields the constant RETURN.
(define (readtable:return return)
  (lambda (_table _char _acc _port)
    return))

;;; Make an action that runs NEWTABLE on the following characters with an
;;; empty ACC, discards its result, and then resumes reading through the
;;; table the action was invoked from, with the original ACC.
(define (readtable:jump-discard newtable)
  (lambda (oldtable char acc port)
    (readtable:next newtable '() port)
    (readtable:next oldtable acc port)))

;;; Make an action that re-dispatches the current character through
;;; NEWTABLE, passing CHAR, ACC and PORT along unchanged.
(define (readtable:jump newtable)
  (lambda (_oldtable char acc port)
    (readtable:act newtable char acc port)))

;;; Make an action that drops the current character and ACC, reads the
;;; next character from PORT, and dispatches it through NEWTABLE with the
;;; invoking table passed in the ACC position.
(define (readtable:next/old-as-acc newtable)
  (lambda (oldtable _char _acc port)
    (readtable:act newtable (port 'read) oldtable port)))

;;; ;;;;;;;;;;;;;;;;;
;;; Identifier reader
;;; ;;;;;;;;;;;;;;;;;

;;; Action: hand CHAR back to PORT so that it is read again, then yield
;;; ACC. TABLE is ignored.
(define (readtable:return-acc-keep-char table char acc port)
  (port 'push char)
  acc)

;;; Action: append CHAR to the tail of ACC, then dispatch the next
;;; character from PORT through TABLE.
(define (readtable:push-char table char acc port)
  (acc 'push-tail char)
  (readtable:next table acc port))

;;; Return TABLE extended so that each character in the list EXCLUDED
;;; ends the identifier being read: the character is pushed back and the
;;; accumulator is returned. Characters without an entry keep whatever
;;; behaviour TABLE already gives them.
(define (readtable:exclude-from-identifiers table excluded)
  (let ((terminate-on
         (lambda (char current)
           (readtable:update current char readtable:return-acc-keep-char))))
    (fold terminate-on table excluded)))

;;; ASCII whitespace, as a list of characters: line feed, space,
;;; horizontal tab, vertical tab, form feed and carriage return.
(define readtable:ASCII-whitespace
  (map integer->char
       '(#x0A #x20 #x09 #x0B #x0C #x0D)))

;;; Readtable for the body of an identifier. By default a character is
;;; appended to the accumulator; ASCII whitespace, the listed delimiter
;;; characters and end of input (#F) end the identifier instead.
(define readtable:identifier
  (let ((delimiters (list #\| %bol %eol #\' #\; #f))
        (accumulate-by-default
         (readtable:empty/default readtable:push-char)))
    (readtable:process
     accumulate-by-default
     (list readtable:exclude-from-identifiers readtable:ASCII-whitespace)
     (list readtable:exclude-from-identifiers delimiters))))

;;; Action: read an identifier whose first character is CHAR and return
;;; it as a string. TABLE and ACC are ignored; the remaining characters
;;; are consumed through READTABLE:IDENTIFIER.
(define (readtable:read-ident table char acc port)
  (let ((chars (linked-list:new)))
    (chars 'push char)
    (let ((filled (readtable:next readtable:identifier chars port)))
      (list->string (filled 'to-list)))))

;;; ;;;;;;;;;;;;;;;;;;;;
;;; Comments and whitespace reader
;;; ;;;;;;;;;;;;;;;;;;;;

;;; Return TABLE extended so that every character in the list TO-SKIP is
;;; bound to READTABLE:SKIP. Used for whitespace.
(define (readtable:add-all-as-skip table to-skip)
  (let ((skip-char
         (lambda (char current)
           (readtable:update current char readtable:skip))))
    (fold skip-char table to-skip)))

;;; Readtable for a line comment: every character is skipped up to a
;;; newline, which is consumed and ends the comment with #F. End of input
;;; also ends the comment; it is pushed back so that the enclosing reader
;;; sees it.
(define readtable:read-to-newline
  (readtable:process
   (readtable:empty/default readtable:skip)
   (list readtable:update #\newline (readtable:return #f))
   ;; Without an entry for #F (end of input) the default skip action
   ;; would keep reading #F forever when a comment ends the input
   ;; without a trailing newline.
   (list readtable:update #f readtable:return-acc-keep-char)))

;;; ;;;;;;;;;;;
;;; List reader
;;; 
;;; The reader updates the previous readtable to handle ). This means
;;; that this read table does not have to handle end-of-line, whitespace,
;;; etc.
;;; ;;;;;;;;;;;

;;; Read the tail of an improper list, i.e. what follows the "." token.
;;; One datum is read through TABLE, with the closing parenthesis rebound
;;; to signal an error because a datum is required, and is stored as the
;;; cdr of ACC. After that only ASCII whitespace and the closing
;;; parenthesis are accepted; anything else signals an error. Returns the
;;; symbol END-OF-LIST.
(define (readtable:read-improper-cdr table acc port)
  (let* ((cdr-table (readtable:update
                     table
                     %eol
                     (readtable:error "proper list must have cdr")))
         (val (readtable:act cdr-table (port 'read) #f port)))
    (acc 'set-cdr! val)
    (let ((closing-table
           (readtable:process
            (readtable:empty/default
             (readtable:error "improper list has 1 cdr"))
            (list readtable:add-all-as-skip readtable:ASCII-whitespace)
            (list readtable:update %eol (lambda dummy 'end-of-list)))))
      (readtable:next closing-table acc port))))

;;; Generic list-reading loop. TABLE must already bind the end-of-list
;;; and improper-list handlers. Values are read one at a time and appended
;;; to a fresh accumulator until an action yields the symbol END-OF-LIST,
;;; at which point the accumulated list is returned.
(define (readtable:read-list-loop table port)
  (let ((acc (linked-list:new)))
    (let loop ((value (readtable:next table acc port)))
      (if (eqv? value 'end-of-list)
          (acc 'to-list)
          (begin
            (acc 'push-tail value)
            (loop (readtable:next table acc port)))))))

;;; Build the readtable used inside a list, shared by the proper and the
;;; improper list readers. It is OLDTABLE with three overrides:
;;;   closing parenthesis: ends the list by returning the symbol
;;;     END-OF-LIST;
;;;   #F (end of input): signals an error, because the list is
;;;     unterminated;
;;;   ".": reads a whole identifier; if it is exactly "." then
;;;     (ON-DOT TABLE ACC PORT) is called and its result returned,
;;;     otherwise the identifier is returned as an ordinary element.
(define readtable:table-for-list
  (lambda (oldtable on-dot)
    (readtable:process
     oldtable
     (list readtable:update %eol (readtable:return 'end-of-list))
     ;; The toplevel table answers end of input with the symbol EOF, which
     ;; the list loop would keep accumulating forever; fail instead.
     (list readtable:update #f (readtable:error "unexpected EOF in list"))
     (list readtable:update #\.
           (lambda (table char acc port)
             (let ((entire-identifier (readtable:read-ident
                                       table
                                       char
                                       #f
                                       port)))
               (if (equal? entire-identifier ".")
                   (on-dot table acc port)
                   entire-identifier)))))))

;;; Action: read a proper or improper list, starting after the character
;;; that triggered the action. The char and acc arguments are ignored;
;;; OLDTABLE is the table the elements are read through.
(define (readtable:read-list oldtable _ __ port)
  (let ((list-table (readtable:table-for-list
                     oldtable
                     readtable:read-improper-cdr)))
    (readtable:read-list-loop list-table port)))

;;; Read a list through TABLE in which a lone "." signals the error
;;; "expected proper list". Note that this takes (TABLE PORT), so it is
;;; a helper and not a readtable action.
(define (readtable:read-proper-list table port)
  (let* ((reject-dot (readtable:error "expected proper list"))
         (list-table (readtable:table-for-list table reject-dot)))
    (readtable:read-list-loop list-table port)))

;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; Reader for datum that start with "#"
;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;;; Action for a vector literal: read the elements as a proper list
;;; through TOPLEVEL (received in the ACC position) and return
;;; (LIST 'VECTOR ELEMENTS).
(define (readtable:vector _ __ toplevel port)
  (let ((elements (readtable:read-proper-list toplevel port)))
    (list 'vector elements)))

;;; Readtable for the character that follows "#". Only an opening
;;; parenthesis (a vector) is handled; any other character signals an
;;; "unimplemented" error.
(define readtable:hash
  (let ((unimplemented (readtable:error "unimplemented")))
    (readtable:update (readtable:empty/default unimplemented)
                      %bol
                      readtable:vector)))

;;; ;;;;;;;;;;;;;;;;
;;; Toplevel reader.
;;; ;;;;;;;;;;;;;;;;
;;; The toplevel readtable is built by a procedure rather than bound to a
;;; value, so that each sub-readtable is looked up when the procedure is
;;; called.
;;;
;;; Dispatch: ASCII whitespace is skipped, end of input (#F) yields the
;;; symbol EOF, an opening parenthesis starts a list, a stray closing
;;; parenthesis is an error, "#" hands over to READTABLE:HASH, ";"
;;; discards a line comment, and anything else starts an identifier.
(define (readtable:top)
  (let ((base (readtable:empty/default readtable:read-ident))
        (on-eof (readtable:return 'eof))
        (on-stray-close (readtable:error "unbalanced list"))
        (on-hash (readtable:next/old-as-acc readtable:hash))
        (on-comment (readtable:jump-discard readtable:read-to-newline)))
    (readtable:process
     base
     (list readtable:add-all-as-skip readtable:ASCII-whitespace)
     (list readtable:update #f on-eof)
     (list readtable:update %bol readtable:read-list)
     (list readtable:update %eol on-stray-close)
     (list readtable:update #\# on-hash)
     (list readtable:update #\; on-comment))))

;;; ;;;;;;;;;;;
;;; Test reader
;;; ;;;;;;;;;;;
;;; Build a reader (see PORT->READ) over the list of characters SEQ,
;;; using "test" as the file name. The underlying read function yields
;;; #F once SEQ is exhausted.
(define (%list->read seq)
  (let ((remaining seq))
    (port->read
     (lambda ()
       (cond
         ((null? remaining) #f)
         (else
          (let ((ch (car remaining)))
            (set! remaining (cdr remaining))
            ch))))
     "test")))

;;; Test driver: read the string STR with the toplevel readtable until
;;; the reader reports end of input, displaying each result as
;;; ("return" VALUE) on its own line. Returns #T.
(define (read-all str)
  (let ((reader (%list->read (string->list str))))
    (let loop ()
      (cond
        ((not (reader 'peek)) #t)
        (else
         (let ((value (readtable:next (readtable:top) #f reader)))
           (display (list "return" value))
           (newline)
           (loop)))))))

;;; Smoke tests, run when this file is loaded. Each call displays the
;;; values that were read.

;; Identifiers separated by whitespace, with a line comment in between.
(read-all "x yy zz ; this is a comment\nx call/cc ")
;; Proper list with irregular spacing and a nested list.
(read-all "(a   b    c     def (ghi j)  k    )")
;; Improper list: a lone "." introduces the cdr.
(read-all "(      a . b   )")
;; ".b" is read as an identifier; only the lone "." introduces the cdr.
(read-all "(   a .b . c)")
;; Vector syntax.
(read-all "#( a b y)")