1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
|
;;; Copyright (C) Peter McGoron 2024
;;; This program is free software: you can redistribute it and/or modify
;;; it under the terms of the GNU General Public License as published by
;;; the Free Software Foundation, version 3 of the License.
;;;
;;; This program is distributed in the hope that it will be useful,
;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;;; GNU General Public License for more details.
;;;
;;; You should have received a copy of the GNU General Public License
;;; along with this program. If not, see <https://www.gnu.org/licenses/>.
;;; R7RS reader. This is the lexer-parser end, so it returns tokens and
;;; not concrete objects.
;;;
;;; Notes:
;;;
;;; Port stores datum labels. Datum labels are stored for the entirety of
;;; a READ: this is to emulate MIT Scheme, which allows for datum labels
;;; outside of the datum that the label appears in.
;;;
;;; The reader does not return Scheme data: it returns annotated data
;;; containing the source location, datum label number, resolved datum
;;; label pointer. This is for advanced syntax systems.
;;;
;;; How datum labels could work:
;;;
;;; When encountering #[number]=, allocate a datum label and assign it
;;; nothing. Then call READ after "=", and destructively update the
;;; datum label with the resulting datum. A pass over the new read
;;; structure to convert it to regular Scheme data will resolve the
;;; indirection.
;;;
;;; All tokens are procedure-encapsulated objects, since the reader should
;;; never return a literal procedure. Each procedure has a TYPE message.
(load "chez-compat.scm")
(load "util.scm")
(load "set.scm")
(load "linked-list.scm")
;;; My text editor cannot parse Scheme's character syntax.
(define %bol #\()
(define %eol #\))
;;; ;;;;;;;;;;;;;;;;;;;;;;;;
;;; Port reader wrapper
;;; ;;;;;;;;;;;;;;;;;;;;;;;;
(define port->read-function
(lambda (port)
(lambda ()
(read-char port))))
;;; READ:
;;;
;;; Stream readers contain mutable state. This is the case-folding mode
;;; and the current list of datum labels.
;;;
;;; (POS): Return (LIST FILENAME LINE-NUMBER OFFSET).
;;; (READ): Read the next character in the stream. Returns #F on EOF.
;;; (PUSH CHAR): Push CHAR such that it will be the next character read
;;; when (READ) is called.
;;; (PEEK): Read character, push it back, and return it.
;;; (FOLD-CASE?): Returns a boolean if case folding is enabled.
;;; (FOLD-CASE! BOOL): Sets the case folding to BOOL.
(define port->read
(lambda (read-function filename)
(let ((line-number 1)
(offset 0)
(pushback-buffer '())
(datum-labels '())
(fold-case? #f))
(letrec ((update-position!
(lambda (ch)
(cond
((eqv? ch #\newline)
(set! line-number (+ 1 line-number)) (set! offset 0))
;; OFFSET is sometimes set to #F to denote an unknown
;; offset.
(offset (set! offset (+ 1 offset))))))
(location
(lambda () (list filename line-number offset)))
(set-datum-label!
(lambda (label value)
(set! datum-labels
(car (smap:insert datum-labels label value)))))
(get-datum-label
(lambda (label)
(smap:search datum-labels label)))
(dump-mutable
(lambda ()
(list datum-labels fold-case?)))
(restore-mutable!
(lambda (state)
(set! datum-labels (car state))
(set! fold-case? (cadr state))))
(process
(lambda (ch)
(update-position! ch)
(cond
((or (eof-object? ch) (not ch)) ch)
(fold-case? (char-downcase ch))
(else ch))))
(port
(lambda (op . args)
;; TODO: turn into string map?
(cond
((eq? op 'location) (location))
((eq? op 'read)
(process
(if (null? pushback-buffer)
(read-function)
(let ((ch (car pushback-buffer)))
(set! pushback-buffer (cdr pushback-buffer))
ch))))
((eq? op 'peek)
(let ((ch (port 'read)))
(port 'push ch)
ch))
((eq? op 'push)
(let ((ch (car args)))
(if (eqv? ch #\newline)
(begin
(set! line-number (- line-number 1))
(set! offset #f))
(set! offset (- offset 1)))
(set! pushback-buffer (cons ch pushback-buffer))))
((eq? op 'fold-case?) fold-case?)
((eq? op 'fold-case!) (set! fold-case? (car args)))
((eq? op 'set-datum-label!) (apply set-datum-label! args))
((eq? op 'get-datum-label) (apply get-datum-label args))
(else (error 'read->port 'invalid (cons op args)))))))
port))))
;;; ;;;;;;;;;;;;;;
;;; Character maps
;;; ;;;;;;;;;;;;;;
(define integer<=>
(lambda (x y)
(cond
((< x y) '<)
((= x y) '=)
(else '>))))
;;; Comparison on characters extended to #F, which is less than all
;;; characters.
(define char*<=>
(lambda (x y)
(cond
((and (not x) y) '<)
((and x (not y)) '>)
((and (not x) (not y) '=))
(else (integer<=> (char->integer x)
(char->integer y))))))
(define %charmap:<=> (set:<=>-to-map char*<=>))
(define %charmap:update (set:update %charmap:<=>))
(define charmap:update (map:update %charmap:update))
(define charmap:insert (map:insert %charmap:update))
(define charmap:search (map:search %charmap:<=>))
;;; ;;;;;;;;;;;;;;;;;;;;;;
;;; Readtable constructors
;;;
;;; Readtables are composed of a CHARMAP, which is a map from characters
;;; to actions, and a DEFAULT-ACTION, which is taken when there is no
;;; match in CHARMAP.
;;;
;;; An "action" is a procedure that takes four arguments:
;;;
;;; TABLE: The current table.
;;; CHAR: The character that was matched against the CHARMAP in TABLE.
;;; ACC: An arbitrary "accumulator" value that is different depending
;;; on the readtable in question.
;;; PORT: A port reader object.
;;; ;;;;;;;;;;;;;;;;;;;;;;
;;; (READTABLE:NEW DEFAULT-ACTION CHARMAP)
(define readtable:new cons)
(define %readtable:default-action car)
(define %readtable:charmap cdr)
;;; Run the action in TABLE assigned to CHAR, or the default action of
;;; TABLE if there is no entry for CHAR.
(define readtable:act
(lambda (table char acc port)
(let ((node (charmap:search (%readtable:charmap table)
char)))
(let ((action (if (null? node)
(%readtable:default-action table)
(map:val node))))
(action table char acc port)))))
;;; Run the action in TABLE with the next character from PORT.
(define readtable:next
(lambda (table acc port)
(readtable:act table (port 'read) acc port)))
;;; Return a new readtable where CHAR is bound to ACTION.
(define readtable:update
(lambda (table char action)
(readtable:new (%readtable:default-action table)
(car (charmap:insert
(%readtable:charmap table) char action)))))
;;; Update TABLE to act on all characters in LST with ACTION.
(define readtable:update-list
(lambda (table lst action)
(fold (lambda (char table)
(readtable:update table char action))
table
lst)))
;;; Construct new readtable with no characters in its map and
;;; DEFAULT-ACTION as the default action.
(define readtable:empty/default
(lambda (default-action)
(readtable:new default-action '())))
;;; Each value in FUNCTIONS is a list (PROCEDURE ARGS...) which is called
;;; like (PROCEDURE TABLE ARGS...) and returns a table.
(define readtable:process
(lambda (table . functions)
(fold (lambda (function table)
(apply (car function) table (cdr function)))
table
functions)))
;;; ;;;;;;;;;;;;;;;;;;
;;; Default actions
;;; ;;;;;;;;;;;;;;;;;;
;;; Return an error.
(define readtable:error
(lambda emsg
(lambda (table char acc port)
(error emsg char acc table port))))
;;; Discard the current character and continue reading the readtable.
(define readtable:skip
(lambda (table char acc port)
(readtable:act table (port 'read) acc port)))
;;; Discard char and return constant.
(define readtable:return
(lambda (return)
(lambda (table char acc port)
return)))
;;; Jump to a new readtable, discard it's return, and continue reading
;;; in the table.
(define readtable:jump-discard
(lambda (newtable)
(lambda (oldtable char acc port)
(readtable:act newtable (port 'read) '() port)
(readtable:act oldtable (port 'read) acc port))))
;;; Jump to a new readtable with the same characters.
(define readtable:jump
(lambda (newtable)
(lambda (oldtable char acc port)
(readtable:act newtable char acc port))))
;;; Jump to a new readtable, reading the new character, with the old
;;; readtable as ACC.
(define readtable:next/old-as-acc
(lambda (newtable)
(lambda (oldtable __ _ port)
(readtable:next newtable oldtable port))))
;;; Jump to a new readtable, reading the new character.
(define readtable:jump/next
(lambda (newtable)
(lambda (oldtable _ acc port)
(readtable:next newtable acc port))))
;;; ;;;;;;;;;;;;;;;;;
;;; Identifier reader
;;; ;;;;;;;;;;;;;;;;;
(define read:ident
(lambda (name location)
(lambda (op . args)
(cond
((eq? op 'type) 'ident)
((eq? op 'value) name)
(else (error 'read:ident "invalid operation" op args))))))
(define read:ident-builder
(lambda (start-char location)
(let ((char-list (linked-list:new)))
(char-list 'push start-char)
(lambda (op . args)
(cond
((eq? op 'finalize->ident)
(read:ident (list->string (char-list 'to-list)) location))
(else (apply char-list op args)))))))
;;; Push back CHAR and return ACC.
(define readtable:return-acc-as-ident
(lambda (table char acc port)
(port 'push char)
(acc 'finalize->ident)))
;;; Push CHAR to ACC and continue reading from TABLE.
(define readtable:push-char
(lambda (table char acc port)
(acc 'push-tail char)
(readtable:act table (port 'read) acc port)))
;;; Define a readtable that constructs an identifier by accepting all
;;; characters that are not listed.
(define readtable:exclude-from-identifiers
(lambda (table excluded)
(fold (lambda (char table)
(readtable:update table char readtable:return-acc-as-ident))
table
excluded)))
;;; ASCII whitespace.
(define readtable:ASCII-whitespace
(list #\newline
#\space
(integer->char #x09)
(integer->char #x0B)
(integer->char #x0C)
(integer->char #x0D)))
;;; Readtable for identifiers.
(define readtable:identifier
(readtable:process
(readtable:empty/default readtable:push-char)
(list readtable:exclude-from-identifiers
readtable:ASCII-whitespace)
(list readtable:exclude-from-identifiers
(list #\| %bol %eol #\' #\; #f))))
;;; Read an identifier starting with CHAR.
(define readtable:read-ident
(lambda (table char acc port)
(readtable:act readtable:identifier
(port 'read)
(read:ident-builder char
(port 'location))
port)))
;;; ;;;;;;;;;;;;;;;;;;;;
;;; Comments and whitespace reader
;;; ;;;;;;;;;;;;;;;;;;;;
;;; Readtable for a line comment.
(define readtable:read-to-newline
(readtable:process
(readtable:empty/default readtable:skip)
(list readtable:update #\newline (readtable:return #f))))
;;; ;;;;;;;;;;;
;;; List reader
;;;
;;; The reader updates the previous readtable to handle ). This means
;;; that this read table does not have to handle end-of-line, whitespace,
;;; etc.
;;; ;;;;;;;;;;;
;;; Read the end of an improper list.
(define readtable:read-improper-cdr
(lambda (table acc port)
(let ((val
(readtable:act (readtable:update table
%eol
(readtable:error
'read-improper-cdr
"proper list must have cdr"))
(port 'read)
#f
port)))
(acc 'set-cdr! val)
(let ((table (readtable:process
(readtable:empty/default (readtable:error
'read-improper-cdr
"improper list has 1 cdr"))
(list readtable:update-list
readtable:ASCII-whitespace
readtable:skip)
(list readtable:update %eol
(lambda dummy 'end-of-list)))))
(readtable:act table (port 'read) acc port)))))
;;; Generic reader loop for a list. It takes as input the table that has
;;; already been updated with end of list and improper list handlers.
(define readtable:read-list-loop
(lambda (table port)
(let ((acc (linked-list:new)))
(letrec ((loop
(lambda ()
(let ((value (readtable:act table
(port 'read)
acc
port)))
(cond
((eqv? value 'end-of-list) (acc 'to-list))
(else (acc 'push-tail value)
(loop)))))))
(loop)))))
;;; Readtable for a list, generic to proper and improper list
;;; readers.
(define readtable:table-for-list
(lambda (oldtable on-dot)
(readtable:process
oldtable
(list readtable:update %eol (readtable:return 'end-of-list))
(list readtable:update #\.
(lambda (table char acc port)
(let ((entire-identifier (readtable:read-ident
table
char
#f
port)))
(if (equal? entire-identifier ".")
(on-dot table acc port)
entire-identifier)))))))
;;; Read a proper or improper list.
(define readtable:read-list
(lambda (oldtable _ __ port)
(readtable:read-list-loop (readtable:table-for-list
oldtable
readtable:read-improper-cdr)
port)))
(define readtable:read-proper-list
(lambda (table port)
(readtable:read-list-loop (readtable:table-for-list
table
(readtable:error
'read-proper-list
"expected proper list"))
port)))
;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; Reader for stuff that start with "#"
;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(define readtable:digits
(list #\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9))
(define readtable:vector
(lambda (_ __ toplevel port)
(list 'vector (readtable:read-proper-list toplevel port))))
;;; Block comment reader.
;;;
;;; The outermost block comment reader is passed the toplevel reader as
;;; ACC. When the outermost block is finished, it will tail-call ACC.
;;; (It is basically the continuation of the reader.)
;;;
;;; When a nested block comment is found, it is passed #F as ACC, which
;;; it will not call. It will return an unspecified value.
;;;
;;; Since the read tables are not procedures, references to other tables
;;; in the same LETREC declaration must be protected with explicit LAMBDAs.
;;; Macros could make this much easier to read.
(define readtable:block-comment
(letrec ((potential-end
(readtable:process
(readtable:empty/default
(lambda (this char acc port) (readtable:act
loop
char
acc
port)))
(list readtable:update #\#
(lambda (this char acc port)
(if acc
(readtable:next acc #f port))))))
(potential-start
(readtable:process
(readtable:empty/default
(lambda (this char acc port) (readtable:act
loop
char
acc
port)))
(list readtable:update #\|
(lambda (this char acc port)
(readtable:next loop #f port)
(readtable:next loop acc port)))))
(loop
(readtable:process
(readtable:empty/default readtable:skip)
(list readtable:update #\#
(lambda (this char acc port)
(readtable:next potential-start
acc
port)))
(list readtable:update #\|
(lambda (this char acc port)
(readtable:next potential-end
acc
port))))))
loop))
;;; Encapsulate LINKED-LIST object with an additional value for the
;;; toplevel table.
(define linked-list/toplevel:new
(lambda (toplevel)
(let ((ll (linked-list:new)))
(lambda (op . args)
(cond
((eq? op 'toplevel) toplevel)
(else (apply ll op args)))))))
;;; Readtable for the number part of a datum label / reference. A label
;;; looks like "#[NUMBER]=" and a reference looks like "#[NUMBER]#".
(define readtable:datum-label-next
(readtable:process
(readtable:empty/default (readtable:error 'datum-label-next
"invalid datum label/ref"))
(list readtable:update-list readtable:digits readtable:push-char)
(list readtable:update #\=
(lambda (_ __ acc port)
;; All datum labels are saved as strings.
(let ((label (list->string (acc 'to-list))))
;; Datum labels are pairs, one with the label and the other
;; with the data that is to be used. The label is there to
;; detect when a datum label refers to iself: i.e.
;; #0=#0# .
;;
;; The CAR is the label and the CDR is the thing it refers
;; to.
(let ((datum-label (cons label '())))
(port 'set-datum-label! label datum-label)
(let ((next-value (readtable:next (acc 'toplevel)
#f
port)))
;; After reading the part after the datum label, check
;; that the labeled datum is not self a reference to
;; the datum label.
(if (and (pair? next-value)
(equal? (car next-value) label))
(error 'datum-label-next "datum label cannot be itself")
(set-cdr! datum-label next-value))
next-value)))))
(list readtable:update #\#
(lambda (_ __ acc port)
(let ((label (list->string (acc 'to-list))))
(let ((datum-label (port 'get-datum-label label)))
(if (null? datum-label)
(error 'datum-label-next
"unknown reference to datum label" label)
(map:val datum-label))))))))
;;; Reads the next toplevel datum, discards it, and then continues at the
;;; toplevel.
;;;
;;; TODO: The R7RS reader can cause side-effects due to #!FOLD-CASE. This
;;; must be supressed in datum comments. A method could be added to PORT
;;; that saves and restores mutable state (besides stream position).
(define readtable:datum-comment
(lambda (_ __ toplevel port)
(readtable:next toplevel #f port)
(readtable:next toplevel #f port)))
(define readtable:hash
(readtable:process
(readtable:empty/default (readtable:error 'hash "unimplemented"))
(list readtable:update #\| (readtable:jump/next readtable:block-comment))
(list readtable:update #\; readtable:datum-comment)
(list readtable:update-list readtable:digits ; Datum labels
(lambda (_ char toplevel port)
(readtable:act readtable:datum-label-next
char
(linked-list/toplevel:new toplevel)
port)))
(list readtable:update %bol readtable:vector)))
;;; ;;;;;;;;;;;;;;;;
;;; Toplevel reader.
;;; ;;;;;;;;;;;;;;;;
;;; This is defined as a function so that it dynamically loads each
;;; sub-readtable.
(define readtable:top
(lambda ()
(readtable:process
(readtable:empty/default readtable:read-ident)
(list readtable:update-list
readtable:ASCII-whitespace
readtable:skip)
(list readtable:update #f (readtable:return 'eof))
(list readtable:update %bol readtable:read-list)
(list readtable:update %eol (readtable:error 'top "unbalanced list"))
(list readtable:update #\# (readtable:next/old-as-acc
readtable:hash))
(list readtable:update #\;
(readtable:jump-discard readtable:read-to-newline)))))
(define intset:insert (set:insert (set:update integer<=>)))
(define intset:in (set:in integer<=>))
(define uncycle
(lambda (value)
(let ((cntr 0)
(used-counters '())
(pointers '()))
(letrec ((uncycle
(lambda (value)
(cond
((pair? value)
(let ((pair (assq value pointers)))
(if (pair? pair)
(begin
(set! used-counters
(car (intset:insert used-counters (cdr pair))))
(list 'ref (cdr pair)))
(begin
(set! pointers (cons (cons value cntr)
pointers))
(let ((cur-cntr cntr))
(set! cntr (+ 1 cntr))
(let ((returned (cons (uncycle (car value))
(uncycle (cdr value)))))
(if (not (null? (intset:in used-counters cur-cntr)))
(list 'def cur-cntr '= returned)
returned)))))))
((procedure? value)
(let ((type (value 'type)))
(cond
((eq? type 'ident) (value 'value))
(else (vector 'unrepresentable type)))))
(else value)))))
(uncycle value)))))
;;; ;;;;;;;;;;;
;;; Test reader
;;; ;;;;;;;;;;;
(define %list->read
(lambda (seq)
(port->read
(lambda ()
(if (null? seq)
#f
(let ((ch (car seq)))
(set! seq (cdr seq))
ch)))
"test")))
(define read-all
(lambda (str)
(let ((reader (%list->read (string->list str))))
(letrec ((loop
(lambda ()
(if (not (reader 'peek))
#t
(let ((value (readtable:act
(readtable:top) (reader 'read)
#f
reader)))
(display (list "return" (uncycle value)))
(newline)
(loop))))))
(loop)))))
(read-all "x yy zz ; this is a comment\nx call/cc ")
(read-all "(a b c def (ghi j) k )")
(read-all "( a . b )")
(read-all "( a .b . c)")
(read-all "#( a b y)")
(read-all "(x y #| this is a block\n comment\n |# z w)")
(read-all "#( a b #| this is a #| nested block |# comment|# z w)")
(read-all "#(a b #(c #|close#|comment|#|#y))")
(read-all "(this has a #;(call with (current continuation)) datum comment)")
(read-all "#0=(#0# not unix)")
|