blob: bb6795579d65b9122c14bb5a9c326d2f75b11820 (
plain) (
blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
|
(define char-downcase*
(lambda (ch)
(if (char? ch)
(char-downcase ch)
ch)))
(define port->reader
(lambda (port)
(list (lambda () (read-char port))
eof-object?)))
(define reader/pushback-buffer
(lambda (read-function eof?)
(let ((pushback-buffer '()))
(letrec ((reader
(lambda ()
(if (null? pushback-buffer)
(read-function)
(let ((ch (car pushback-buffer)))
(set! pushback-buffer (cdr pushback-buffer))
ch))))
(pushback
(lambda (ch)
(if (not (eof? ch))
(set! pushback-buffer (cons ch pushback-buffer))))))
(list pushback reader eof?)))))
(define reader-words
(lambda (part-of-a-word? pushback reader eof?)
(letrec ((delimiter?
(lambda (ch)
(or (eof? ch)
(not (part-of-a-word? ch))
(eqv? ch #\space))))
(read-until-delimiter
(lambda (lst)
(let ((ch (reader)))
(if (delimiter? ch)
(begin
(pushback ch)
(list->string (reverse lst)))
(read-until-delimiter (cons ch lst))))))
(reader-wrapper
(lambda ()
(let ((ch (reader)))
(cond
((not (char? ch)) #f)
((char-whitespace? ch) (reader-wrapper))
(else (read-until-delimiter (list ch))))))))
(list reader-wrapper))))
(define port->reader-words
(lambda (port)
(apply reader-words
char-alphabetic?
(apply reader/pushback-buffer
(port->reader port)))))
(define reader-pairs
(lambda (reader)
(let ((previous-word #f))
(lambda ()
(if (not previous-word)
(set! previous-word (reader)))
(if previous-word
(let ((next-word (reader))
(previous-word* previous-word))
(if next-word
(begin
(set! previous-word next-word)
(cons previous-word* next-word))
#f))
#f)))))
(define port->reader-of-pairs
(lambda (port)
(apply reader-pairs (port->reader-words port))))
(define dump-pairs
(lambda (file)
(let ((port (open-input-file file)))
(let ((reader (port->reader-of-pairs port)))
(letrec ((loop
(lambda ()
(let ((pair (reader)))
(if pair
(begin
(display pair)
(loop)))))))
(loop)
(newline))))))
(define collect-occurences
(lambda (file)
(let ((port (open-input-file file))
(table (make-hash-table)))
(let ((reader (port->reader-of-pairs port)))
(letrec ((handle
(lambda (first next)
(hash-table-update!
table
first
(lambda (next-table)
(hash-table-update!/default
next-table
next
(lambda (count) (+ count 1))
1)
next-table)
(lambda () (make-hash-table)))
(looper)))
(looper
(lambda ()
(let ((pair (reader)))
(if pair
(handle (car pair) (cdr pair)))))))
(looper)
(close-input-port port)
table)))))
(define display-occurences
(lambda (table)
(hash-table-walk table
(lambda (first next-table)
(hash-table-walk next-table
(lambda (next count)
(display (list first next count))))))
(newline)))
(define occurence-table->probabilities
(lambda (next-table)
(let ((total (hash-table-fold next-table
(lambda (_ val total)
(+ val total))
0)))
(let ((acc
(hash-table-fold next-table
(lambda (key val acc)
(let ((summed-probability (car acc))
(alist (cdr acc)))
(let ((summed-probability
(+ summed-probability
(/ val total))))
(cons summed-probability
(cons (cons key summed-probability)
alist)))))
(cons 0 '()))))
(reverse (cdr acc))))))
(define occurences->probabilities
(lambda (table)
(let ((probability-table
(hash-table-fold table
(lambda (name next-table new-table)
(hash-table-set! new-table
name
(occurence-table->probabilities
next-table))
new-table)
(make-hash-table))))
probability-table)))
(define dump-probabilities
(lambda (table)
(hash-table-walk table
(lambda (name probs)
(display (list name probs))))
(newline)))
;;; Lehmer random number generator
(cond-expand
(chicken (import (chicken random))
(define random generate-random-real))
(else (define %random-seed 10000)
(define random
(let ((m 65537)
(a 75))
(lambda ()
(set! %random-seed (remainder (* a %random-seed)
m))
(/ %random-seed (- m 1)))))))
(define probability-list->guess
(lambda (lst normalized-random-number)
(if (null? lst)
(error (list "bad number or list" lst normalized-random-number))
(let ((name (caar lst))
(sum-prob (cdar lst)))
(if (< sum-prob normalized-random-number)
(probability-list->guess (cdr lst)
normalized-random-number)
name)))))
(define generate-next-word
(lambda (probabilities previous-word)
(probability-list->guess (hash-table-ref probabilities
previous-word)
(random))))
(define make-table-from-file
(lambda (filename)
(occurences->probabilities
(collect-occurences filename))))
(define guess-sentence
(lambda (start-word table number)
(letrec ((collector
(lambda (start-word number collected)
(if (<= number 0)
(reverse collected)
(let ((name (generate-next-word table start-word)))
(collector name (- number 1) (cons name
collected)))))))
(collector start-word number (cons start-word '())))))
|