2024-09-13 22:48:27 -04:00
|
|
|
(define char-downcase*
|
|
|
|
(lambda (ch)
|
|
|
|
(if (char? ch)
|
|
|
|
(char-downcase ch)
|
|
|
|
ch)))
|
|
|
|
|
|
|
|
(define port->reader
|
|
|
|
(lambda (port)
|
|
|
|
(list (lambda () (read-char port))
|
|
|
|
eof-object?)))
|
|
|
|
|
|
|
|
(define reader/pushback-buffer
|
|
|
|
(lambda (read-function eof?)
|
|
|
|
(let ((pushback-buffer '()))
|
|
|
|
(letrec ((reader
|
|
|
|
(lambda ()
|
|
|
|
(if (null? pushback-buffer)
|
|
|
|
(read-function)
|
|
|
|
(let ((ch (car pushback-buffer)))
|
|
|
|
(set! pushback-buffer (cdr pushback-buffer))
|
|
|
|
ch))))
|
|
|
|
(pushback
|
|
|
|
(lambda (ch)
|
|
|
|
(if (not (eof? ch))
|
|
|
|
(set! pushback-buffer (cons ch pushback-buffer))))))
|
|
|
|
(list pushback reader eof?)))))
|
|
|
|
|
|
|
|
(define reader-words
|
|
|
|
(lambda (part-of-a-word? pushback reader eof?)
|
|
|
|
(letrec ((delimiter?
|
|
|
|
(lambda (ch)
|
|
|
|
(or (eof? ch)
|
|
|
|
(not (part-of-a-word? ch))
|
|
|
|
(eqv? ch #\space))))
|
|
|
|
(read-until-delimiter
|
|
|
|
(lambda (lst)
|
|
|
|
(let ((ch (reader)))
|
|
|
|
(if (delimiter? ch)
|
|
|
|
(begin
|
|
|
|
(pushback ch)
|
|
|
|
(list->string (reverse lst)))
|
|
|
|
(read-until-delimiter (cons ch lst))))))
|
|
|
|
(reader-wrapper
|
|
|
|
(lambda ()
|
|
|
|
(let ((ch (reader)))
|
|
|
|
(cond
|
|
|
|
((not (char? ch)) #f)
|
|
|
|
((char-whitespace? ch) (reader-wrapper))
|
|
|
|
(else (read-until-delimiter (list ch))))))))
|
|
|
|
(list reader-wrapper))))
|
|
|
|
|
|
|
|
(define port->reader-words
|
|
|
|
(lambda (port)
|
|
|
|
(apply reader-words
|
|
|
|
char-alphabetic?
|
|
|
|
(apply reader/pushback-buffer
|
|
|
|
(port->reader port)))))
|
|
|
|
|
|
|
|
(define reader-pairs
|
|
|
|
(lambda (reader)
|
|
|
|
(let ((previous-word #f))
|
|
|
|
(lambda ()
|
|
|
|
(if (not previous-word)
|
|
|
|
(set! previous-word (reader)))
|
|
|
|
(if previous-word
|
|
|
|
(let ((next-word (reader))
|
|
|
|
(previous-word* previous-word))
|
|
|
|
(if next-word
|
|
|
|
(begin
|
|
|
|
(set! previous-word next-word)
|
|
|
|
(cons previous-word* next-word))
|
|
|
|
#f))
|
|
|
|
#f)))))
|
|
|
|
|
|
|
|
(define port->reader-of-pairs
|
|
|
|
(lambda (port)
|
|
|
|
(apply reader-pairs (port->reader-words port))))
|
|
|
|
|
|
|
|
(define dump-pairs
|
|
|
|
(lambda (file)
|
|
|
|
(let ((port (open-input-file file)))
|
|
|
|
(let ((reader (port->reader-of-pairs port)))
|
|
|
|
(letrec ((loop
|
|
|
|
(lambda ()
|
|
|
|
(let ((pair (reader)))
|
|
|
|
(if pair
|
|
|
|
(begin
|
|
|
|
(display pair)
|
|
|
|
(loop)))))))
|
|
|
|
(loop)
|
|
|
|
(newline))))))
|
|
|
|
|
|
|
|
(define collect-occurences
|
|
|
|
(lambda (file)
|
|
|
|
(let ((port (open-input-file file))
|
|
|
|
(table (make-hash-table)))
|
|
|
|
(let ((reader (port->reader-of-pairs port)))
|
|
|
|
(letrec ((handle
|
|
|
|
(lambda (first next)
|
|
|
|
(hash-table-update!
|
|
|
|
table
|
|
|
|
first
|
|
|
|
(lambda (next-table)
|
|
|
|
(hash-table-update!/default
|
|
|
|
next-table
|
|
|
|
next
|
|
|
|
(lambda (count) (+ count 1))
|
|
|
|
1)
|
|
|
|
next-table)
|
|
|
|
(lambda () (make-hash-table)))
|
|
|
|
(looper)))
|
|
|
|
(looper
|
|
|
|
(lambda ()
|
|
|
|
(let ((pair (reader)))
|
|
|
|
(if pair
|
|
|
|
(handle (car pair) (cdr pair)))))))
|
|
|
|
(looper)
|
|
|
|
(close-input-port port)
|
|
|
|
table)))))
|
|
|
|
|
|
|
|
(define display-occurences
|
|
|
|
(lambda (table)
|
|
|
|
(hash-table-walk table
|
|
|
|
(lambda (first next-table)
|
|
|
|
(hash-table-walk next-table
|
|
|
|
(lambda (next count)
|
|
|
|
(display (list first next count))))))
|
|
|
|
(newline)))
|
|
|
|
|
|
|
|
(define occurence-table->probabilities
|
|
|
|
(lambda (next-table)
|
|
|
|
(let ((total (hash-table-fold next-table
|
|
|
|
(lambda (_ val total)
|
|
|
|
(+ val total))
|
|
|
|
0)))
|
|
|
|
(let ((acc
|
|
|
|
(hash-table-fold next-table
|
|
|
|
(lambda (key val acc)
|
|
|
|
(let ((summed-probability (car acc))
|
|
|
|
(alist (cdr acc)))
|
|
|
|
(let ((summed-probability
|
|
|
|
(+ summed-probability
|
|
|
|
(/ val total))))
|
|
|
|
(cons summed-probability
|
|
|
|
(cons (cons key summed-probability)
|
|
|
|
alist)))))
|
|
|
|
(cons 0 '()))))
|
|
|
|
(reverse (cdr acc))))))
|
|
|
|
|
|
|
|
(define occurences->probabilities
|
|
|
|
(lambda (table)
|
|
|
|
(let ((probability-table
|
|
|
|
(hash-table-fold table
|
|
|
|
(lambda (name next-table new-table)
|
|
|
|
(hash-table-set! new-table
|
|
|
|
name
|
|
|
|
(occurence-table->probabilities
|
|
|
|
next-table))
|
|
|
|
new-table)
|
|
|
|
(make-hash-table))))
|
|
|
|
probability-table)))
|
|
|
|
|
|
|
|
(define dump-probabilities
|
|
|
|
(lambda (table)
|
|
|
|
(hash-table-walk table
|
|
|
|
(lambda (name probs)
|
|
|
|
(display (list name probs))))
|
|
|
|
(newline)))
|
|
|
|
|
|
|
|
;;; Lehmer random number generator
|
2024-09-14 11:28:56 -04:00
|
|
|
|
|
|
|
(define %random-seed 10000)
|
|
|
|
|
2024-09-13 22:48:27 -04:00
|
|
|
(define random
|
|
|
|
(let ((m 65537)
|
2024-09-14 11:28:56 -04:00
|
|
|
(a 75))
|
2024-09-13 22:48:27 -04:00
|
|
|
(lambda ()
|
2024-09-14 11:28:56 -04:00
|
|
|
(set! %random-seed (remainder (* a %random-seed)
|
|
|
|
m))
|
|
|
|
(/ %random-seed (- m 1)))))
|
2024-09-13 22:48:27 -04:00
|
|
|
|
|
|
|
(define probability-list->guess
|
|
|
|
(lambda (lst normalized-random-number)
|
|
|
|
(if (null? lst)
|
|
|
|
(error (list "bad number or list" lst normalized-random-number))
|
|
|
|
(let ((name (caar lst))
|
|
|
|
(sum-prob (cdar lst)))
|
|
|
|
(if (< sum-prob normalized-random-number)
|
|
|
|
(probability-list->guess (cdr lst)
|
|
|
|
normalized-random-number)
|
|
|
|
name)))))
|
|
|
|
|
|
|
|
(define generate-next-word
|
|
|
|
(lambda (probabilities previous-word)
|
|
|
|
(probability-list->guess (hash-table-ref probabilities
|
|
|
|
previous-word)
|
|
|
|
(random))))
|
|
|
|
|
|
|
|
(define make-table-from-file
|
|
|
|
(lambda (filename)
|
|
|
|
(occurences->probabilities
|
|
|
|
(collect-occurences filename))))
|
|
|
|
|
|
|
|
(define guess-sentence
|
|
|
|
(lambda (start-word table number)
|
|
|
|
(letrec ((collector
|
|
|
|
(lambda (start-word number collected)
|
|
|
|
(if (<= number 0)
|
|
|
|
(reverse collected)
|
|
|
|
(let ((name (generate-next-word table start-word)))
|
|
|
|
(collector name (- number 1) (cons name
|
|
|
|
collected)))))))
|
|
|
|
(collector start-word number (cons start-word '())))))
|
|
|
|
|