markov-scm/markov.impl.scm

215 lines
7.1 KiB
Scheme

(define char-downcase*
(lambda (ch)
(if (char? ch)
(char-downcase ch)
ch)))
(define port->reader
(lambda (port)
(list (lambda () (read-char port))
eof-object?)))
(define reader/pushback-buffer
(lambda (read-function eof?)
(let ((pushback-buffer '()))
(letrec ((reader
(lambda ()
(if (null? pushback-buffer)
(read-function)
(let ((ch (car pushback-buffer)))
(set! pushback-buffer (cdr pushback-buffer))
ch))))
(pushback
(lambda (ch)
(if (not (eof? ch))
(set! pushback-buffer (cons ch pushback-buffer))))))
(list pushback reader eof?)))))
(define reader-words
(lambda (part-of-a-word? pushback reader eof?)
(letrec ((delimiter?
(lambda (ch)
(or (eof? ch)
(not (part-of-a-word? ch))
(eqv? ch #\space))))
(read-until-delimiter
(lambda (lst)
(let ((ch (reader)))
(if (delimiter? ch)
(begin
(pushback ch)
(list->string (reverse lst)))
(read-until-delimiter (cons ch lst))))))
(reader-wrapper
(lambda ()
(let ((ch (reader)))
(cond
((not (char? ch)) #f)
((char-whitespace? ch) (reader-wrapper))
(else (read-until-delimiter (list ch))))))))
(list reader-wrapper))))
(define port->reader-words
(lambda (port)
(apply reader-words
char-alphabetic?
(apply reader/pushback-buffer
(port->reader port)))))
(define reader-pairs
(lambda (reader)
(let ((previous-word #f))
(lambda ()
(if (not previous-word)
(set! previous-word (reader)))
(if previous-word
(let ((next-word (reader))
(previous-word* previous-word))
(if next-word
(begin
(set! previous-word next-word)
(cons previous-word* next-word))
#f))
#f)))))
(define port->reader-of-pairs
(lambda (port)
(apply reader-pairs (port->reader-words port))))
(define dump-pairs
(lambda (file)
(let ((port (open-input-file file)))
(let ((reader (port->reader-of-pairs port)))
(letrec ((loop
(lambda ()
(let ((pair (reader)))
(if pair
(begin
(display pair)
(loop)))))))
(loop)
(newline))))))
(define collect-occurences
(lambda (file)
(let ((port (open-input-file file))
(table (make-hash-table)))
(let ((reader (port->reader-of-pairs port)))
(letrec ((handle
(lambda (first next)
(hash-table-update!
table
first
(lambda (next-table)
(hash-table-update!/default
next-table
next
(lambda (count) (+ count 1))
1)
next-table)
(lambda () (make-hash-table)))
(looper)))
(looper
(lambda ()
(let ((pair (reader)))
(if pair
(handle (car pair) (cdr pair)))))))
(looper)
(close-input-port port)
table)))))
(define display-occurences
(lambda (table)
(hash-table-walk table
(lambda (first next-table)
(hash-table-walk next-table
(lambda (next count)
(display (list first next count))))))
(newline)))
(define occurence-table->probabilities
(lambda (next-table)
(let ((total (hash-table-fold next-table
(lambda (_ val total)
(+ val total))
0)))
(let ((acc
(hash-table-fold next-table
(lambda (key val acc)
(let ((summed-probability (car acc))
(alist (cdr acc)))
(let ((summed-probability
(+ summed-probability
(/ val total))))
(cons summed-probability
(cons (cons key summed-probability)
alist)))))
(cons 0 '()))))
(reverse (cdr acc))))))
(define occurences->probabilities
(lambda (table)
(let ((probability-table
(hash-table-fold table
(lambda (name next-table new-table)
(hash-table-set! new-table
name
(occurence-table->probabilities
next-table))
new-table)
(make-hash-table))))
probability-table)))
(define dump-probabilities
(lambda (table)
(hash-table-walk table
(lambda (name probs)
(display (list name probs))))
(newline)))
;;; Lehmer random number generator
(define %random-seed 10000)
(define random
(let ((m 65537)
(a 75))
(lambda ()
(set! %random-seed (remainder (* a %random-seed)
m))
(/ %random-seed (- m 1)))))
(define probability-list->guess
(lambda (lst normalized-random-number)
(if (null? lst)
(error (list "bad number or list" lst normalized-random-number))
(let ((name (caar lst))
(sum-prob (cdar lst)))
(if (< sum-prob normalized-random-number)
(probability-list->guess (cdr lst)
normalized-random-number)
name)))))
(define generate-next-word
(lambda (probabilities previous-word)
(probability-list->guess (hash-table-ref probabilities
previous-word)
(random))))
(define make-table-from-file
(lambda (filename)
(occurences->probabilities
(collect-occurences filename))))
(define guess-sentence
(lambda (start-word table number)
(letrec ((collector
(lambda (start-word number collected)
(if (<= number 0)
(reverse collected)
(let ((name (generate-next-word table start-word)))
(collector name (- number 1) (cons name
collected)))))))
(collector start-word number (cons start-word '())))))