(define char-downcase* (lambda (ch) (if (char? ch) (char-downcase ch) ch))) (define port->reader (lambda (port) (list (lambda () (read-char port)) eof-object?))) (define reader/pushback-buffer (lambda (read-function eof?) (let ((pushback-buffer '())) (letrec ((reader (lambda () (if (null? pushback-buffer) (read-function) (let ((ch (car pushback-buffer))) (set! pushback-buffer (cdr pushback-buffer)) ch)))) (pushback (lambda (ch) (if (not (eof? ch)) (set! pushback-buffer (cons ch pushback-buffer)))))) (list pushback reader eof?))))) (define reader-words (lambda (part-of-a-word? pushback reader eof?) (letrec ((delimiter? (lambda (ch) (or (eof? ch) (not (part-of-a-word? ch)) (eqv? ch #\space)))) (read-until-delimiter (lambda (lst) (let ((ch (reader))) (if (delimiter? ch) (begin (pushback ch) (list->string (reverse lst))) (read-until-delimiter (cons ch lst)))))) (reader-wrapper (lambda () (let ((ch (reader))) (cond ((not (char? ch)) #f) ((char-whitespace? ch) (reader-wrapper)) (else (read-until-delimiter (list ch)))))))) (list reader-wrapper)))) (define port->reader-words (lambda (port) (apply reader-words char-alphabetic? (apply reader/pushback-buffer (port->reader port))))) (define reader-pairs (lambda (reader) (let ((previous-word #f)) (lambda () (if (not previous-word) (set! previous-word (reader))) (if previous-word (let ((next-word (reader)) (previous-word* previous-word)) (if next-word (begin (set! previous-word next-word) (cons previous-word* next-word)) #f)) #f))))) (define port->reader-of-pairs (lambda (port) (apply reader-pairs (port->reader-words port)))) (define dump-pairs (lambda (file) (let ((port (open-input-file file))) (let ((reader (port->reader-of-pairs port))) (letrec ((loop (lambda () (let ((pair (reader))) (if pair (begin (display pair) (loop))))))) (loop) (newline)))))) (define collect-occurences (lambda (file) (let ((port (open-input-file file)) (table (make-hash-table))) (let ((reader (port->reader-of-pairs port))) (letrec ((handle (lambda (first next) (hash-table-update! table first (lambda (next-table) (hash-table-update!/default next-table next (lambda (count) (+ count 1)) 1) next-table) (lambda () (make-hash-table))) (looper))) (looper (lambda () (let ((pair (reader))) (if pair (handle (car pair) (cdr pair))))))) (looper) (close-input-port port) table))))) (define display-occurences (lambda (table) (hash-table-walk table (lambda (first next-table) (hash-table-walk next-table (lambda (next count) (display (list first next count)))))) (newline))) (define occurence-table->probabilities (lambda (next-table) (let ((total (hash-table-fold next-table (lambda (_ val total) (+ val total)) 0))) (let ((acc (hash-table-fold next-table (lambda (key val acc) (let ((summed-probability (car acc)) (alist (cdr acc))) (let ((summed-probability (+ summed-probability (/ val total)))) (cons summed-probability (cons (cons key summed-probability) alist))))) (cons 0 '())))) (reverse (cdr acc)))))) (define occurences->probabilities (lambda (table) (let ((probability-table (hash-table-fold table (lambda (name next-table new-table) (hash-table-set! new-table name (occurence-table->probabilities next-table)) new-table) (make-hash-table)))) probability-table))) (define dump-probabilities (lambda (table) (hash-table-walk table (lambda (name probs) (display (list name probs)))) (newline))) ;;; Lehmer random number generator (define random (let ((m 65537) (a 75) (previous 10000)) (lambda () (set! previous (remainder (* a previous) m)) (/ previous (- m 1))))) (define probability-list->guess (lambda (lst normalized-random-number) (if (null? lst) (error (list "bad number or list" lst normalized-random-number)) (let ((name (caar lst)) (sum-prob (cdar lst))) (if (< sum-prob normalized-random-number) (probability-list->guess (cdr lst) normalized-random-number) name))))) (define generate-next-word (lambda (probabilities previous-word) (probability-list->guess (hash-table-ref probabilities previous-word) (random)))) (define make-table-from-file (lambda (filename) (occurences->probabilities (collect-occurences filename)))) (define guess-sentence (lambda (start-word table number) (letrec ((collector (lambda (start-word number collected) (if (<= number 0) (reverse collected) (let ((name (generate-next-word table start-word))) (collector name (- number 1) (cons name collected))))))) (collector start-word number (cons start-word '())))))