diff options
| author | 2024-09-13 22:48:27 -0400 | |
|---|---|---|
| committer | 2024-09-13 22:48:27 -0400 | |
| commit | 45466ce4c445ce591a9a3ef31a63315728ed2166 (patch) | |
| tree | cbbb250ec881d0a317b4d6901d8854ef22df3009 /markov.impl.scm | |
markov generate the bible
Diffstat (limited to 'markov.impl.scm')
| -rw-r--r-- | markov.impl.scm | 212 |
1 files changed, 212 insertions, 0 deletions
diff --git a/markov.impl.scm b/markov.impl.scm new file mode 100644 index 0000000..a1c3148 --- /dev/null +++ b/markov.impl.scm @@ -0,0 +1,212 @@ +(define char-downcase* + (lambda (ch) + (if (char? ch) + (char-downcase ch) + ch))) + +(define port->reader + (lambda (port) + (list (lambda () (read-char port)) + eof-object?))) + +(define reader/pushback-buffer + (lambda (read-function eof?) + (let ((pushback-buffer '())) + (letrec ((reader + (lambda () + (if (null? pushback-buffer) + (read-function) + (let ((ch (car pushback-buffer))) + (set! pushback-buffer (cdr pushback-buffer)) + ch)))) + (pushback + (lambda (ch) + (if (not (eof? ch)) + (set! pushback-buffer (cons ch pushback-buffer)))))) + (list pushback reader eof?))))) + +(define reader-words + (lambda (part-of-a-word? pushback reader eof?) + (letrec ((delimiter? + (lambda (ch) + (or (eof? ch) + (not (part-of-a-word? ch)) + (eqv? ch #\space)))) + (read-until-delimiter + (lambda (lst) + (let ((ch (reader))) + (if (delimiter? ch) + (begin + (pushback ch) + (list->string (reverse lst))) + (read-until-delimiter (cons ch lst)))))) + (reader-wrapper + (lambda () + (let ((ch (reader))) + (cond + ((not (char? ch)) #f) + ((char-whitespace? ch) (reader-wrapper)) + (else (read-until-delimiter (list ch)))))))) + (list reader-wrapper)))) + +(define port->reader-words + (lambda (port) + (apply reader-words + char-alphabetic? + (apply reader/pushback-buffer + (port->reader port))))) + +(define reader-pairs + (lambda (reader) + (let ((previous-word #f)) + (lambda () + (if (not previous-word) + (set! previous-word (reader))) + (if previous-word + (let ((next-word (reader)) + (previous-word* previous-word)) + (if next-word + (begin + (set! previous-word next-word) + (cons previous-word* next-word)) + #f)) + #f))))) + +(define port->reader-of-pairs + (lambda (port) + (apply reader-pairs (port->reader-words port)))) + +(define dump-pairs + (lambda (file) + (let ((port (open-input-file file))) + (let ((reader (port->reader-of-pairs port))) + (letrec ((loop + (lambda () + (let ((pair (reader))) + (if pair + (begin + (display pair) + (loop))))))) + (loop) + (newline)))))) + +(define collect-occurences + (lambda (file) + (let ((port (open-input-file file)) + (table (make-hash-table))) + (let ((reader (port->reader-of-pairs port))) + (letrec ((handle + (lambda (first next) + (hash-table-update! + table + first + (lambda (next-table) + (hash-table-update!/default + next-table + next + (lambda (count) (+ count 1)) + 1) + next-table) + (lambda () (make-hash-table))) + (looper))) + (looper + (lambda () + (let ((pair (reader))) + (if pair + (handle (car pair) (cdr pair))))))) + (looper) + (close-input-port port) + table))))) + +(define display-occurences + (lambda (table) + (hash-table-walk table + (lambda (first next-table) + (hash-table-walk next-table + (lambda (next count) + (display (list first next count)))))) + (newline))) + +(define occurence-table->probabilities + (lambda (next-table) + (let ((total (hash-table-fold next-table + (lambda (_ val total) + (+ val total)) + 0))) + (let ((acc + (hash-table-fold next-table + (lambda (key val acc) + (let ((summed-probability (car acc)) + (alist (cdr acc))) + (let ((summed-probability + (+ summed-probability + (/ val total)))) + (cons summed-probability + (cons (cons key summed-probability) + alist))))) + (cons 0 '())))) + (reverse (cdr acc)))))) + +(define occurences->probabilities + (lambda (table) + (let ((probability-table + (hash-table-fold table + (lambda (name next-table new-table) + (hash-table-set! new-table + name + (occurence-table->probabilities + next-table)) + new-table) + (make-hash-table)))) + probability-table))) + +(define dump-probabilities + (lambda (table) + (hash-table-walk table + (lambda (name probs) + (display (list name probs)))) + (newline))) + +;;; Lehmer random number generator +(define random + (let ((m 65537) + (a 75) + (previous 10000)) + (lambda () + (set! previous (remainder (* a previous) + m)) + (/ previous (- m 1))))) + +(define probability-list->guess + (lambda (lst normalized-random-number) + (if (null? lst) + (error (list "bad number or list" lst normalized-random-number)) + (let ((name (caar lst)) + (sum-prob (cdar lst))) + (if (< sum-prob normalized-random-number) + (probability-list->guess (cdr lst) + normalized-random-number) + name))))) + +(define generate-next-word + (lambda (probabilities previous-word) + (probability-list->guess (hash-table-ref probabilities + previous-word) + (random)))) + +(define make-table-from-file + (lambda (filename) + (occurences->probabilities + (collect-occurences filename)))) + +(define guess-sentence + (lambda (start-word table number) + (letrec ((collector + (lambda (start-word number collected) + (if (<= number 0) + (reverse collected) + (let ((name (generate-next-word table start-word))) + (collector name (- number 1) (cons name + collected))))))) + (collector start-word number (cons start-word '()))))) + |
