aboutsummaryrefslogtreecommitdiffstats
path: root/markov.impl.scm
diff options
context:
space:
mode:
authorGravatar Peter McGoron 2024-09-13 22:48:27 -0400
committerGravatar Peter McGoron 2024-09-13 22:48:27 -0400
commit45466ce4c445ce591a9a3ef31a63315728ed2166 (patch)
treecbbb250ec881d0a317b4d6901d8854ef22df3009 /markov.impl.scm
markov generate the bible
Diffstat (limited to 'markov.impl.scm')
-rw-r--r--markov.impl.scm212
1 files changed, 212 insertions, 0 deletions
diff --git a/markov.impl.scm b/markov.impl.scm
new file mode 100644
index 0000000..a1c3148
--- /dev/null
+++ b/markov.impl.scm
@@ -0,0 +1,212 @@
+(define char-downcase*
+ (lambda (ch)
+ (if (char? ch)
+ (char-downcase ch)
+ ch)))
+
+(define port->reader
+ (lambda (port)
+ (list (lambda () (read-char port))
+ eof-object?)))
+
+(define reader/pushback-buffer
+ (lambda (read-function eof?)
+ (let ((pushback-buffer '()))
+ (letrec ((reader
+ (lambda ()
+ (if (null? pushback-buffer)
+ (read-function)
+ (let ((ch (car pushback-buffer)))
+ (set! pushback-buffer (cdr pushback-buffer))
+ ch))))
+ (pushback
+ (lambda (ch)
+ (if (not (eof? ch))
+ (set! pushback-buffer (cons ch pushback-buffer))))))
+ (list pushback reader eof?)))))
+
+(define reader-words
+ (lambda (part-of-a-word? pushback reader eof?)
+ (letrec ((delimiter?
+ (lambda (ch)
+ (or (eof? ch)
+ (not (part-of-a-word? ch))
+ (eqv? ch #\space))))
+ (read-until-delimiter
+ (lambda (lst)
+ (let ((ch (reader)))
+ (if (delimiter? ch)
+ (begin
+ (pushback ch)
+ (list->string (reverse lst)))
+ (read-until-delimiter (cons ch lst))))))
+ (reader-wrapper
+ (lambda ()
+ (let ((ch (reader)))
+ (cond
+ ((not (char? ch)) #f)
+ ((char-whitespace? ch) (reader-wrapper))
+ (else (read-until-delimiter (list ch))))))))
+ (list reader-wrapper))))
+
+(define port->reader-words
+ (lambda (port)
+ (apply reader-words
+ char-alphabetic?
+ (apply reader/pushback-buffer
+ (port->reader port)))))
+
+(define reader-pairs
+ (lambda (reader)
+ (let ((previous-word #f))
+ (lambda ()
+ (if (not previous-word)
+ (set! previous-word (reader)))
+ (if previous-word
+ (let ((next-word (reader))
+ (previous-word* previous-word))
+ (if next-word
+ (begin
+ (set! previous-word next-word)
+ (cons previous-word* next-word))
+ #f))
+ #f)))))
+
+(define port->reader-of-pairs
+ (lambda (port)
+ (apply reader-pairs (port->reader-words port))))
+
+(define dump-pairs
+ (lambda (file)
+ (let ((port (open-input-file file)))
+ (let ((reader (port->reader-of-pairs port)))
+ (letrec ((loop
+ (lambda ()
+ (let ((pair (reader)))
+ (if pair
+ (begin
+ (display pair)
+ (loop)))))))
+ (loop)
+ (newline))))))
+
+(define collect-occurences
+ (lambda (file)
+ (let ((port (open-input-file file))
+ (table (make-hash-table)))
+ (let ((reader (port->reader-of-pairs port)))
+ (letrec ((handle
+ (lambda (first next)
+ (hash-table-update!
+ table
+ first
+ (lambda (next-table)
+ (hash-table-update!/default
+ next-table
+ next
+ (lambda (count) (+ count 1))
+ 1)
+ next-table)
+ (lambda () (make-hash-table)))
+ (looper)))
+ (looper
+ (lambda ()
+ (let ((pair (reader)))
+ (if pair
+ (handle (car pair) (cdr pair)))))))
+ (looper)
+ (close-input-port port)
+ table)))))
+
+(define display-occurences
+ (lambda (table)
+ (hash-table-walk table
+ (lambda (first next-table)
+ (hash-table-walk next-table
+ (lambda (next count)
+ (display (list first next count))))))
+ (newline)))
+
+(define occurence-table->probabilities
+ (lambda (next-table)
+ (let ((total (hash-table-fold next-table
+ (lambda (_ val total)
+ (+ val total))
+ 0)))
+ (let ((acc
+ (hash-table-fold next-table
+ (lambda (key val acc)
+ (let ((summed-probability (car acc))
+ (alist (cdr acc)))
+ (let ((summed-probability
+ (+ summed-probability
+ (/ val total))))
+ (cons summed-probability
+ (cons (cons key summed-probability)
+ alist)))))
+ (cons 0 '()))))
+ (reverse (cdr acc))))))
+
+(define occurences->probabilities
+ (lambda (table)
+ (let ((probability-table
+ (hash-table-fold table
+ (lambda (name next-table new-table)
+ (hash-table-set! new-table
+ name
+ (occurence-table->probabilities
+ next-table))
+ new-table)
+ (make-hash-table))))
+ probability-table)))
+
+(define dump-probabilities
+ (lambda (table)
+ (hash-table-walk table
+ (lambda (name probs)
+ (display (list name probs))))
+ (newline)))
+
+;;; Lehmer random number generator
+(define random
+ (let ((m 65537)
+ (a 75)
+ (previous 10000))
+ (lambda ()
+ (set! previous (remainder (* a previous)
+ m))
+ (/ previous (- m 1)))))
+
+(define probability-list->guess
+ (lambda (lst normalized-random-number)
+ (if (null? lst)
+ (error (list "bad number or list" lst normalized-random-number))
+ (let ((name (caar lst))
+ (sum-prob (cdar lst)))
+ (if (< sum-prob normalized-random-number)
+ (probability-list->guess (cdr lst)
+ normalized-random-number)
+ name)))))
+
+(define generate-next-word
+ (lambda (probabilities previous-word)
+ (probability-list->guess (hash-table-ref probabilities
+ previous-word)
+ (random))))
+
+(define make-table-from-file
+ (lambda (filename)
+ (occurences->probabilities
+ (collect-occurences filename))))
+
+(define guess-sentence
+ (lambda (start-word table number)
+ (letrec ((collector
+ (lambda (start-word number collected)
+ (if (<= number 0)
+ (reverse collected)
+ (let ((name (generate-next-word table start-word)))
+ (collector name (- number 1) (cons name
+ collected)))))))
+ (collector start-word number (cons start-word '())))))
+