markov generate the bible

This commit is contained in:
Peter McGoron 2024-09-13 22:48:27 -04:00
commit 45466ce4c4
7 changed files with 300 additions and 0 deletions

13
Makefile Normal file
View File

@ -0,0 +1,13 @@
.PHONY: run
run: kjv.txt markov.chicken.so markov.import.scm
csi toplevel.scm
markov.chicken.scm: markov.impl.scm
markov.chicken.so markov.import.scm: markov.chicken.scm
csc -s -J markov.chicken.scm -O3
kjv.txt:
csi -b kjv2delimited.scm
clean:
rm -f kjv.txt markov.chicken.so markov.import.scm

15
README.rst Normal file
View File

@ -0,0 +1,15 @@
=============
Markov Scheme
=============
Markov chain text generation in R5RS.
The generator should run in any Scheme, but it runs much faster in
Chicken.
Run ``make`` to compile the generator and enter a toplevel where you can
use the markov generator to generate bible verses.
License: Apache-2.0. The SQLite database file is GPL-3.0-or-later.
Requires eggs ``srfi-69`` and ``sql-de-lite``.

BIN
bible-sqlite.db Normal file

Binary file not shown.

30
kjv2delimited.scm Normal file
View File

@ -0,0 +1,30 @@
(import sql-de-lite)
(define (!= x y)
(not (= x y)))
(with-output-to-file "kjv.txt"
(lambda ()
(call-with-database "bible-sqlite.db"
(lambda (db)
(let ((previous-book 0)
(previous-chapter 0)
(previous-verse 0))
(query (for-each-row
(lambda (row)
(let ((book (car row))
(chapter (cadr row))
(verse (caddr row))
(text (cadddr row)))
(when (!= book previous-book)
(display "startofbook\n"))
(when (!= chapter previous-chapter)
(display "startofchapter\n"))
(when (!= verse previous-verse)
(display "startofverse\n"))
(set! previous-book book)
(set! previous-chapter chapter)
(set! previous-verse verse)
(display text)
(newline))))
(sql db "select b,c,v,t from t_kjv")))))))

4
markov.chicken.scm Normal file
View File

@ -0,0 +1,4 @@
(module markov
(make-table-from-file guess-sentence generate-next-word)
(import scheme (chicken base) srfi-69)
(include "markov.impl.scm"))

212
markov.impl.scm Normal file
View File

@ -0,0 +1,212 @@
(define char-downcase*
(lambda (ch)
(if (char? ch)
(char-downcase ch)
ch)))
(define port->reader
(lambda (port)
(list (lambda () (read-char port))
eof-object?)))
(define reader/pushback-buffer
(lambda (read-function eof?)
(let ((pushback-buffer '()))
(letrec ((reader
(lambda ()
(if (null? pushback-buffer)
(read-function)
(let ((ch (car pushback-buffer)))
(set! pushback-buffer (cdr pushback-buffer))
ch))))
(pushback
(lambda (ch)
(if (not (eof? ch))
(set! pushback-buffer (cons ch pushback-buffer))))))
(list pushback reader eof?)))))
(define reader-words
(lambda (part-of-a-word? pushback reader eof?)
(letrec ((delimiter?
(lambda (ch)
(or (eof? ch)
(not (part-of-a-word? ch))
(eqv? ch #\space))))
(read-until-delimiter
(lambda (lst)
(let ((ch (reader)))
(if (delimiter? ch)
(begin
(pushback ch)
(list->string (reverse lst)))
(read-until-delimiter (cons ch lst))))))
(reader-wrapper
(lambda ()
(let ((ch (reader)))
(cond
((not (char? ch)) #f)
((char-whitespace? ch) (reader-wrapper))
(else (read-until-delimiter (list ch))))))))
(list reader-wrapper))))
(define port->reader-words
(lambda (port)
(apply reader-words
char-alphabetic?
(apply reader/pushback-buffer
(port->reader port)))))
(define reader-pairs
(lambda (reader)
(let ((previous-word #f))
(lambda ()
(if (not previous-word)
(set! previous-word (reader)))
(if previous-word
(let ((next-word (reader))
(previous-word* previous-word))
(if next-word
(begin
(set! previous-word next-word)
(cons previous-word* next-word))
#f))
#f)))))
(define port->reader-of-pairs
(lambda (port)
(apply reader-pairs (port->reader-words port))))
(define dump-pairs
(lambda (file)
(let ((port (open-input-file file)))
(let ((reader (port->reader-of-pairs port)))
(letrec ((loop
(lambda ()
(let ((pair (reader)))
(if pair
(begin
(display pair)
(loop)))))))
(loop)
(newline))))))
(define collect-occurences
(lambda (file)
(let ((port (open-input-file file))
(table (make-hash-table)))
(let ((reader (port->reader-of-pairs port)))
(letrec ((handle
(lambda (first next)
(hash-table-update!
table
first
(lambda (next-table)
(hash-table-update!/default
next-table
next
(lambda (count) (+ count 1))
1)
next-table)
(lambda () (make-hash-table)))
(looper)))
(looper
(lambda ()
(let ((pair (reader)))
(if pair
(handle (car pair) (cdr pair)))))))
(looper)
(close-input-port port)
table)))))
(define display-occurences
(lambda (table)
(hash-table-walk table
(lambda (first next-table)
(hash-table-walk next-table
(lambda (next count)
(display (list first next count))))))
(newline)))
(define occurence-table->probabilities
(lambda (next-table)
(let ((total (hash-table-fold next-table
(lambda (_ val total)
(+ val total))
0)))
(let ((acc
(hash-table-fold next-table
(lambda (key val acc)
(let ((summed-probability (car acc))
(alist (cdr acc)))
(let ((summed-probability
(+ summed-probability
(/ val total))))
(cons summed-probability
(cons (cons key summed-probability)
alist)))))
(cons 0 '()))))
(reverse (cdr acc))))))
(define occurences->probabilities
(lambda (table)
(let ((probability-table
(hash-table-fold table
(lambda (name next-table new-table)
(hash-table-set! new-table
name
(occurence-table->probabilities
next-table))
new-table)
(make-hash-table))))
probability-table)))
(define dump-probabilities
(lambda (table)
(hash-table-walk table
(lambda (name probs)
(display (list name probs))))
(newline)))
;;; Lehmer random number generator
(define random
(let ((m 65537)
(a 75)
(previous 10000))
(lambda ()
(set! previous (remainder (* a previous)
m))
(/ previous (- m 1)))))
(define probability-list->guess
(lambda (lst normalized-random-number)
(if (null? lst)
(error (list "bad number or list" lst normalized-random-number))
(let ((name (caar lst))
(sum-prob (cdar lst)))
(if (< sum-prob normalized-random-number)
(probability-list->guess (cdr lst)
normalized-random-number)
name)))))
(define generate-next-word
(lambda (probabilities previous-word)
(probability-list->guess (hash-table-ref probabilities
previous-word)
(random))))
(define make-table-from-file
(lambda (filename)
(occurences->probabilities
(collect-occurences filename))))
(define guess-sentence
(lambda (start-word table number)
(letrec ((collector
(lambda (start-word number collected)
(if (<= number 0)
(reverse collected)
(let ((name (generate-next-word table start-word)))
(collector name (- number 1) (cons name
collected)))))))
(collector start-word number (cons start-word '())))))

26
toplevel.scm Normal file
View File

@ -0,0 +1,26 @@
(load "markov.chicken.so")
(import markov)
(display "Generating table (this may take a while)\n" (current-error-port))
(define table (make-table-from-file "kjv.txt"))
(display "Done generating.\n")
(define (generate-until-word* current collected limits)
(if (member current limits)
(reverse collected)
(generate-until-word* (generate-next-word table current)
(cons current collected)
limits)))
(define (generate-until-word limits)
(generate-until-word* (generate-next-word table (car limits))
'()
limits))
(define (make-a-bible-verse)
(generate-until-word '("startofverse" "startofchapter" "startofbook")))
(define (make-a-book-of-the-bible)
(generate-until-word '("startofbook")))
(display "Try (make-a-bible-verse) and (make-a-book-of-the-bible).\n")