aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorGravatar Peter McGoron 2024-09-13 22:48:27 -0400
committerGravatar Peter McGoron 2024-09-13 22:48:27 -0400
commit45466ce4c445ce591a9a3ef31a63315728ed2166 (patch)
treecbbb250ec881d0a317b4d6901d8854ef22df3009
markov generate the bible
-rw-r--r--Makefile13
-rw-r--r--README.rst15
-rw-r--r--bible-sqlite.dbbin0 -> 43819008 bytes
-rw-r--r--kjv2delimited.scm30
-rw-r--r--markov.chicken.scm4
-rw-r--r--markov.impl.scm212
-rw-r--r--toplevel.scm26
7 files changed, 300 insertions, 0 deletions
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..f2ba376
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,13 @@
+.PHONY: run
+
+run: kjv.txt markov.chicken.so markov.import.scm
+ csi toplevel.scm
+
+markov.chicken.scm: markov.impl.scm
+markov.chicken.so markov.import.scm: markov.chicken.scm
+ csc -s -J markov.chicken.scm -O3
+
+kjv.txt:
+ csi -b kjv2delimited.scm
+clean:
+ rm -f kjv.txt markov.chicken.so markov.import.scm
diff --git a/README.rst b/README.rst
new file mode 100644
index 0000000..5764a8d
--- /dev/null
+++ b/README.rst
@@ -0,0 +1,15 @@
+=============
+Markov Scheme
+=============
+
+Markov chain text generation in R5RS.
+
+The generator should run in any Scheme, but it runs much faster in
+Chicken.
+
+Run ``make`` to compile the generator and enter a toplevel where you can
+use the markov generator to generate bible verses.
+
+License: Apache-2.0. The SQLite database file is GPL-3.0-or-later.
+
+Requires eggs ``srfi-69`` and ``sql-de-lite``.
diff --git a/bible-sqlite.db b/bible-sqlite.db
new file mode 100644
index 0000000..34465fb
--- /dev/null
+++ b/bible-sqlite.db
Binary files differ
diff --git a/kjv2delimited.scm b/kjv2delimited.scm
new file mode 100644
index 0000000..3c62a3d
--- /dev/null
+++ b/kjv2delimited.scm
@@ -0,0 +1,30 @@
+(import sql-de-lite)
+
+(define (!= x y)
+ (not (= x y)))
+
+(with-output-to-file "kjv.txt"
+ (lambda ()
+ (call-with-database "bible-sqlite.db"
+ (lambda (db)
+ (let ((previous-book 0)
+ (previous-chapter 0)
+ (previous-verse 0))
+ (query (for-each-row
+ (lambda (row)
+ (let ((book (car row))
+ (chapter (cadr row))
+ (verse (caddr row))
+ (text (cadddr row)))
+ (when (!= book previous-book)
+ (display "startofbook\n"))
+ (when (!= chapter previous-chapter)
+ (display "startofchapter\n"))
+ (when (!= verse previous-verse)
+ (display "startofverse\n"))
+ (set! previous-book book)
+ (set! previous-chapter chapter)
+ (set! previous-verse verse)
+ (display text)
+ (newline))))
+ (sql db "select b,c,v,t from t_kjv")))))))
diff --git a/markov.chicken.scm b/markov.chicken.scm
new file mode 100644
index 0000000..4f2c6dd
--- /dev/null
+++ b/markov.chicken.scm
@@ -0,0 +1,4 @@
+(module markov
+ (make-table-from-file guess-sentence generate-next-word)
+ (import scheme (chicken base) srfi-69)
+ (include "markov.impl.scm")) \ No newline at end of file
diff --git a/markov.impl.scm b/markov.impl.scm
new file mode 100644
index 0000000..a1c3148
--- /dev/null
+++ b/markov.impl.scm
@@ -0,0 +1,212 @@
+(define char-downcase*
+ (lambda (ch)
+ (if (char? ch)
+ (char-downcase ch)
+ ch)))
+
+(define port->reader
+ (lambda (port)
+ (list (lambda () (read-char port))
+ eof-object?)))
+
+(define reader/pushback-buffer
+ (lambda (read-function eof?)
+ (let ((pushback-buffer '()))
+ (letrec ((reader
+ (lambda ()
+ (if (null? pushback-buffer)
+ (read-function)
+ (let ((ch (car pushback-buffer)))
+ (set! pushback-buffer (cdr pushback-buffer))
+ ch))))
+ (pushback
+ (lambda (ch)
+ (if (not (eof? ch))
+ (set! pushback-buffer (cons ch pushback-buffer))))))
+ (list pushback reader eof?)))))
+
+(define reader-words
+ (lambda (part-of-a-word? pushback reader eof?)
+ (letrec ((delimiter?
+ (lambda (ch)
+ (or (eof? ch)
+ (not (part-of-a-word? ch))
+ (eqv? ch #\space))))
+ (read-until-delimiter
+ (lambda (lst)
+ (let ((ch (reader)))
+ (if (delimiter? ch)
+ (begin
+ (pushback ch)
+ (list->string (reverse lst)))
+ (read-until-delimiter (cons ch lst))))))
+ (reader-wrapper
+ (lambda ()
+ (let ((ch (reader)))
+ (cond
+ ((not (char? ch)) #f)
+ ((char-whitespace? ch) (reader-wrapper))
+ (else (read-until-delimiter (list ch))))))))
+ (list reader-wrapper))))
+
+(define port->reader-words
+ (lambda (port)
+ (apply reader-words
+ char-alphabetic?
+ (apply reader/pushback-buffer
+ (port->reader port)))))
+
+(define reader-pairs
+ (lambda (reader)
+ (let ((previous-word #f))
+ (lambda ()
+ (if (not previous-word)
+ (set! previous-word (reader)))
+ (if previous-word
+ (let ((next-word (reader))
+ (previous-word* previous-word))
+ (if next-word
+ (begin
+ (set! previous-word next-word)
+ (cons previous-word* next-word))
+ #f))
+ #f)))))
+
+(define port->reader-of-pairs
+ (lambda (port)
+ (apply reader-pairs (port->reader-words port))))
+
+(define dump-pairs
+ (lambda (file)
+ (let ((port (open-input-file file)))
+ (let ((reader (port->reader-of-pairs port)))
+ (letrec ((loop
+ (lambda ()
+ (let ((pair (reader)))
+ (if pair
+ (begin
+ (display pair)
+ (loop)))))))
+ (loop)
+ (newline))))))
+
+(define collect-occurences
+ (lambda (file)
+ (let ((port (open-input-file file))
+ (table (make-hash-table)))
+ (let ((reader (port->reader-of-pairs port)))
+ (letrec ((handle
+ (lambda (first next)
+ (hash-table-update!
+ table
+ first
+ (lambda (next-table)
+ (hash-table-update!/default
+ next-table
+ next
+ (lambda (count) (+ count 1))
+ 1)
+ next-table)
+ (lambda () (make-hash-table)))
+ (looper)))
+ (looper
+ (lambda ()
+ (let ((pair (reader)))
+ (if pair
+ (handle (car pair) (cdr pair)))))))
+ (looper)
+ (close-input-port port)
+ table)))))
+
+(define display-occurences
+ (lambda (table)
+ (hash-table-walk table
+ (lambda (first next-table)
+ (hash-table-walk next-table
+ (lambda (next count)
+ (display (list first next count))))))
+ (newline)))
+
+(define occurence-table->probabilities
+ (lambda (next-table)
+ (let ((total (hash-table-fold next-table
+ (lambda (_ val total)
+ (+ val total))
+ 0)))
+ (let ((acc
+ (hash-table-fold next-table
+ (lambda (key val acc)
+ (let ((summed-probability (car acc))
+ (alist (cdr acc)))
+ (let ((summed-probability
+ (+ summed-probability
+ (/ val total))))
+ (cons summed-probability
+ (cons (cons key summed-probability)
+ alist)))))
+ (cons 0 '()))))
+ (reverse (cdr acc))))))
+
+(define occurences->probabilities
+ (lambda (table)
+ (let ((probability-table
+ (hash-table-fold table
+ (lambda (name next-table new-table)
+ (hash-table-set! new-table
+ name
+ (occurence-table->probabilities
+ next-table))
+ new-table)
+ (make-hash-table))))
+ probability-table)))
+
+(define dump-probabilities
+ (lambda (table)
+ (hash-table-walk table
+ (lambda (name probs)
+ (display (list name probs))))
+ (newline)))
+
+;;; Lehmer random number generator
+(define random
+ (let ((m 65537)
+ (a 75)
+ (previous 10000))
+ (lambda ()
+ (set! previous (remainder (* a previous)
+ m))
+ (/ previous (- m 1)))))
+
+(define probability-list->guess
+ (lambda (lst normalized-random-number)
+ (if (null? lst)
+ (error (list "bad number or list" lst normalized-random-number))
+ (let ((name (caar lst))
+ (sum-prob (cdar lst)))
+ (if (< sum-prob normalized-random-number)
+ (probability-list->guess (cdr lst)
+ normalized-random-number)
+ name)))))
+
+(define generate-next-word
+ (lambda (probabilities previous-word)
+ (probability-list->guess (hash-table-ref probabilities
+ previous-word)
+ (random))))
+
+(define make-table-from-file
+ (lambda (filename)
+ (occurences->probabilities
+ (collect-occurences filename))))
+
+(define guess-sentence
+ (lambda (start-word table number)
+ (letrec ((collector
+ (lambda (start-word number collected)
+ (if (<= number 0)
+ (reverse collected)
+ (let ((name (generate-next-word table start-word)))
+ (collector name (- number 1) (cons name
+ collected)))))))
+ (collector start-word number (cons start-word '())))))
+
diff --git a/toplevel.scm b/toplevel.scm
new file mode 100644
index 0000000..ee7bcca
--- /dev/null
+++ b/toplevel.scm
@@ -0,0 +1,26 @@
+(load "markov.chicken.so")
+(import markov)
+
+(display "Generating table (this may take a while)\n" (current-error-port))
+(define table (make-table-from-file "kjv.txt"))
+(display "Done generating.\n")
+
+(define (generate-until-word* current collected limits)
+ (if (member current limits)
+ (reverse collected)
+ (generate-until-word* (generate-next-word table current)
+ (cons current collected)
+ limits)))
+
+(define (generate-until-word limits)
+ (generate-until-word* (generate-next-word table (car limits))
+ '()
+ limits))
+
+(define (make-a-bible-verse)
+ (generate-until-word '("startofverse" "startofchapter" "startofbook")))
+
+(define (make-a-book-of-the-bible)
+ (generate-until-word '("startofbook")))
+
+(display "Try (make-a-bible-verse) and (make-a-book-of-the-bible).\n")