commit 45466ce4c445ce591a9a3ef31a63315728ed2166 Author: Peter McGoron Date: Fri Sep 13 22:48:27 2024 -0400 markov generate the bible diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..f2ba376 --- /dev/null +++ b/Makefile @@ -0,0 +1,13 @@ +.PHONY: run + +run: kjv.txt markov.chicken.so markov.import.scm + csi toplevel.scm + +markov.chicken.scm: markov.impl.scm +markov.chicken.so markov.import.scm: markov.chicken.scm + csc -s -J markov.chicken.scm -O3 + +kjv.txt: + csi -b kjv2delimited.scm +clean: + rm -f kjv.txt markov.chicken.so markov.import.scm diff --git a/README.rst b/README.rst new file mode 100644 index 0000000..5764a8d --- /dev/null +++ b/README.rst @@ -0,0 +1,15 @@ +============= +Markov Scheme +============= + +Markov chain text generation in R5RS. + +The generator should run in any Scheme, but it runs much faster in +Chicken. + +Run ``make`` to compile the generator and enter a toplevel where you can +use the markov generator to generate bible verses. + +License: Apache-2.0. The SQLite database file is GPL-3.0-or-later. + +Requires eggs ``srfi-69`` and ``sql-de-lite``. diff --git a/bible-sqlite.db b/bible-sqlite.db new file mode 100644 index 0000000..34465fb Binary files /dev/null and b/bible-sqlite.db differ diff --git a/kjv2delimited.scm b/kjv2delimited.scm new file mode 100644 index 0000000..3c62a3d --- /dev/null +++ b/kjv2delimited.scm @@ -0,0 +1,30 @@ +(import sql-de-lite) + +(define (!= x y) + (not (= x y))) + +(with-output-to-file "kjv.txt" + (lambda () + (call-with-database "bible-sqlite.db" + (lambda (db) + (let ((previous-book 0) + (previous-chapter 0) + (previous-verse 0)) + (query (for-each-row + (lambda (row) + (let ((book (car row)) + (chapter (cadr row)) + (verse (caddr row)) + (text (cadddr row))) + (when (!= book previous-book) + (display "startofbook\n")) + (when (!= chapter previous-chapter) + (display "startofchapter\n")) + (when (!= verse previous-verse) + (display "startofverse\n")) + (set! previous-book book) + (set! previous-chapter chapter) + (set! previous-verse verse) + (display text) + (newline)))) + (sql db "select b,c,v,t from t_kjv"))))))) diff --git a/markov.chicken.scm b/markov.chicken.scm new file mode 100644 index 0000000..4f2c6dd --- /dev/null +++ b/markov.chicken.scm @@ -0,0 +1,4 @@ +(module markov + (make-table-from-file guess-sentence generate-next-word) + (import scheme (chicken base) srfi-69) + (include "markov.impl.scm")) \ No newline at end of file diff --git a/markov.impl.scm b/markov.impl.scm new file mode 100644 index 0000000..a1c3148 --- /dev/null +++ b/markov.impl.scm @@ -0,0 +1,212 @@ +(define char-downcase* + (lambda (ch) + (if (char? ch) + (char-downcase ch) + ch))) + +(define port->reader + (lambda (port) + (list (lambda () (read-char port)) + eof-object?))) + +(define reader/pushback-buffer + (lambda (read-function eof?) + (let ((pushback-buffer '())) + (letrec ((reader + (lambda () + (if (null? pushback-buffer) + (read-function) + (let ((ch (car pushback-buffer))) + (set! pushback-buffer (cdr pushback-buffer)) + ch)))) + (pushback + (lambda (ch) + (if (not (eof? ch)) + (set! pushback-buffer (cons ch pushback-buffer)))))) + (list pushback reader eof?))))) + +(define reader-words + (lambda (part-of-a-word? pushback reader eof?) + (letrec ((delimiter? + (lambda (ch) + (or (eof? ch) + (not (part-of-a-word? ch)) + (eqv? ch #\space)))) + (read-until-delimiter + (lambda (lst) + (let ((ch (reader))) + (if (delimiter? ch) + (begin + (pushback ch) + (list->string (reverse lst))) + (read-until-delimiter (cons ch lst)))))) + (reader-wrapper + (lambda () + (let ((ch (reader))) + (cond + ((not (char? ch)) #f) + ((char-whitespace? ch) (reader-wrapper)) + (else (read-until-delimiter (list ch)))))))) + (list reader-wrapper)))) + +(define port->reader-words + (lambda (port) + (apply reader-words + char-alphabetic? + (apply reader/pushback-buffer + (port->reader port))))) + +(define reader-pairs + (lambda (reader) + (let ((previous-word #f)) + (lambda () + (if (not previous-word) + (set! previous-word (reader))) + (if previous-word + (let ((next-word (reader)) + (previous-word* previous-word)) + (if next-word + (begin + (set! previous-word next-word) + (cons previous-word* next-word)) + #f)) + #f))))) + +(define port->reader-of-pairs + (lambda (port) + (apply reader-pairs (port->reader-words port)))) + +(define dump-pairs + (lambda (file) + (let ((port (open-input-file file))) + (let ((reader (port->reader-of-pairs port))) + (letrec ((loop + (lambda () + (let ((pair (reader))) + (if pair + (begin + (display pair) + (loop))))))) + (loop) + (newline)))))) + +(define collect-occurences + (lambda (file) + (let ((port (open-input-file file)) + (table (make-hash-table))) + (let ((reader (port->reader-of-pairs port))) + (letrec ((handle + (lambda (first next) + (hash-table-update! + table + first + (lambda (next-table) + (hash-table-update!/default + next-table + next + (lambda (count) (+ count 1)) + 1) + next-table) + (lambda () (make-hash-table))) + (looper))) + (looper + (lambda () + (let ((pair (reader))) + (if pair + (handle (car pair) (cdr pair))))))) + (looper) + (close-input-port port) + table))))) + +(define display-occurences + (lambda (table) + (hash-table-walk table + (lambda (first next-table) + (hash-table-walk next-table + (lambda (next count) + (display (list first next count)))))) + (newline))) + +(define occurence-table->probabilities + (lambda (next-table) + (let ((total (hash-table-fold next-table + (lambda (_ val total) + (+ val total)) + 0))) + (let ((acc + (hash-table-fold next-table + (lambda (key val acc) + (let ((summed-probability (car acc)) + (alist (cdr acc))) + (let ((summed-probability + (+ summed-probability + (/ val total)))) + (cons summed-probability + (cons (cons key summed-probability) + alist))))) + (cons 0 '())))) + (reverse (cdr acc)))))) + +(define occurences->probabilities + (lambda (table) + (let ((probability-table + (hash-table-fold table + (lambda (name next-table new-table) + (hash-table-set! new-table + name + (occurence-table->probabilities + next-table)) + new-table) + (make-hash-table)))) + probability-table))) + +(define dump-probabilities + (lambda (table) + (hash-table-walk table + (lambda (name probs) + (display (list name probs)))) + (newline))) + +;;; Lehmer random number generator +(define random + (let ((m 65537) + (a 75) + (previous 10000)) + (lambda () + (set! previous (remainder (* a previous) + m)) + (/ previous (- m 1))))) + +(define probability-list->guess + (lambda (lst normalized-random-number) + (if (null? lst) + (error (list "bad number or list" lst normalized-random-number)) + (let ((name (caar lst)) + (sum-prob (cdar lst))) + (if (< sum-prob normalized-random-number) + (probability-list->guess (cdr lst) + normalized-random-number) + name))))) + +(define generate-next-word + (lambda (probabilities previous-word) + (probability-list->guess (hash-table-ref probabilities + previous-word) + (random)))) + +(define make-table-from-file + (lambda (filename) + (occurences->probabilities + (collect-occurences filename)))) + +(define guess-sentence + (lambda (start-word table number) + (letrec ((collector + (lambda (start-word number collected) + (if (<= number 0) + (reverse collected) + (let ((name (generate-next-word table start-word))) + (collector name (- number 1) (cons name + collected))))))) + (collector start-word number (cons start-word '()))))) + diff --git a/toplevel.scm b/toplevel.scm new file mode 100644 index 0000000..ee7bcca --- /dev/null +++ b/toplevel.scm @@ -0,0 +1,26 @@ +(load "markov.chicken.so") +(import markov) + +(display "Generating table (this may take a while)\n" (current-error-port)) +(define table (make-table-from-file "kjv.txt")) +(display "Done generating.\n") + +(define (generate-until-word* current collected limits) + (if (member current limits) + (reverse collected) + (generate-until-word* (generate-next-word table current) + (cons current collected) + limits))) + +(define (generate-until-word limits) + (generate-until-word* (generate-next-word table (car limits)) + '() + limits)) + +(define (make-a-bible-verse) + (generate-until-word '("startofverse" "startofchapter" "startofbook"))) + +(define (make-a-book-of-the-bible) + (generate-until-word '("startofbook"))) + +(display "Try (make-a-bible-verse) and (make-a-book-of-the-bible).\n")