aboutsummaryrefslogtreecommitdiffstats
path: root/zipf-zri.scm
diff options
context:
space:
mode:
authorGravatar Peter McGoron 2025-01-10 16:22:02 -0500
committerGravatar Peter McGoron 2025-01-10 16:22:02 -0500
commitcd78c57222dfe6ad9906b95e6c0353629469c10f (patch)
treef0932a3d2f2be771f4ae6f4f528d6a6eecdb58c0 /zipf-zri.scm
init
Diffstat (limited to 'zipf-zri.scm')
-rw-r--r--zipf-zri.scm193
1 files changed, 193 insertions, 0 deletions
diff --git a/zipf-zri.scm b/zipf-zri.scm
new file mode 100644
index 0000000..9e0f003
--- /dev/null
+++ b/zipf-zri.scm
@@ -0,0 +1,193 @@
+; SPDX-FileCopyrightText: 2020 Arvydas Silanskas
+; SPDX-License-Identifier: MIT
+;
+; zipf-zri.scm
+; Create a Zipf random distribution.
+;
+; Created by Linas Vepstas 10 July 2020
+; Nominated for inclusion in srfi-194
+;
+; Not optimized for speed!
+;
+; Implementation from ZRI algorithm presented in the paper:
+; "Rejection-inversion to generate variates from monotone discrete
+; distributions", Wolfgang Hörmann and Gerhard Derflinger
+; ACM TOMACS 6.3 (1996): 169-184
+;
+; Hörmann and Derflinger use "q" everywhere, when they really mean "s".
+; Thier "q" is not the standard q-series deformation. Its just "s".
+; The notation in the code below differs from the article to reflect
+; conventional usage.
+;
+;------------------------------------------------------------------
+;
+; The Hurwicz zeta distribution 1 / (k+q)^s for 1 <= k <= n integer
+; The Zipf distribution is recovered by setting q=0.
+;
+; The exponent `s` must be a real number not equal to 1.
+; Accuracy is diminished for |1-s|< 1e-6. The accuracy is roughly
+; equal to 1e-15 / |1-s| where 1e-15 == 64-bit double-precision ULP.
+;
+(define (make-zipf-generator/zri n s q)
+
+ ; The hat function h(x) = 1 / (x+q)^s
+ (define (hat x)
+ (expt (+ x q) (- s)))
+
+ (define _1-s (- 1 s))
+ (define oms (/ 1 _1-s))
+
+ ; The integral of hat(x)
+ ; H(x) = (x+q)^{1-s} / (1-s)
+ ; Note that H(x) is always negative.
+ (define (big-h x)
+ (/ (expt (+ q x) _1-s) _1-s))
+
+ ; The inverse function of H(x)
+ ; H^{-1}(y) = -q + (y(1-s))^{1/(1-s)}
+ (define (big-h-inv y)
+ (- (expt (* y _1-s) oms) q))
+
+ ; Lower and upper bounds for the uniform random generator.
+ (define big-h-half (- (big-h 1.5) (hat 1)))
+ (define big-h-n (big-h (+ n 0.5)))
+
+ ; Rejection cut
+ (define cut (- 1 (big-h-inv (- (big-h 1.5) (hat 1)))))
+
+ ; Uniform distribution
+ (define dist (make-random-real-generator big-h-half big-h-n))
+
+ ; Attempt to hit the dartboard. Return #f if we fail,
+ ; otherwise return an integer between 1 and n.
+ (define (try)
+ (define u (dist))
+ (define x (big-h-inv u))
+ (define kflt (floor (+ x 0.5)))
+ (define k (exact kflt))
+ (if (and (< 0 k)
+ (or
+ (<= (- k x) cut)
+ (>= u (- (big-h (+ k 0.5)) (hat k))))) k #f))
+
+ ; Did we hit the dartboard? If not, try again.
+ (define (loop-until)
+ (define k (try))
+ (if k k (loop-until)))
+
+ ; Return the generator.
+ loop-until)
+
+;------------------------------------------------------------------
+;
+; The Hurwicz zeta distribution 1 / (k+q)^s for 1 <= k <= n integer
+; The Zipf distribution is recovered by setting q=0.
+;
+; The exponent `s` must be a real number close to 1.
+; Accuracy is diminished for |1-s|> 2e-4. The accuracy is roughly
+; equal to 0.05 * |1-s|^4 due to exp(1-s) being expanded to 4 terms.
+;
+; This handles the special case of s==1 perfectly.
+(define (make-zipf-generator/one n s q)
+
+ (define _1-s (- 1 s))
+
+ ; The hat function h(x) = 1 / (x+q)^s
+ ; Written for s->1 i.e. 1/(x+q)(x+q)^{s-1}
+ (define (hat x)
+ (define xpq (+ x q))
+ (/ (expt xpq _1-s) xpq))
+
+ ; Expansion of exn(y) = [exp(y(1-s))-1]/(1-s) for s->1
+ ; Expanded to 4th order.
+ ; Should equal this:
+ ;;; (define (exn lg) (/ (- (exp (* _1-s lg)) 1) _1-s))
+ ; but more accurate for s near 1.0
+ (define (exn lg)
+ (define (trm n u lg) (* lg (+ 1 (/ (* _1-s u) n))))
+ (trm 2 (trm 3 (trm 4 1 lg) lg) lg))
+
+ ; Expansion of lg(y) = [log(1 + y(1-s))] / (1-s) for s->1
+ ; Expanded to 4th order.
+ ; Should equal this:
+ ;;; (define (lg y) (/ (log (+ 1 (* y _1-s))) _1-s))
+ ; but more accurate for s near 1.0
+ (define (lg y)
+ (define yms (* y _1-s))
+ (define (trm n u r) (- (/ 1 n) (* u r)))
+ (* y (trm 1 yms (trm 2 yms (trm 3 yms (trm 4 yms 0))))))
+
+ ; The integral of hat(x) defined at s==1
+ ; H(x) = [exp{(1-s) log(x+q)} - 1]/(1-s)
+ ; Should equal this:
+ ;;; (define (big-h x) (/ (- (exp (* _1-s (log (+ q x)))) 1) _1-s))
+ ; but expanded so that it's more accurate for s near 1.0
+ (define (big-h x)
+ (exn (log (+ q x))))
+
+ ; The inverse function of H(x)
+ ; H^{-1}(y) = -q + (1 + y(1-s))^{1/(1-s)}
+ ; Should equal this:
+ ;;; (define (big-h-inv y) (- (expt (+ 1 (* y _1-s)) (/ 1 _1-s)) q ))
+ ; but expanded so that it's more accurate for s near 1.0
+ (define (big-h-inv y)
+ (- (exp (lg y)) q))
+
+ ; Lower and upper bounds for the uniform random generator.
+ (define big-h-half (- (big-h 1.5) (hat 1)))
+
+ (define big-h-n (big-h (+ n 0.5)))
+
+ ; Rejection cut
+ (define cut (- 1 (big-h-inv (- (big-h 1.5) (/ 1 (+ 1 q))))))
+
+ ; Uniform distribution
+ (define dist (make-random-real-generator big-h-half big-h-n))
+
+ ; Attempt to hit the dartboard. Return #f if we fail,
+ ; otherwise return an integer between 1 and n.
+ (define (try)
+ (define u (dist))
+ (define x (big-h-inv u))
+ (define kflt (floor (+ x 0.5)))
+ (define k (exact kflt))
+ (if (and (< 0 k)
+ (or
+ (<= (- k x) cut)
+ (>= u (- (big-h (+ k 0.5)) (hat k))))) k #f))
+
+ ; Did we hit the dartboard? If not, try again.
+ (define (loop-until)
+ (define k (try))
+ (if k k (loop-until)))
+
+ ; Return the generator.
+ loop-until)
+
+;------------------------------------------------------------------
+;
+; (make-zipf-generator n [s [q]])
+;
+; The Hurwicz zeta distribution 1 / (k+q)^s for 1 <= k <= n integer
+; The Zipf distribution is recovered by setting q=0.
+; If `q` is not specified, 0 is assumed.
+; If `s` is not specified, 1 is assumed.
+;
+; Valid for real -10 < s < 100 (otherwise overflows likely)
+; Valid for real -0.5 < q < 2e8 (otherwise overflows likely)
+; Valid for integer 1 <= k < int-max
+;
+; Example usage:
+; (define zgen (make-zipf-generator 50 1.01 0))
+; (generator->list zgen 10)
+;
+(define make-zipf-generator
+ (case-lambda
+ ((n)
+ (make-zipf-generator n 1.0 0.0))
+ ((n s)
+ (make-zipf-generator n s 0.0))
+ ((n s q)
+ (if (< 1e-5 (abs (- 1 s)))
+ (make-zipf-generator/zri n s q)
+ (make-zipf-generator/one n s q)))))