aboutsummaryrefslogtreecommitdiffstats
path: root/zipf-zri.scm
blob: 5bd4c7f96ebd4a43b216c1b3c4f774f328d444b7 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
; SPDX-FileCopyrightText: 2020 Arvydas Silanskas
; SPDX-FileCopyrightText: 2024 Peter McGoron
; SPDX-License-Identifier: MIT
;
; zipf-zri.scm
; Create a Zipf random distribution.
;
; Created by Linas Vepstas 10 July 2020
; Nominated for inclusion in srfi-194
;
; Not optimized for speed!
;
; Implementation from ZRI algorithm presented in the paper:
; "Rejection-inversion to generate variates from monotone discrete
; distributions", Wolfgang Hörmann and Gerhard Derflinger
; ACM TOMACS 6.3 (1996): 169-184
;
; Hörmann and Derflinger use "q" everywhere, when they really mean "s".
; Thier "q" is not the standard q-series deformation. Its just "s".
; The notation in the code below differs from the article to reflect
; conventional usage.
;
;------------------------------------------------------------------
;
; The Hurwicz zeta distribution 1 / (k+q)^s for 1 <= k <= n integer
; The Zipf distribution is recovered by setting q=0.
;
; The exponent `s` must be a real number not equal to 1.
; Accuracy is diminished for |1-s|< 1e-6. The accuracy is roughly
; equal to 1e-15 / |1-s| where 1e-15 == 64-bit double-precision ULP.
;

(: make-zipf-generator/zri (integer number number -> (-> number)))
(define (make-zipf-generator/zri n s q)

  ; The hat function h(x) = 1 / (x+q)^s
  (define (hat x)
    (expt (+ x q) (- s)))

  (define _1-s (- 1 s))
  (define oms (/ 1 _1-s))

  ; The integral of hat(x)
  ; H(x) = (x+q)^{1-s} / (1-s)
  ; Note that H(x) is always negative.
  (define (big-h x)
    (/ (expt (+ q x) _1-s) _1-s))

  ; The inverse function of H(x)
  ; H^{-1}(y) = -q + (y(1-s))^{1/(1-s)}
  (define (big-h-inv y)
    (- (expt (* y _1-s) oms) q))

  ; Lower and upper bounds for the uniform random generator.
  (define big-h-half (- (big-h 1.5) (hat 1)))
  (define big-h-n (big-h (+ n 0.5)))

  ; Rejection cut
  (define cut (- 1 (big-h-inv (- (big-h 1.5) (hat 1)))))

  ; Uniform distribution
  (define dist (make-random-real-generator big-h-half big-h-n))

  ; Attempt to hit the dartboard. Return #f if we fail,
  ; otherwise return an integer between 1 and n.
  (define (try)
    (define u (dist))
    (define x (big-h-inv u))
    (define kflt (floor (+ x 0.5)))
    (define k (exact kflt))
    (if (and (< 0 k)
             (or
               (<= (- k x) cut)
               (>= u (- (big-h (+ k 0.5)) (hat k))))) k #f))

  ; Did we hit the dartboard? If not, try again.
  (define (loop-until)
    (define k (try))
    (if k k (loop-until)))

  ; Return the generator.
  loop-until)

;------------------------------------------------------------------
;
; The Hurwicz zeta distribution 1 / (k+q)^s for 1 <= k <= n integer
; The Zipf distribution is recovered by setting q=0.
;
; The exponent `s` must be a real number close to 1.
; Accuracy is diminished for |1-s|> 2e-4. The accuracy is roughly
; equal to 0.05 * |1-s|^4 due to exp(1-s) being expanded to 4 terms.
;
; This handles the special case of s==1 perfectly.

(: make-zipf-generator/one (integer number number -> (-> number)))
(define (make-zipf-generator/one n s q)

  (define _1-s (- 1 s))

  ; The hat function h(x) = 1 / (x+q)^s
  ; Written for s->1 i.e. 1/(x+q)(x+q)^{s-1}
  (define (hat x)
    (define xpq (+ x q))
    (/ (expt xpq _1-s) xpq))

  ; Expansion of exn(y) = [exp(y(1-s))-1]/(1-s) for s->1
  ; Expanded to 4th order.
  ; Should equal this:
  ;;; (define (exn lg) (/ (- (exp (* _1-s lg)) 1) _1-s))
  ; but more accurate for s near 1.0
  (define (exn lg)
    (define (trm n u lg) (* lg (+ 1 (/ (* _1-s u) n))))
    (trm 2 (trm 3 (trm 4 1 lg) lg) lg))

  ; Expansion of lg(y) = [log(1 + y(1-s))] / (1-s) for s->1
  ; Expanded to 4th order.
  ; Should equal this:
  ;;; (define (lg y) (/ (log (+ 1 (* y _1-s))) _1-s))
  ; but more accurate for s near 1.0
  (define (lg y)
    (define yms (* y _1-s))
    (define (trm n u r) (- (/ 1 n) (* u r)))
    (* y (trm 1 yms (trm 2 yms (trm 3 yms (trm 4 yms 0))))))

  ; The integral of hat(x) defined at s==1
  ; H(x) = [exp{(1-s) log(x+q)} - 1]/(1-s)
  ; Should equal this:
  ;;;  (define (big-h x) (/ (- (exp (* _1-s (log (+ q x)))) 1)  _1-s))
  ; but expanded so that it's more accurate for s near 1.0
  (define (big-h x)
    (exn (log (+ q x))))

  ; The inverse function of H(x)
  ; H^{-1}(y) = -q + (1 + y(1-s))^{1/(1-s)}
  ; Should equal this:
  ;;; (define (big-h-inv y) (- (expt (+ 1 (* y _1-s)) (/ 1 _1-s)) q ))
  ; but expanded so that it's more accurate for s near 1.0
  (define (big-h-inv y)
    (- (exp (lg y)) q))

  ; Lower and upper bounds for the uniform random generator.
  (define big-h-half (- (big-h 1.5) (hat 1)))

  (define big-h-n (big-h (+ n 0.5)))

  ; Rejection cut
  (define cut (- 1 (big-h-inv (- (big-h 1.5) (/ 1 (+ 1 q))))))

  ; Uniform distribution
  (define dist (make-random-real-generator big-h-half big-h-n))

  ; Attempt to hit the dartboard. Return #f if we fail,
  ; otherwise return an integer between 1 and n.
  (define (try)
    (define u (dist))
    (define x (big-h-inv u))
    (define kflt (floor (+ x 0.5)))
    (define k (exact kflt))
    (if (and (< 0 k)
             (or
               (<= (- k x) cut)
               (>= u (- (big-h (+ k 0.5)) (hat k))))) k #f))

  ; Did we hit the dartboard? If not, try again.
  (define (loop-until)
    (define k (try))
    (if k k (loop-until)))

  ; Return the generator.
  loop-until)

;------------------------------------------------------------------
;
; (make-zipf-generator n [s [q]])
;
; The Hurwicz zeta distribution 1 / (k+q)^s for 1 <= k <= n integer
; The Zipf distribution is recovered by setting q=0.
; If `q` is not specified, 0 is assumed.
; If `s` is not specified, 1 is assumed.
;
; Valid for real -10 < s < 100 (otherwise overflows likely)
; Valid for real -0.5 < q < 2e8 (otherwise overflows likely)
; Valid for integer 1 <= k < int-max
;
; Example usage:
;    (define zgen (make-zipf-generator 50 1.01 0))
;    (generator->list zgen 10)

(: make-zipf-generator
   (integer #!optional number number -> (-> number)))
(define make-zipf-generator
  (case-lambda
    ((n)
     (make-zipf-generator n 1.0 0.0))
    ((n s)
     (make-zipf-generator n s 0.0))
    ((n s q)
     (if (< 1e-5 (abs (- 1 s)))
         (make-zipf-generator/zri n s q)
         (make-zipf-generator/one n s q)))))