1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
type bin = {
center : float;
count : int
}
type histogram = {
max_bins : int;
num_bins : int;
bins : bin list;
range : (float * float) option;
total_count : int;
}
let max_bins h =
h.max_bins
let num_bins h =
h.num_bins
let bins h =
h.bins
let range h =
h.range
let total_count h =
h.total_count
let rec insert value = function
| [] -> [{ center = value ; count = 1 }], true
| h :: t ->
if value < h.center then
{ center = value ; count = 1 } :: h :: t, true
else if value = h.center then
{ h with count = h.count + 1 } :: t, false
else
let t, num_bins_is_incr = insert value t in
h :: t, num_bins_is_incr
let rec min_diff_index i index min_diff = function
| a :: b :: t ->
let diff = b.center -. a.center in
assert ( diff > 0. );
if diff < min_diff then
min_diff_index (i+1) i diff (b :: t)
else
min_diff_index (i+1) index min_diff (b :: t)
| [ _ ] -> index
| [] -> assert false
let min_diff_index = function
| a :: b :: t ->
let diff = b.center -. a.center in
assert ( diff > 0. );
min_diff_index 1 0 diff (b :: t)
| [ _ ]
| [] -> assert false
let merge_bins lo hi =
assert (lo.center < hi.center);
let sum_count = lo.count + hi.count in
let center =
(lo.center *. (float lo.count) +. hi.center *. (float hi.count)) /.
(float sum_count)
in
{ center; count = sum_count }
let merge_bins_at_index =
let rec loop i index = function
| a :: b :: t ->
if i = index then
let bin = merge_bins a b in
bin :: t
else
a :: (loop (i + 1) index (b :: t))
| [ _ ]
| [] -> assert false
in
fun index bins ->
loop 0 index bins
let create max_bins =
if max_bins < 2 then
raise (Invalid_argument (Printf.sprintf "max_bins: %d" max_bins))
else {
max_bins;
num_bins = 0;
bins = [];
total_count = 0;
range = None
}
let add value histogram =
let range =
match histogram.range with
| Some (mn, mx) -> Some (min value mn, max value mx)
| None -> Some (value, value)
in
let total_count = histogram.total_count + 1 in
let bins, is_augmented = insert value histogram.bins in
if histogram.num_bins = histogram.max_bins then
if is_augmented then
let index = min_diff_index bins in
let bins = merge_bins_at_index index bins in
{ histogram with bins; range; total_count }
else
{ histogram with bins; range; total_count }
else
if is_augmented then
{ histogram with bins; range; total_count;
num_bins = histogram.num_bins + 1; }
else
{ histogram with bins; range; total_count }
let rec binary_merge a b =
match a, b with
| a_h :: a_t, b_h :: b_t ->
if a_h.center < b_h.center then
a_h :: (binary_merge a_t b)
else if a_h.center > b_h.center then
b_h :: (binary_merge a b_t)
else
let merged = { a_h with count = a_h.count + b_h.count } in
merged :: (binary_merge a_t b_t)
| [], _ :: _ -> b
| _ :: _, [] -> a
| [], [] -> []
let rec k_ary_merge_half accu = function
| a :: b :: t ->
let ab = binary_merge a b in
k_ary_merge_half (ab :: accu) t
| [a] -> (a :: accu)
| [] -> accu
let rec k_ary_merge t =
match k_ary_merge_half [] t with
| [a] -> a
| t -> k_ary_merge t
let rec reduce bins ~num_bins ~max_bins =
if num_bins > max_bins then
let index = min_diff_index bins in
let bins = merge_bins_at_index index bins in
reduce bins ~num_bins:(num_bins - 1) ~max_bins
else
bins
let merge h_list max_bins =
let bins, _, total_count, range = List.fold_left
(fun (t_bins, t_num_bins, t_total_count, t_range)
{ bins; num_bins; total_count; range; _} ->
let t_range =
match t_range, range with
| Some (t_mn, t_mx), Some (mn, mx) ->
Some ((min t_mn mn), (max t_mx mx))
| None, Some _ -> range
| Some _, None -> t_range
| None, None -> None
in
bins :: t_bins,
t_num_bins + num_bins,
t_total_count + total_count,
t_range
) ([], 0, 0, None) h_list in
let merged_bins = k_ary_merge bins in
let num_bins = List.length merged_bins in
let bins = reduce merged_bins ~num_bins ~max_bins in
let num_bins = List.length bins in
{ bins;
num_bins;
max_bins;
total_count;
range }
let addc value count hist =
let singleton = {
bins = [{ center = value ; count }];
total_count = count;
range = Some (value, value);
num_bins = 1;
max_bins = 1;
} in
merge [hist; singleton] hist.max_bins
let pos_quadratic_root ~a ~b ~c =
if a = 0.0 then
-.c /. b
else
let discriminant = b *. b -. 4. *. a *. c in
((sqrt discriminant) -. b) /. (2. *. a)
exception Empty
let sum =
let rec find_i b i sum = function
| ({ center = p_i; count = m_i } as bin_i) ::
({ center = p_i1; _ } as bin_i1) :: t ->
if p_i <= b && b < p_i1 then
bin_i, bin_i1, sum
else
find_i b (i+1) (sum +. (float m_i)) (bin_i1 :: t)
| _ -> raise Not_found
in
fun histogram b ->
let {center = p_i; count = m_i}, {center = p_i1; count = m_i1 }, sum_i0 =
find_i b 0 0.0 histogram.bins in
let m_i = float m_i in
let m_i1 = float m_i1 in
let bpp = (b -. p_i) /. (p_i1 -. p_i) in
let m_b = m_i +. (m_i1 -. m_i) *. bpp in
let s = (m_i +. m_b) *. bpp /. 2. in
s +. sum_i0 +. m_i /. 2.
let uniform =
let rec loop span cum_sum_at_centers j accu =
let s = (float j) *. span in
match cum_sum_at_centers with
| (cum_sum_0, {center = p_0; count = m_0}) ::
((cum_sum_1, {center = p_1; count = m_1}) as bin_1) :: rest ->
if s < cum_sum_0 then
loop span cum_sum_at_centers (j + 1) accu
else if cum_sum_0 <= s && s < cum_sum_1 then
let d = s -. cum_sum_0 in
let c = -2. *. d in
let b = float (2 * m_0) in
let a = float (m_1 - m_0) in
let z = pos_quadratic_root ~a ~b ~c in
let u = p_0 +. (p_1 -. p_0) *. z in
loop span cum_sum_at_centers (j + 1) ((j, u) :: accu)
else
loop span (bin_1 :: rest) j accu
| [ _ ] -> List.rev accu
| [] -> assert false
in
let cum_sum_at_centers hist =
let bin, hist_rest, cum_sum =
match hist with
| ({count; _} as bin) :: rest -> bin, rest, (float count) /. 2.
| _ -> raise Empty
in
let _, _, cum_sum_at_centers = List.fold_left (
fun (cum_sum, prev_count, cum_sum_at_centers) ({count; _} as bin) ->
let cum_sum = cum_sum +. (float (prev_count + count)) /. 2. in
let cum_sum_at_centers = (cum_sum, bin) :: cum_sum_at_centers in
cum_sum, count, cum_sum_at_centers
) (cum_sum, bin.count, [cum_sum, bin]) hist_rest in
List.rev cum_sum_at_centers
in
fun hist b ->
if b < 1 then
raise (Invalid_argument "uniform")
else
let cum_sum_at_centers = cum_sum_at_centers hist.bins in
let span = (float hist.total_count) /. (float b) in
loop span cum_sum_at_centers 0 []
let mean { bins; total_count; _ } =
if total_count = 0 then
raise Empty
else
let m = List.fold_left (
fun sum { center; count } ->
sum +. center *. (float count)
) 0.0 bins in
m /. (float total_count)
let mean_stdev histogram =
if histogram.total_count = 0 then
raise Empty
else
let mean = mean histogram in
let v = List.fold_left (
fun sum { center; count } ->
let diff = center -. mean in
sum +. diff *. diff *. (float count)
) 0.0 histogram.bins
in
let stdev = sqrt (v /. (float histogram.total_count)) in
mean, stdev