1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
type 'a t = 'a Stats_intf.emp
let truncate (type elt) (module O : Basic_intf.Ordered with type t = elt)
(distribution : elt t) (p : float) =
if p <. 0.0 || p >. 1.0 then invalid_arg "truncate" ;
Array.sort O.compare distribution ;
let len = Array.length distribution in
let plen = int_of_float (p *. float len) in
Array.sub distribution 0 plen
let quantile (type elt) (module O : Basic_intf.Ordered with type t = elt)
(distribution : elt t) (p : float) =
if p <. 0.0 || p >. 1.0 then invalid_arg "quantile" ;
Array.sort O.compare distribution ;
let len = Array.length distribution in
let plen = int_of_float (p *. float (len - 1)) in
distribution.(plen)
let map = Array.map
let of_generative ~nsamples (sampler : ('s, 'a) Stats_intf.gen) rng_state =
Array.init nsamples (fun _ -> sampler rng_state)
let of_raw_data array = array
let to_raw_data array = `Empirical array
module type Empirical_statistics = sig
type r
val empirical_mean : r t -> r
val empirical_mean_generic :
(module Basic_intf.Module with type t = 'elt and type R.t = r) ->
'elt t ->
'elt
val empirical_variance : r t -> r
end
module Make_basic_statistics (Reals : Basic_intf.Reals) :
Empirical_statistics with type r = Reals.t = struct
type r = Reals.t
let empirical_mean (distribution : Reals.t t) =
let len = Array.length distribution in
if len = 0 then Reals.zero
else
let ilen = Reals.(one / of_int len) in
let sum = Array.fold_left Reals.( + ) Reals.zero distribution in
Reals.(ilen * sum)
let empirical_mean_generic (type elt)
(module L : Basic_intf.Module with type t = elt and type R.t = Reals.t)
(distribution : elt t) =
let len = Array.length distribution in
if len = 0 then L.zero
else
let ilen = Reals.(one / of_int (Array.length distribution)) in
let sum = Array.fold_left L.add L.zero distribution in
L.smul ilen sum
let empirical_variance (distribution : Reals.t t) =
let len = Array.length distribution in
if len = 0 then Reals.zero
else
let mean = empirical_mean distribution in
let ilen = Reals.(one / of_int len) in
let sum =
Array.fold_left
(fun acc elt -> Reals.(acc + npow (elt - mean) 2))
Reals.zero
distribution
in
Reals.(ilen * sum)
end
module Float = Make_basic_statistics (Basic_impl.Reals.Float)
module Rational = Make_basic_statistics (Basic_impl.Reals.Rational)
let remove_outliers ~nsigmas (dist : float t) =
if nsigmas <. 0.0 then invalid_arg "remove_outliers" ;
let mean = Float.empirical_mean dist in
let var = Float.empirical_variance dist in
let std = sqrt var in
let delta = std *. nsigmas in
dist |> Array.to_list
|> List.filter (fun x ->
Basic_impl.Reals.Float.(abs_float (x -. mean) <= delta))
|> Array.of_list