Source: fstats.ml (p.core_profiler.v0.15.0.doc.src.core

1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135(** This module is basically copied straight from [Jane.Rstats], however: - [decay] removed (to avoid Option.value branch in [update_in_place]) - [update_in_place] optimised so that it doesn't allocate (see below) This copy can be killed when the original is available publicly. *) open Core open Poly type t = { (* Note: we keep samples as a float instead of an int so that all floats in the record are kept unboxed. *) mutable samples : float; (* sum of sample^0 *) mutable sum: float; (* sum of sample^1 *) mutable sqsum: float; (* sum of sample^2 *) (* The naive algorithm of (sqsum - sum^2) exhibits catastrophic cancellation, so we use an alternative algorithm that keeps a running total of the variance and gets much better numerical precision. See "Weighted incremental algorithm" and "Parallel algorithm" at http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance *) mutable varsum : float; (* sum of running_variance *) mutable max : float; (* largest sample *) mutable min : float; (* smallest sample *) } let create () = { sum = 0. ; sqsum = 0. ; varsum = 0. ; samples = 0. ; max = Float.neg_infinity ; min = Float.infinity } let samples t = Float.to_int t.samples let total t = t.sum let min t = t.min let max t = t.max let mean t = t.sum /. t.samples let var t = if t.samples <= 1. then Float.nan else Float.max 0. ((t.varsum /. t.samples) *. (t.samples /. (t.samples -. 1.))) let stdev t = sqrt (var t) (* we need [update_in_place] to be inlined in order to eliminate the float allocation at the callsites where we have an int to hand instead of a float. *) let[@inline] update_in_place t value = if t.samples <= 0. then begin (* [Rstats.safe_mean] allocates, even after it's been inlined. It seems that in general, expressions of form [if expr then some_float else other_float] cause allocation, even in cases where it is unnecessary. So, I handle the [t.samples <= 0] separately, so that [safe_mean] is not needed. *) t.samples <- 1.; t.sum <- value; t.sqsum <- value *. value; t.max <- value; t.min <- value; t.varsum <- 0. end else begin (* [Option.value decay ~default_decay:...] suffers the same problem. *) let old_mean = mean t in t.samples <- t.samples +. 1.; t.sum <- t.sum +. value; t.sqsum <- t.sqsum +. value *. value; (* [Float.max] suffers the same problem as above and causes allocation even after it's been inlined. *) if value > t.max then t.max <- value; if value < t.min then t.min <- value; let new_mean = mean t in (* Numerically stable method for computing variance. On wikipedia page: # Alternatively, "M2 = M2 + weight * delta * (x−mean)" *) t.varsum <- t.varsum +. (value -. old_mean) *. (value -. new_mean) end let copy t = { t with sum = t.sum } let%test_module "Fstats" = (module struct let data = [|198215.02492520443; 28715.0284862834887; 434619.094190333097; 800678.330169520807 ; 200186.54372400351; 137503.258498826239; 566498.60534151; 549219.475914424169 ; 780230.679805230233; 712168.552241884521; 512045.175157501479 ; 606136.851468109642; 368469.614224194782; 213372.100528741139 ; 487759.722525204881; 545327.353652161313; 565759.781024767901 ; 227130.713477647136; 14526.9831253076572; 87168.3680568782729 ; 317822.864412072755; 328746.783061697963; 446049.964182617899 ; 451270.307992378599; 822506.373272555647; 947812.349198815064 ; 563960.680863914196; 73057.735605084; 475515.868111219897; 79103.4644861585693 ; 61060.2804668050376; 821842.058883985155; 162383.377334053017 ; 151034.116153264389; 357173.747924180352; 417551.514353964303 ; 758440.286012416356; 480593.8769512953; 109763.567296876703 ; 154961.955787061859; 50902.5130336880611; 273048.455977052858 ; 673477.375928052119; 790059.438551203464; 817997.314074333408 ; 280563.866073483776; 858501.471649471; 908670.036968784756; 843433.873243822 ; 717604.357264731894; 257166.21131005112; 587352.255237122881; 679376.01970596856 ; 93196.2210949568544; 343319.788271304453; 757660.644341278588 ; 403271.576879935688; 974099.221967302146; 964390.741413959884 ; 807222.013931629714; 670868.156459537; 656612.853921575472; 545398.269980843412|] let stats = create () let () = Array.iter data ~f:(update_in_place stats) let%test "samples" = samples stats = 63 let%test "min" = min stats = 14526.9831253076572 let%test "max" = max stats = 974099.221967302146 let%test "total" = total stats = 29970575.106168244 let%test "mean" = mean stats = 475723.414383622934 let%test "var" = var stats = 78826737609.7966156 let%test "stdev" = stdev stats = 280760.997308736958 let%test_unit "copy" = let stats2 = copy stats in update_in_place stats2 0.; [%test_eq: int] (samples stats) 63; [%test_eq: int] (samples stats2) 64 end) let%bench_module "Fstats" = (module struct let stats = create () let%bench "update_in_place" = update_in_place stats 5. end)