1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
open! Import
include Dict_intf
module Make (Io : Io.S) = struct
(* Re-export the functor argument under the name [Io]. *)
module Io = Io
(* [Io_errors.Make] instantiated with [Io]. *)
module Errs = Io_errors.Make (Io)
(* [Append_only_file.Make] instantiated with [Io] and [Errs]; every file
   access in this module goes through [Ao]. *)
module Ao = Append_only_file.Make (Io) (Errs)
(* State of a dictionary: two in-memory tables mirroring the entries stored
   in an append-only file. *)
type t = {
(* [index] stops registering new strings once the number of entries in
   [cache] is greater than this value. *)
capacity : int;
(* Maps each known string to its integer id. *)
cache : (string, int) Hashtbl.t;
(* Reverse of [cache]: maps each id to its string. *)
index : (int, string) Hashtbl.t;
(* The append-only file that entries are appended to and read back from. *)
ao : Ao.t;
(* End offset of [ao] as observed by the last [refill]; the next [refill]
   starts reading from here. *)
mutable last_refill_offset : int63;
}
(** [empty_buffer t] is [Ao.empty_buffer] applied to the append-only file
    held by [t]. *)
let empty_buffer { ao; _ } = Ao.empty_buffer ao
(* Local alias of [int32] carrying the irmin ppx derivers. It provides
   [int32_to_bin_string] and [decode_bin_int32], which this module uses to
   write and read the length prefix of each entry. *)
type nonrec int32 = int32 [@@deriving irmin ~to_bin_string ~decode_bin]
(** [append_string t v] appends one entry to the file of [t]: the length of
    [v] encoded with [int32_to_bin_string], immediately followed by the bytes
    of [v].

    The write goes through [Ao.append_exn]; given its [_exn] suffix it
    presumably raises on failure (not visible from here). *)
let append_string t v =
  let header = v |> String.length |> Int32.of_int |> int32_to_bin_string in
  Ao.append_exn t.ao (header ^ v)
(** [refill t] loads into the in-memory tables every entry of the file of [t]
    located between [t.last_refill_offset] and the current [Ao.end_poff].

    Each entry is read as an [int32] length followed by that many bytes of
    payload. Ids are assigned sequentially, starting at the number of entries
    already present in [t.cache].

    Returns the error of [Ao.read_to_string] if the read fails. In that case
    [t.last_refill_offset] is left unchanged, so a later call reads the same
    range again instead of silently skipping it. *)
let refill t =
  let open Result_syntax in
  let from = t.last_refill_offset in
  let new_size = Ao.end_poff t.ao in
  let len = Int63.to_int Int63.Syntax.(new_size - from) in
  let+ raw = Ao.read_to_string t.ao ~off:from ~len in
  (* Advance the offset only once the bytes are in hand: moving it before a
     failed read would make every later refill skip this range. *)
  t.last_refill_offset <- new_size;
  let pos_ref = ref 0 in
  let rec aux n =
    if !pos_ref >= len then ()
    else
      (* [decode_bin_int32] is expected to move [pos_ref] past the length
         prefix, leaving it at the first byte of the payload. *)
      let entry_len = Int32.to_int (decode_bin_int32 raw pos_ref) in
      let v = String.sub raw !pos_ref entry_len in
      pos_ref := !pos_ref + entry_len;
      Hashtbl.add t.cache v n;
      Hashtbl.add t.index n v;
      (aux [@tailcall]) (n + 1)
  in
  (aux [@tailcall]) (Hashtbl.length t.cache)
(** [refresh_end_poff t new_end_poff] passes [new_end_poff] to
    [Ao.refresh_end_poff] on the file of [t] and, when that succeeds, calls
    [refill t] to load the entries not yet in memory. An error from the first
    step is returned as is and [refill] is not attempted. *)
let refresh_end_poff t new_end_poff =
  match Ao.refresh_end_poff t.ao new_end_poff with
  | Ok () -> refill t
  | Error e -> Error e
(** [index t v] is the id associated with the string [v].

    If [v] is already known, its existing id is returned. Otherwise the next
    id is the current number of entries in [t.cache]; when that number is
    greater than [t.capacity] the result is [None] and nothing is written.
    In the remaining case [v] is appended to the file with [append_string],
    recorded in both tables, and its fresh id is returned. *)
let index t v =
  [%log.debug "[dict] index %S" v];
  match Hashtbl.find t.cache v with
  | known_id -> Some known_id
  | exception Not_found ->
      let fresh_id = Hashtbl.length t.cache in
      if fresh_id <= t.capacity then begin
        append_string t v;
        Hashtbl.add t.cache v fresh_id;
        Hashtbl.add t.index fresh_id v;
        Some fresh_id
      end
      else None
(** [find t id] is [Some s] when [id] is bound to the string [s] in
    [t.index], and [None] when [id] is unknown. *)
let find t id =
  [%log.debug "[dict] find %d" id];
  match Hashtbl.find t.index id with
  | s -> Some s
  | exception Not_found -> None
(* Value of the [capacity] field for every dictionary built by [v_empty]. *)
let default_capacity = 100_000
(** [v_empty ao] is a dictionary over the file [ao] with empty tables, the
    default capacity, and a refill offset of zero. Nothing is read from
    [ao]. *)
let v_empty ao =
  {
    capacity = default_capacity;
    cache = Hashtbl.create 997;
    index = Hashtbl.create 997;
    ao;
    last_refill_offset = Int63.zero;
  }
(** [create_rw ~overwrite ~path] creates the underlying file with
    [Ao.create_rw] and wraps it in an empty dictionary. An error from
    [Ao.create_rw] is returned unchanged. *)
let create_rw ~overwrite ~path:filename =
  let open Result_syntax in
  let+ ao = Ao.create_rw ~overwrite ~path:filename in
  v_empty ao
(** [v_filled ao] builds an empty dictionary over [ao] and immediately
    populates it with [refill]. The dictionary is returned only when the
    refill succeeds; otherwise the refill error is returned. *)
let v_filled ao =
  let open Result_syntax in
  let t = v_empty ao in
  let+ () = refill t in
  t
(** [open_rw ~size ~dead_header_size filename] opens the file [filename] with
    [Ao.open_rw], passing [size] as its [end_poff] and forwarding
    [dead_header_size], then loads its entries into memory with [v_filled].

    Returns the error of [Ao.open_rw] or of the initial refill if either
    fails. *)
let open_rw ~size ~dead_header_size filename =
  let open Result_syntax in
  let* ao = Ao.open_rw ~path:filename ~end_poff:size ~dead_header_size in
  v_filled ao
(** [open_ro ~size ~dead_header_size filename] opens the file [filename] with
    [Ao.open_ro], passing [size] as its [end_poff] and forwarding
    [dead_header_size], then loads its entries into memory with [v_filled].

    Returns the error of [Ao.open_ro] or of the initial refill if either
    fails. *)
let open_ro ~size ~dead_header_size filename =
  let open Result_syntax in
  let* ao = Ao.open_ro ~path:filename ~end_poff:size ~dead_header_size in
  v_filled ao
(** [end_poff t] is [Ao.end_poff] of the file held by [t]. *)
let end_poff { ao; _ } = Ao.end_poff ao

(** [flush t] is [Ao.flush] applied to the file held by [t]. *)
let flush { ao; _ } = Ao.flush ao

(** [fsync t] is [Ao.fsync] applied to the file held by [t]. *)
let fsync { ao; _ } = Ao.fsync ao

(** [close t] is [Ao.close] applied to the file held by [t]. *)
let close { ao; _ } = Ao.close ao
end