1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
include Dict_intf
open! Import
module Make (V : Version.S) (IO : IO.S) : S = struct
type t = {
capacity : int;
cache : (string, int) Hashtbl.t;
index : (int, string) Hashtbl.t;
mutable io : IO.t;
mutable open_instances : int;
}
let int32_to_bin = Irmin.Type.(unstage (to_bin_string int32))
let decode_int32 = Irmin.Type.(unstage (decode_bin int32))
let append_string t v =
let len = Int32.of_int (String.length v) in
let buf = int32_to_bin len ^ v in
IO.append t.io buf
let refill ~from t =
let len = Int63.to_int (IO.offset t.io -- from) in
let raw = Bytes.create len in
let n = IO.read t.io ~off:from raw in
assert (n = len);
let raw = Bytes.unsafe_to_string raw in
let rec aux n offset =
if offset >= len then ()
else
let _, v = decode_int32 raw offset in
let len = Int32.to_int v in
let v = String.sub raw (offset + 4) len in
Hashtbl.add t.cache v n;
Hashtbl.add t.index n v;
(aux [@tailcall]) (n + 1) (offset + 4 + len)
in
(aux [@tailcall]) (Hashtbl.length t.cache) 0
let sync_offset t =
let former_offset = IO.offset t.io in
let former_generation = IO.generation t.io in
let h = IO.force_headers t.io in
if former_generation <> h.generation then (
IO.close t.io;
let io =
IO.v ~fresh:false ~readonly:true ~version:(Some V.version)
(IO.name t.io)
in
t.io <- io;
Hashtbl.clear t.cache;
Hashtbl.clear t.index;
refill ~from:Int63.zero t)
else if h.offset > former_offset then refill ~from:former_offset t
let sync t =
if IO.readonly t.io then sync_offset t
else invalid_arg "only a readonly instance should call this function"
let flush t = IO.flush t.io
let index t v =
Log.debug (fun l -> l "[dict] index %S" v);
try Some (Hashtbl.find t.cache v)
with Not_found ->
let id = Hashtbl.length t.cache in
if id > t.capacity then None
else (
if IO.readonly t.io then raise S.RO_not_allowed;
append_string t v;
Hashtbl.add t.cache v id;
Hashtbl.add t.index id v;
Some id)
let find t id =
Log.debug (fun l -> l "[dict] find %d" id);
let v = try Some (Hashtbl.find t.index id) with Not_found -> None in
v
let clear t =
match V.version with
| `V1 -> IO.truncate t.io
| `V2 ->
IO.clear t.io;
Hashtbl.clear t.cache;
Hashtbl.clear t.index
let v ?(fresh = true) ?(readonly = false) ?(capacity = 100_000) file =
let io = IO.v ~fresh ~version:(Some V.version) ~readonly file in
let cache = Hashtbl.create 997 in
let index = Hashtbl.create 997 in
let t = { capacity; index; cache; io; open_instances = 1 } in
refill ~from:Int63.zero t;
t
let close t =
t.open_instances <- t.open_instances - 1;
if t.open_instances = 0 then (
if not (IO.readonly t.io) then flush t;
IO.close t.io;
Hashtbl.reset t.cache;
Hashtbl.reset t.index)
let valid t =
if t.open_instances <> 0 then (
t.open_instances <- t.open_instances + 1;
true)
else false
end