1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
open! Import
open Snapshot_intf
(* [rm_index path] unlinks every entry found directly inside [path/index],
   then removes the [index] directory and finally [path] itself. Any
   exception raised by [Sys.readdir], [Unix.unlink] or [Unix.rmdir] is left
   to propagate. *)
let rm_index path =
  let index_dir = Filename.concat path "index" in
  let entries = Sys.readdir index_dir in
  Array.iter
    (fun entry -> Unix.unlink (Filename.concat index_dir entry))
    entries;
  Unix.rmdir index_dir;
  Unix.rmdir path
module Make (Args : Args) = struct
(* Set of hashes; [Export.run_in_memory] uses it to remember the hashes it
   has already visited. *)
module Hashes = Irmin.Hash.Set.Make (Args.Hash)
(* Bring the functor argument into scope unqualified ([Hash], [Inode],
   [Fm], ... are referenced without a prefix below). *)
open Args
module Inode_pack = Inode.Pack
(* Shadows the outer [Pack_index] with its instance for [Hash]. *)
module Pack_index = Pack_index.Make (Hash)
(* Pretty-printers built once from the Irmin type representations; they are
   used in the log and error messages of this module. *)
let pp_hash = Irmin.Type.pp Hash.t
let pp_key = Irmin.Type.pp Inode_pack.Key.t
let pp_kind = Irmin.Type.pp Pack_value.Kind.t
let pp_snapshot = Irmin.Type.pp Inode.Snapshot.inode_t
module Export = struct
(* Payload of the export-side index. Only key membership matters there
   (see [Index.mem] / [Index.replace] in [run_on_disk]), so a value is [unit]
   and serialises to the empty string. *)
module Value_unit = struct
  type t = unit [@@deriving irmin]

  (* A binding occupies zero bytes on top of its key. *)
  let encoded_size = 0

  (* Encoding ignores its argument and always yields the empty string. *)
  let encode _value = ""

  (* Decoding ignores both the buffer and the position. *)
  let decode _buffer _position = ()
end
(* Index from [Pack_index.Key] to [Value_unit]; [run_on_disk] creates one
   under a caller-supplied path to record which hashes were visited. *)
module Index =
Index_unix.Make (Pack_index.Key) (Value_unit) (Index.Cache.Unbounded)
(* State needed to export a snapshot; built by [v], released by [close]. *)
type t = {
(* File manager opened read-only by [v]; [close] closes it. *)
fm : Fm.t;
(* Dispatcher created on top of [fm]; all raw reads go through it. *)
dispatcher : Dispatcher.t;
(* Value of [Conf.index_log_size] for the configuration given to [v];
   forwarded as [~log_size] when [run_on_disk] creates its index. *)
log_size : int;
(* Inode store, used to resolve hashes to keys and to load inodes. *)
inode_pack : read Inode_pack.t;
(* Contents store, used to load contents values. *)
contents_pack : read Contents_pack.t;
}
(* [v config contents_pack inode_pack] opens the file manager read-only for
   [config], builds a dispatcher over it and packs both, together with the
   configured index log size and the two stores, into an export state.
   Errors reported by [Fm.open_ro] or [Dispatcher.v] are raised through
   [Fm.Errs.raise_if_error]. *)
let v config contents_pack inode_pack =
  let fm = Fm.Errs.raise_if_error (Fm.open_ro config) in
  let dispatcher = Fm.Errs.raise_if_error (Dispatcher.v fm) in
  let log_size = Conf.index_log_size config in
  {
    fm = fm;
    dispatcher = dispatcher;
    log_size = log_size;
    inode_pack = inode_pack;
    contents_pack = contents_pack;
  }
(* [close t] closes the file manager held by [t] and returns whatever
   [Fm.close] returns. *)
let close { fm; _ } = Fm.close fm
(* [key_of_hash hash t] looks [hash] up with
   [Inode_pack.index_direct_with_kind] and returns the entry found.
   Raises [Invalid_argument] (from [Option.get]) when the lookup yields
   [None]. *)
let key_of_hash hash t =
  let entry = Inode_pack.index_direct_with_kind t hash in
  Option.get entry
(* [length_of_hash hash t] is the [length] recorded in the direct key that
   [key_of_hash] returns for [hash]. An indexed key is treated as
   impossible here and trips [assert false]. *)
let length_of_hash hash t =
  match Pack_key.inspect (fst (key_of_hash hash t)) with
  | Direct { length; _ } -> length
  | Indexed _ -> assert false
(* [io_read_and_decode_entry_prefix ~off t] reads the entry prefix stored at
   offset [off] through the dispatcher and returns the direct key of that
   entry paired with its kind. When the prefix does not provide the total
   entry length, the length is recovered from the hash with
   [length_of_hash]. *)
let io_read_and_decode_entry_prefix ~off t =
  let prefix : Inode_pack.Entry_prefix.t =
    Inode_pack.read_and_decode_entry_prefix ~off t.dispatcher
  in
  let hash = prefix.hash in
  let length =
    match Inode_pack.Entry_prefix.total_entry_length prefix with
    | None -> length_of_hash hash t.inode_pack
    | Some known -> known
  in
  let direct_key = Pack_key.v_direct ~hash ~offset:off ~length in
  (direct_key, prefix.kind)
(* [decode_children_offsets ~off ~len t] reads the [len] bytes located at
   offset [off] through the dispatcher and hands them to
   [Inode.Raw.decode_children_offsets], starting at position 0. Children
   referenced by offset are resolved by reading their entry prefix; children
   referenced by hash are resolved with [key_of_hash]. *)
let decode_children_offsets ~off ~len t =
  let entry_of_hash h = key_of_hash h t.inode_pack in
  let entry_of_offset child_off =
    [%log.debug "key_of_offset: %a" Int63.pp child_off];
    io_read_and_decode_entry_prefix ~off:child_off t
  in
  let raw = Bytes.create len in
  let accessor = Dispatcher.create_accessor_exn t.dispatcher ~off ~len in
  Dispatcher.read_exn t.dispatcher accessor raw;
  let position = ref 0 in
  Inode.Raw.decode_children_offsets ~entry_of_offset ~entry_of_hash
    (Bytes.unsafe_to_string raw)
    position
(* Callbacks abstracting the visited-set used by [iter]: [visited h] tells
   whether [h] has already been recorded, [set_visit h] records it. *)
type visit = { visited : Hash.t -> bool; set_visit : Hash.t -> unit }
(* [iter t v f_contents f_inodes (root_key, root_kind)] walks the objects
   reachable from the root, depth first. An object whose hash [v] reports as
   visited is skipped. Otherwise it is first marked with [v.set_visit], then:
   - a contents value is loaded from [t.contents_pack] and passed to
     [f_contents];
   - an inode has its children decoded and walked sequentially, and only
     then is the inode itself loaded from [t.inode_pack], converted with
     [Inode.to_snapshot] and passed to [f_inodes]. Children are therefore
     emitted before their parent.
   The returned promise resolves to the number of objects marked visited.
   Raises [Failure] when a key is not found in its store. Commit kinds,
   dangling parent commits and indexed keys inside the walk trip
   [assert false]. *)
let iter t v f_contents f_inodes (root_key, root_kind) =
let total_visited = ref 0 in
(* Wrap [v.set_visit] so that every marked object is also counted. *)
let set_visit h =
incr total_visited;
v.set_visit h
in
let rec aux (key, kind) =
match Pack_key.inspect key with
| Indexed _ ->
(* The root is converted to a direct key below. NOTE(review): this also
   assumes [decode_children_offsets] only yields direct keys - confirm. *)
assert false
| Direct { length; offset; hash } ->
if v.visited hash then Lwt.return_unit
else (
(* Mark before recursing: the hash is already recorded while its
   children are being walked. *)
set_visit hash;
[%log.debug "visit hash: %a, %a" pp_hash hash pp_kind kind];
match kind with
| Contents -> (
let value =
Contents_pack.unsafe_find ~check_integrity:false
t.contents_pack key
in
match value with
| None ->
Fmt.failwith "contents not found in store. Key: %a "
pp_key key
| Some value ->
let snapshot_blob = value in
f_contents snapshot_blob)
| Inode_v1_unstable | Inode_v1_stable | Inode_v2_root
| Inode_v2_nonroot -> (
let children =
decode_children_offsets ~off:offset ~len:length t
in
(* Children are walked one after the other, before the parent inode is
   loaded and emitted. *)
let* () = Lwt_list.iter_s (fun key -> aux key) children in
let value =
Inode_pack.unsafe_find ~check_integrity:false t.inode_pack
key
in
match value with
| None ->
Fmt.failwith "node not found in store. Key: %a " pp_key
key
| Some value ->
let snapshot_inode = Inode.to_snapshot value in
[%log.debug
"iter inode snapshot: %a" pp_snapshot snapshot_inode];
f_inodes snapshot_inode)
| Commit_v1 | Commit_v2 ->
(* Presumably commits are never reachable from a tree root - confirm. *)
assert false
| Dangling_parent_commit -> assert false)
in
(* An indexed root key is replaced by the direct key that [key_of_hash]
   returns for its hash. *)
let root_key =
match Pack_key.inspect root_key with
| Indexed hash -> key_of_hash hash t.inode_pack |> fst
| Direct _ -> root_key
in
let* () = aux (root_key, root_kind) in
Lwt.return !total_visited
(* [run_in_memory t f_contents f_inodes root_key] runs [iter] with a
   visited-set kept in an in-memory hash set created with 100_000 initial
   slots. Recording a hash that is already in the set raises [Failure]. *)
let run_in_memory t f_contents f_inodes root_key =
  [%log.info "iter in memory"];
  let seen = Hashes.create ~initial_slots:100_000 () in
  let tracker =
    {
      visited = (fun h -> Hashes.mem seen h);
      set_visit =
        (fun h ->
          match Hashes.add seen h with
          | `Ok -> ()
          | `Duplicate ->
              Fmt.failwith "should not visit hash twice. Hash: %a " pp_hash h);
    }
  in
  iter t tracker f_contents f_inodes root_key
(* [run_on_disk path t f_contents f_inodes root_key] runs [iter] with a
   visited-set stored in a fresh index created under [path], and resolves to
   the number of objects visited.

   The index is temporary: it is closed and its directory removed with
   [rm_index] once the traversal is over. The cleanup is wrapped in
   [Lwt.finalize] so that it also runs when the traversal fails (an
   exception raised by [iter], or a rejected promise from one of the
   callbacks); previously a failure left the index open and its files on
   disk. After the cleanup the original failure is propagated as a rejected
   promise.

   Recording a hash that is already in the index raises [Failure]. *)
let run_on_disk path t f_contents f_inodes root_key =
  [%log.info "iter on disk"];
  let index =
    Index.v ~fresh:true ~readonly:false ~log_size:t.log_size path
  in
  let visited h = Index.mem index h in
  let set_visit h =
    if visited h then
      Fmt.failwith "Should not visit hash twice. Hash: %a " pp_hash h
    else Index.replace index h ()
  in
  Lwt.finalize
    (fun () -> iter t { visited; set_visit } f_contents f_inodes root_key)
    (fun () ->
      (* Always release the temporary index, on success and on failure. *)
      Index.close index;
      rm_index path;
      Lwt.return_unit)
(* [run ?on_disk] selects the traversal: with [~on_disk:(`Path dir)] the
   visited-set lives in an index under [dir] ([run_on_disk]), otherwise it
   is kept in memory ([run_in_memory]). The remaining arguments are those of
   the selected function. *)
let run ?on_disk =
  match on_disk with
  | Some (`Path dir) -> run_on_disk dir
  | None -> run_in_memory
end
module Import = struct
(* Payload of the import-side index: an (offset, length) pair serialised on
   12 bytes, the offset as a big-endian 64-bit integer followed by the
   length as a big-endian 32-bit integer. The length goes through
   [Int32.of_int], so only its low 32 bits are stored. *)
module Value = struct
  type t = int63 * int [@@deriving irmin]

  (* 8 bytes for the offset plus 4 bytes for the length. *)
  let encoded_size = 8 + 4

  (* [encode (off, len)] is the 12-byte serialisation of the pair. *)
  let encode ((off, len) : t) =
    let out = Bytes.create encoded_size in
    let () = Bytes.set_int64_be out 0 (Int63.to_int64 off) in
    let () = Bytes.set_int32_be out 8 (Int32.of_int len) in
    Bytes.unsafe_to_string out

  (* [decode s pos] reads back a pair serialised by [encode] at position
     [pos] of [s]. *)
  let decode s pos : t =
    let raw = Bytes.unsafe_of_string s in
    let off = Int63.of_int64 (Bytes.get_int64_be raw pos) in
    let len = Int32.to_int (Bytes.get_int32_be raw (pos + 8)) in
    (off, len)
end
(* Index from [Pack_index.Key] to the (offset, length) pairs of [Value];
   [save_on_disk] uses it to remember the keys of saved objects. *)
module Index =
Index_unix.Make (Pack_index.Key) (Value) (Index.Cache.Unbounded)
type path = string
(* State of an import; built by [v], released by [close]. *)
type t = {
(* Store that receives the imported inodes. *)
inode_pack : read Inode_pack.t;
(* Store that receives the imported contents. *)
contents_pack : read Contents_pack.t;
(* Maps the hash of an already known object to its key; the three
   implementations in this module call [hash_not_found] for an unknown
   hash. *)
visited : Hash.t -> Hash.t Pack_key.t;
(* Records the key under which an object with the given hash was saved. *)
set_visit : Hash.t -> Hash.t Pack_key.t -> unit;
(* Temporary index and its path when the [save_on_disk] strategy is in use,
   [None] otherwise; [close] closes and deletes it. *)
index : (path * Index.t) option;
}
(* [save_contents t b] adds the contents value [b] to [t.contents_pack]
   inside a batch, records the resulting key under its hash with
   [t.set_visit], and resolves to that key. *)
let save_contents t b : Hash.t Pack_key.t Lwt.t =
  let add_blob writer = Contents_pack.add writer b in
  let* key = Contents_pack.batch t.contents_pack add_blob in
  t.set_visit (Inode.Key.to_hash key) key;
  Lwt.return key
(* [save_inodes t i] rebuilds an inode from the snapshot [i], resolving the
   hashes it points to with [t.visited], saves it to [t.inode_pack]
   (non-root inodes allowed), records the resulting key under its hash with
   [t.set_visit], and resolves to that key. *)
let save_inodes t i : Hash.t Pack_key.t Lwt.t =
  let key =
    Inode.save ~allow_non_root:true t.inode_pack
      (Inode.of_snapshot t.inode_pack ~index:t.visited i)
  in
  t.set_visit (Inode.Key.to_hash key) key;
  Lwt.return key
(* [hash_not_found missing] always raises [Failure], reporting that the
   inode being saved points to the object with hash [missing], which the
   backend does not know. *)
let hash_not_found missing =
  Fmt.failwith
    "You are trying to save to the backend an inode that contains pointers to \
     objects unknown to the backend. Hash: %a"
    pp_hash missing
(* [save_reuse_index inodes] is the strategy that keeps no extra state:
   recording a saved key does nothing, and a hash is resolved with
   [Inode_pack.index_direct] on [inodes], calling [hash_not_found] when the
   lookup fails. Returns [(set_visit, visited, None)]. *)
let save_reuse_index inodes =
  [%log.info "save reuse index "];
  let lookup hash =
    match Inode_pack.index_direct inodes hash with
    | None -> hash_not_found hash
    | Some key -> key
  in
  let ignore_visit _ _ = () in
  (ignore_visit, lookup, None)
(* [save_in_memory ()] is the strategy that remembers the key of every saved
   object in an in-memory hash table. Returns [(set_visit, visited, None)]
   where [set_visit h k] binds [h] to [k] and [visited h] returns the key
   bound to [h], calling [hash_not_found] when there is none.

   Bindings are written with [Hashtbl.replace]: recording the same hash
   again overwrites its binding instead of stacking a shadowed duplicate, so
   the table holds at most one entry per hash. Lookups are unaffected, since
   the most recent binding was already the one returned. *)
let save_in_memory () =
  [%log.info "save in memory"];
  let tbl : (Hash.t, Hash.t Pack_key.t) Hashtbl.t = Hashtbl.create 10 in
  let set_visit h k = Hashtbl.replace tbl h k in
  let visited h =
    match Hashtbl.find_opt tbl h with
    | Some key -> key
    | None -> hash_not_found h
  in
  (set_visit, visited, None)
(* [save_on_disk log_size path] is the strategy that remembers the key of
   every saved object in a fresh index created at [path ^ "_tmp"]. Returns
   [(set_visit, visited, Some (tmp_path, index))] where [set_visit h k]
   stores the offset and length of the direct key [k] under [h] (an indexed
   key trips [assert false]), and [visited h] rebuilds the direct key from
   the stored pair, calling [hash_not_found] when [h] is absent. *)
let save_on_disk log_size path =
  let tmp_path = path ^ "_tmp" in
  [%log.info "save on disk: %s" tmp_path];
  let index = Index.v ~fresh:true ~readonly:false ~log_size tmp_path in
  let set_visit h k =
    let location =
      match Pack_key.inspect k with
      | Indexed _ -> assert false
      | Direct { offset; length; _ } -> (offset, length)
    in
    Index.replace index h location
  in
  let visited h =
    match
      (let offset, length = Index.find index h in
       Pack_key.v_direct ~hash:h ~offset ~length)
    with
    | key -> key
    | exception Not_found -> hash_not_found h
  in
  (set_visit, visited, Some (tmp_path, index))
(* [v ?on_disk log_size contents_pack inode_pack] builds an import state.
   [on_disk] selects how the keys of saved objects are remembered:
   - absent: in memory ([save_in_memory]);
   - [`Path path]: in a temporary index derived from [path]
     ([save_on_disk], which receives [log_size]);
   - [`Reuse]: by querying [inode_pack] directly ([save_reuse_index]). *)
let v ?on_disk log_size contents_pack inode_pack =
  let strategy =
    match on_disk with
    | Some `Reuse -> save_reuse_index inode_pack
    | Some (`Path path) -> save_on_disk log_size path
    | None -> save_in_memory ()
  in
  let set_visit, visited, index = strategy in
  { inode_pack; contents_pack; visited; set_visit; index }
(* [close t] closes the temporary index held by [t], if any, and deletes its
   files with [rm_index]; it does nothing when [t] has no index. *)
let close t =
  match t.index with
  | None -> ()
  | Some (path, index) ->
      Index.close index;
      rm_index path
end
end