1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
let src = Logs.Src.create "git.pack-info" ~doc:"logs git's pack-info event"
module Log = (val Logs.src_log src : Logs.LOG)
module type S = sig
module Hash : S.HASH
module Inflate : S.INFLATE
module HDec : Unpack.H with module Hash := Hash
module PDec :
Unpack.P
with module Hash := Hash
and module Inflate := Inflate
and module Hunk := HDec
type error =
[ `Unexpected_end_of_input
| `Unexpected_chunk of string
| `PDec of PDec.error ]
val pp_error : error Fmt.t
type delta =
| Unresolved of {hash: Hash.t; length: int}
| Internal of {hash: Hash.t; abs_off: int64; length: int}
| Delta of {hunks_descr: HDec.hunks; inserts: int; depth: int; from: delta}
val needed : delta -> int
val pp_delta : delta Fmt.t
type path = Load of int | Patch of {hunks: int; target: int; src: path}
type 'a t =
{ index: (Hash.t, Checkseum.Crc32.t * int64 * int) Hashtbl.t
; delta: (int64, delta) Hashtbl.t
; hash_pack: Hash.t
; state: 'a }
constraint 'a = [< `Pass | `Normalized of path | `Resolved of path]
val v : Hash.t -> [`Pass] t
val normalize : length:int -> [`Pass] t -> [`Normalized of path] t
val resolve : length:int -> [`Normalized of path] t -> [`Resolved of path] t
val first_pass :
ztmp:Cstruct.t
-> window:Inflate.window
-> ?idx:(Hash.t -> (Checkseum.Crc32.t * int64) option)
-> (unit -> Cstruct.t option Lwt.t)
-> ([`Normalized of path] t, error) result Lwt.t
end
module Make
(Hash : S.HASH)
(Inflate : S.INFLATE)
(HDec : Unpack.H with module Hash := Hash)
(PDec : Unpack.P
with module Hash := Hash
and module Inflate := Inflate
and module Hunk := HDec) =
struct
type error =
[ `Unexpected_end_of_input
| `Unexpected_chunk of string
| `PDec of PDec.error ]
let pp_error ppf = function
| `Unexpected_end_of_input -> Fmt.pf ppf "Unexpected end of PACK stream"
| `Unexpected_chunk chunk ->
Fmt.pf ppf "Unexpected chunk of PACK stream: %a"
(Fmt.hvbox
(Encore.Lole.pp_scalar ~get:String.get ~length:String.length))
chunk
| `PDec err ->
Fmt.pf ppf "Got an error while decoding PACK stream: %a" PDec.pp_error
err
type path = Load of int | Patch of {hunks: int; target: int; src: path}
type delta =
| Unresolved of {hash: Hash.t; length: int}
| Internal of {hash: Hash.t; abs_off: int64; length: int}
| Delta of {hunks_descr: HDec.hunks; inserts: int; depth: int; from: delta}
let rec delta_to_path = function
| Unresolved {length; _} -> Load length
| Internal {length; _} -> Load length
| Delta {hunks_descr; inserts; from; _} ->
let src = delta_to_path from in
Patch {hunks= inserts; target= hunks_descr.HDec.target_length; src}
let rec merge_path a b =
match a, b with
| Load a, Load b -> Load (max a b)
| Load a, Patch {hunks; target; src} | Patch {hunks; target; src}, Load a
->
Patch {hunks; target= max target a; src}
| ( Patch {hunks= hunks_a; target= target_a; src= src_a}
, Patch {hunks= hunks_b; target= target_b; src= src_b} ) ->
Patch
{ hunks= max hunks_a hunks_b
; target= max target_a target_b
; src= merge_path src_a src_b }
let needed t =
let rec go acc = function
| Unresolved {length; _} -> max length acc
| Internal {length; _} -> max length acc
| Delta {hunks_descr; from; _} ->
go (max hunks_descr.HDec.target_length acc) from
in
go 0 t
let rec pp_delta ppf = function
| Unresolved {hash; length} ->
Fmt.pf ppf "(Unresolved@ { @[<hov>hash = %a;@ length = %d;@] })"
Hash.pp hash length
| Internal {hash; abs_off; length} ->
Fmt.pf ppf
"(Internal { @[<hov>hash = %al@ abs_off = %Ld;@ length = %d;@] })"
Hash.pp hash abs_off length
| Delta {hunks_descr; inserts; depth; from} ->
Fmt.pf ppf
"(Delta { @[<hov>hunks_descr = %a;@ inserts = %d;@ depth = %d;@ \
delta = %a;@] })"
HDec.pp_hunks hunks_descr inserts depth pp_delta from
type 'a t =
{ index: (Hash.t, Checkseum.Crc32.t * int64 * int) Hashtbl.t
; delta: (int64, delta) Hashtbl.t
; hash_pack: Hash.t
; state: 'a }
constraint 'a = [< `Pass | `Normalized of path | `Resolved of path]
let v hash_pack =
{ index= Hashtbl.create 128
; delta= Hashtbl.create 128
; hash_pack
; state= `Pass }
let rec merge abs_off path acc =
match path, acc with
| Unresolved {length; _}, Load x | Internal {length; _}, Load x ->
Load (max length x)
| Unresolved {length; _}, Patch {hunks; target; src}
|Internal {length; _}, Patch {hunks; target; src} ->
Patch {hunks; target= max target length; src}
| ( Delta
{ hunks_descr= {HDec.source_length; target_length; _}
; inserts
; from; _ }
, Load x ) ->
let src = merge_path (delta_to_path from) (Load source_length) in
Patch {hunks= inserts; target= max x target_length; src}
| ( Delta
{ hunks_descr= {HDec.reference= HDec.Offset rel_off; target_length; _}
; inserts
; from; _ }
, Patch {hunks; target; src} ) ->
let abs_off = Int64.sub abs_off rel_off in
let src = merge abs_off from src in
Patch {hunks= max hunks inserts; target= max target target_length; src}
| ( Delta
{ hunks_descr= {HDec.reference= HDec.Hash _; target_length; _}
; inserts
; from; _ }
, Patch {hunks; target; src} ) ->
let src = merge 0L from src in
Patch {hunks= max hunks inserts; target= max target target_length; src}
let normalize paths = Hashtbl.fold merge paths (Load 0)
let first_pass ~ztmp ~window ?(idx = fun _hash -> None) stream =
let state = PDec.default ztmp window in
let empty = Cstruct.create 0 in
let index = Hashtbl.create 128 in
let delta = Hashtbl.create 128 in
let chunk state =
let ctx = Hash.init () in
let hdr =
Fmt.strf "%s %d\000"
( match PDec.kind state with
| PDec.Commit -> "commit"
| PDec.Tag -> "tag"
| PDec.Tree -> "tree"
| PDec.Blob -> "blob"
| _ -> assert false )
(PDec.length state)
in
let ctx = Hash.feed_string ctx hdr in
let ctx = Hash.feed_cstruct ctx chunk in
ctx
in
let open Lwt.Infix in
let rec go ?(src = empty) ?ctx ?insert_hunks state =
match PDec.eval src state with
| `Hunk (state, HDec.Insert raw) ->
let insert_hunks =
match insert_hunks with
| Some count -> count + Cstruct.len raw
| None -> Cstruct.len raw
in
go ~src ~insert_hunks (PDec.continue state)
| `Hunk (state, _) -> go ~src ?ctx ?insert_hunks (PDec.continue state)
| `Error (_, err) -> Lwt.return (Error (`PDec err))
| `Flush state ->
let chunk, len = PDec.output state in
let ctx =
match ctx with
| Some ctx -> Hash.feed_cstruct ctx (Cstruct.sub chunk 0 len)
| None -> ctx_with_header (Cstruct.sub chunk 0 len) state
in
go ~src ~ctx (PDec.flush 0 (Cstruct.len chunk) state)
| `Object state ->
let () =
match PDec.kind state with
| PDec.Hunk
({HDec.reference= HDec.Offset rel_off; _} as hunks_descr) ->
let abs_off = Int64.(sub (PDec.offset state) rel_off) in
let inserts =
match insert_hunks with Some x -> x | None -> 0
in
let from =
try Hashtbl.find delta abs_off with Not_found ->
invalid_arg "invalid pack stream"
in
let depth_source =
match from with Delta {depth; _} -> depth | _ -> 0
in
Hashtbl.add delta (PDec.offset state)
(Delta {hunks_descr; inserts; depth= depth_source + 1; from})
| PDec.Hunk
( {HDec.reference= HDec.Hash hash; source_length; _} as
hunks_descr ) ->
let inserts =
match insert_hunks with Some x -> x | None -> 0
in
let from =
try
let _, abs_off =
match idx hash with
| Some x -> x
| None ->
Hashtbl.find index hash
|> fun (crc, abs_off, _) -> crc, abs_off
in
Hashtbl.find delta abs_off
with Not_found -> Unresolved {hash; length= source_length}
in
let depth_source =
match from with Delta {depth; _} -> depth | _ -> 0
in
Hashtbl.add delta (PDec.offset state)
(Delta {hunks_descr; inserts; depth= depth_source + 1; from})
| _ -> (
match ctx with
| Some ctx ->
let hash = Hash.get ctx in
Log.info (fun l ->
l ~header:"first_pass" "Save object %a." Hash.pp hash ) ;
Hashtbl.add index hash
(PDec.crc state, PDec.offset state, PDec.length state) ;
Hashtbl.add delta (PDec.offset state)
(Internal
{ hash
; abs_off= PDec.offset state
; length= PDec.length state })
| None ->
let ctx = ctx_with_header empty state in
let hash = Hash.get ctx in
Log.info (fun l ->
l ~header:"first_pass" "Save object %a." Hash.pp hash ) ;
Hashtbl.add index (Hash.get ctx)
(PDec.crc state, PDec.offset state, PDec.length state) ;
Hashtbl.add delta (PDec.offset state)
(Internal
{ hash
; abs_off= PDec.offset state
; length= PDec.length state }) )
in
go ~src (PDec.next_object state)
| `End (_, hash_pack) -> (
stream ()
>>= function
| Some raw ->
Log.err (fun l ->
l ~header:"first_pass"
"Expected end of pack stream but retrieve: %a."
(Fmt.hvbox
(Encore.Lole.pp_scalar ~get:Cstruct.get_char
~length:Cstruct.len))
raw ) ;
Lwt.return (Error (`Unexpected_chunk (Cstruct.to_string raw)))
| None ->
Log.debug (fun l ->
l ~header:"first_pass" "End of the PACK stream." ) ;
Lwt.return
(Ok
{ index
; delta
; hash_pack
; state= `Normalized (normalize delta) }) )
| `Await state -> (
Log.debug (fun l -> l ~header:"first_pass" "Waiting more input.") ;
stream ()
>>= function
| Some src ->
Log.debug (fun l ->
l ~header:"first_pass"
"Receive a chunk of the PACK stream (length: %d)."
(Cstruct.len src) ) ;
go ~src ?ctx ?insert_hunks
(PDec.refill 0 (Cstruct.len src) state)
| None ->
Log.err (fun l ->
l ~header:"first_pass" "Receive end of the PACK stream." ) ;
Lwt.return (Error `Unexpected_end_of_input) )
in
go state
let resolve ~length t =
if Hashtbl.length t.index = length then
{t with state= `Resolved (normalize t.delta)}
else invalid_arg "promote: invalid argument"
let normalize ~length t =
if Hashtbl.length t.delta = length then
{t with state= `Normalized (normalize t.delta)}
else invalid_arg "promote: invalid argument"
end