Source file loose.ml

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
(*
 * Copyright (c) 2013-2017 Thomas Gazagnaire <thomas@gazagnaire.org>
 * and Romain Calascibetta <romain.calascibetta@gmail.com>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 *)

open Lwt.Infix

let src = Logs.Src.create "git.loose" ~doc:"logs git's loose event"

module Log = (val Logs.src_log src : Logs.LOG)

(** Interface of a store of loose objects, i.e. objects kept one per file
    under [<root>/objects/]. The functor [Make] below provides the
    implementation the comments in this signature are based on. *)
module type S = sig
  module FS : S.FS
  module Hash : S.HASH
  module Deflate : S.DEFLATE
  module Inflate : S.INFLATE

  (** Git values specialised to the [Hash], [Deflate] and [Inflate] modules
      of this signature. *)
  module Value :
    Value.S
    with module Hash := Hash
     and module Deflate := Deflate
     and module Inflate := Inflate

  (** Everything declared by [Value] is also available directly in this
      signature. *)
  include module type of Value

  (** Errors returned by the functions below: decoding, inflating, deflating
      and file-system errors. *)
  type error =
    [ Error.Decoder.t
    | Inflate.error Error.Inf.t
    | Deflate.error Error.Def.t
    | FS.error Error.FS.t ]

  (** The four kinds of object that can be stored. *)
  type kind = [`Commit | `Tree | `Tag | `Blob]

  (** Pretty-printer for {!error}. *)
  val pp_error : error Fmt.t

  (** [mem ~fs ~root hash] checks whether the file associated with [hash]
      exists under [root/objects]. In [Make], a file-system error while
      checking is reported as [false]. *)
  val mem : fs:FS.t -> root:Fpath.t -> Hash.t -> bool Lwt.t

  (** Read -> Inflate -> Decode.

      [read ~fs ~root ~window ~ztmp ~dtmp ~raw hash] reads the loose object
      [hash] and decodes it to a value. [window], [ztmp] and [dtmp] are handed
      to the decoder as its initial state and [raw] is the buffer handed to the
      file reader. *)
  val read :
       fs:FS.t
    -> root:Fpath.t
    -> window:Inflate.window
    -> ztmp:Cstruct.t
    -> dtmp:Cstruct.t
    -> raw:Cstruct.t
    -> Hash.t
    -> (t, error) result Lwt.t

  (** Read -> Inflate.

      Like {!read} but stops after inflating: the result is the kind found in
      the header together with the body of the object. In [Make] the body is
      stored in a buffer allocated from the length announced by the header. *)
  val read_inflated :
       fs:FS.t
    -> root:Fpath.t
    -> window:Inflate.window
    -> ztmp:Cstruct.t
    -> dtmp:Cstruct.t
    -> raw:Cstruct.t
    -> Hash.t
    -> (kind * Cstruct.t, error) result Lwt.t

  (** Read -> Inflate, into a caller-provided buffer.

      Like {!read_inflated} but, in [Make], the body is copied into [result]
      instead of a freshly allocated buffer, and the returned buffer is a
      sub-view of [result]. *)
  val read_inflated_without_allocation :
       fs:FS.t
    -> root:Fpath.t
    -> window:Inflate.window
    -> ztmp:Cstruct.t
    -> dtmp:Cstruct.t
    -> raw:Cstruct.t
    -> result:Cstruct.t
    -> Hash.t
    -> (kind * Cstruct.t, error) result Lwt.t

  (** [list ~fs ~root] collects the hashes of the objects found under
      [root/objects]. In [Make], the result is [[]] when that directory can not
      be listed; sub-directories that can not be listed and entries whose name
      is rejected by [Hash.of_hex] are skipped. *)
  val list : fs:FS.t -> root:Fpath.t -> Hash.t list Lwt.t

  (** [size ~fs ~root ~window ~ztmp ~dtmp ~raw hash] returns the length
      announced in the header of the loose object [hash]. In [Make] only the
      header is parsed; the kind is dropped. *)
  val size :
       fs:FS.t
    -> root:Fpath.t
    -> window:Inflate.window
    -> ztmp:Cstruct.t
    -> dtmp:Cstruct.t
    -> raw:Cstruct.t
    -> Hash.t
    -> (int64, error) result Lwt.t

  (** Encode -> Deflate -> Write.

      [write ~fs ~root ~temp_dir ~etmp ?level ~ztmp ~raw value] stores [value]
      as a loose object and returns its hash together with the integer reported
      by the encoder (presumably a number of bytes written; to be confirmed
      against [Helper.Encoder]). [level] defaults to [4] in [Make]. *)
  val write :
       fs:FS.t
    -> root:Fpath.t
    -> temp_dir:Fpath.t
    -> etmp:Cstruct.t
    -> ?level:int
    -> ztmp:Cstruct.t
    -> raw:Cstruct.t
    -> t
    -> (Hash.t * int, error) result Lwt.t

  (** Deflate -> Write.

      [write_deflated ~fs ~root ~temp_dir ?level ~raw ~kind body] stores the
      already serialised [body] as a loose object of the given [kind] and
      returns its hash. In [Make] the header is built from [kind] and the
      length of [body], and [level] defaults to [4]. *)
  val write_deflated :
       fs:FS.t
    -> root:Fpath.t
    -> temp_dir:Fpath.t
    -> ?level:int
    -> raw:Cstruct.t
    -> kind:kind
    -> Cstruct.t
    -> (Hash.t, error) result Lwt.t
end

module Make
    (Hash : S.HASH)
    (FS : S.FS)
    (Inflate : S.INFLATE)
    (Deflate : S.DEFLATE) =
struct
  (* From here on [FS] denotes the result of applying [Helper.FS] to the [FS]
     functor argument, which it shadows. *)
  module FS = Helper.FS (FS)

  (* Values specialised to the functor arguments; its contents are re-exported
     at this level by the [include]. *)
  module Value = Value.Make (Hash) (Inflate) (Deflate)
  include Value

  (* Short names for the error families combined in [error]. *)
  type inf_error = Inflate.error Error.Inf.t
  type def_error = Deflate.error Error.Def.t
  type fs_error = FS.error Error.FS.t

  (* Any error a function of this module can return. *)
  type error = [fs_error | Error.Decoder.t | inf_error | def_error]

  (* The kinds of object a loose file can hold. *)
  type kind = [`Commit | `Tree | `Tag | `Blob]

  (* [pp_error ppf err] prints [err] on [ppf] by delegating to the printer of
     the error family [err] belongs to. *)
  let pp_error ppf err =
    match err with
    | #Error.Decoder.t as decoder_err ->
        Error.Decoder.pp_error ppf decoder_err
    | #inf_error as inflate_err ->
        Error.Inf.pp_error Inflate.pp_error ppf inflate_err
    | #def_error as deflate_err ->
        Error.Def.pp_error Deflate.pp_error ppf deflate_err
    | #fs_error as fs_err -> Error.FS.pp_error FS.pp_error ppf fs_err

  (* [explode hash] splits [hash] into two hexadecimal strings: the first
     element of [hash] (as given by [Hash.read hash 0]) and the concatenation
     of the [Hash.digest_size - 1] remaining ones. Each element is printed
     with ["%02x"]. The pair is used as <directory>/<file> under [objects]. *)
  let explode hash =
    let hex_at i = Fmt.strf "%02x" (Hash.read hash i) in
    let tail = Buffer.create ((Hash.digest_size - 1) * 2) in
    for i = 1 to Hash.digest_size - 1 do
      Buffer.add_string tail (hex_at i)
    done ;
    hex_at 0, Buffer.contents tail

  (* [mem ~fs ~root hash] tells whether the file
     [root/objects/<first>/<rest>] exists, where [(first, rest)] is
     [explode hash]. A file-system error is turned into [false]. *)
  let mem ~fs ~root:dotgit hash =
    let hex_head, hex_tail = explode hash in
    let objects = Fpath.(dotgit / "objects") in
    let path = Fpath.(objects / hex_head / hex_tail) in
    Log.debug (fun l ->
        l "Checking if the object %a is a loose file (%a)." Hash.pp hash
          Fpath.pp path ) ;
    FS.File.exists fs path
    >|= fun outcome ->
    match outcome with Ok present -> present | Error _ -> false

  (* XXX(dinosaure): make this function more resilient: if [of_hex] fails,
     avoid the path. *)
  (* [list ~fs ~root] walks the two levels of [root/objects] and returns the
     hashes rebuilt from <directory name> ^ <file name>.

     - If [root/objects] itself can not be listed, the error is logged and the
       result is [[]].
     - A sub-directory that can not be listed is silently skipped.
     - A name rejected by [Hash.of_hex] is skipped with a warning. *)
  let list ~fs ~root:dotgit =
    let objects = Fpath.(dotgit / "objects") in
    (* Add the hash spelled by [first] followed by [entry] in front of [acc].

       XXX(dinosaure): we should not use [fpath] here but something else.
       Indeed, [Fpath.to_string] is platform dependent. But in this case, it
       should be fine when [first] is one segment. *)
    let add_hash first acc entry =
      let hex = Fpath.to_string first ^ Fpath.to_string entry in
      match Hash.of_hex hex with
      | hash -> hash :: acc
      | exception _e ->
          (* XXX(samoht): avoid catch-all *)
          Log.warn (fun l ->
              l "Retrieving a malformed file: %s / %s."
                (Fpath.to_string first) (Fpath.to_string entry) ) ;
          acc
    in
    (* Fold the entries of the sub-directory [first] into [acc]. *)
    let scan_directory acc first =
      FS.Dir.contents fs ~rel:true Fpath.(objects // first)
      >|= fun listing ->
      match listing with
      | Error _ -> acc
      | Ok entries -> List.fold_left (add_hash first) acc entries
    in
    FS.Dir.contents fs ~rel:true objects
    >>= fun listing ->
    match listing with
    | Ok firsts -> Lwt_list.fold_left_s scan_directory [] firsts
    | Error err ->
        Log.err (fun l ->
            l "Got an error while listing the contents of %a: %a" Fpath.pp
              objects FS.pp_error err ) ;
        Lwt.return []
  (* A first-class [Helper.DECODER] module producing values of type ['result]
     from a decoder state of type ['decoder]. Its errors are either decoder
     errors or an inflate error tagged with [`Inflate]. This is the type of
     the [decoder] argument of [gen]. *)
  type ('result, 'decoder) decoder =
    (module
     Helper.DECODER
       with type t = 'result
        and type decoder = 'decoder
        and type error = [Error.Decoder.t | `Inflate of Inflate.error])

  (* [gen ~fs ~root state ~raw decoder hash] is the generic reader shared by
     [read], [read_inflated], [read_inflated_without_allocation] and [size].

     It runs [decoder], starting from [state], on the file associated with
     [hash] under [root/objects], using [raw] as the buffer given to the file
     reader. Errors raised by the decoder are annotated with the path of the
     file; file-system errors are returned unchanged. *)
  let gen (type state result) ~fs ~root:dotgit (state : state) ~raw
      (decoder : (result, state) decoder) hash =
    let module Dec = (val decoder) in
    let module Reader = Helper.Decoder (Dec) (FS) in
    let file =
      let first, rest = explode hash in
      Fpath.(dotgit / "objects" / first / rest)
    in
    Log.debug (fun l -> l "Reading the loose object %a." Fpath.pp file) ;
    Reader.of_file fs file raw state
    >|= fun outcome ->
    (* The [as] aliases below let each branch be returned at the error type of
       this module rather than at the one of [Reader.of_file]. *)
    match outcome with
    | Ok _ as value -> value
    | Error (`Decoder (#Error.Decoder.t as err)) ->
        Error.v (Error.Decoder.with_path file err)
    | Error (`Decoder (`Inflate _ as err)) ->
        Error.v (Error.Inf.with_path file err)
    | Error #fs_error as fs_failure -> fs_failure

  (* [read ~fs ~root ~window ~ztmp ~dtmp ~raw hash] reads the loose object
     [hash] with the decoder [D], whose initial state is built from [window],
     [ztmp] and [dtmp]. [D] is not defined in this file; it is presumably
     brought in scope by [include Value]. *)
  let read ~fs ~root ~window ~ztmp ~dtmp ~raw hash =
    let initial = D.default (window, ztmp, dtmp) in
    gen ~fs ~root initial ~raw (module D) hash

  (* Angstrom parser for an inflated loose object: a header made of the kind,
     a space, the length and a NUL byte, followed by the body. The body is
     copied into a [Cstruct.t]. *)
  module HeaderAndBody = struct
    type e = [`Commit | `Blob | `Tag | `Tree] * Cstruct.t
    type 'a t = 'a Angstrom.t

    let kind = Value.A.kind
    let int64 = Value.A.length

    (* [to_end cs] consumes the input up to its end and copies it into [cs],
       starting at offset 0. It returns the sub-buffer of [cs] that was
       actually filled.

       XXX(dinosaure): this code only [blit]s what fits in [cs]. It can happen
       that the whole git object is not stored in [cs] but, in a specific
       context (when we want to decode the source of a delta-ification), this
       is what we want: store only what is needed and limit the memory
       consumption.

       This is tied to the [~result] argument of [decoder]: when no
       user-defined buffer is given, [decoder] allocates a buffer from the
       length announced by the header. *)
    let to_end cs =
      let open Angstrom in
      (* Number of bytes written to [cs] so far; never exceeds
         [Cstruct.len cs]. *)
      let pos = ref 0 in
      fix
      @@ fun m ->
      available
      >>= function
      | 0 -> (
          (* Nothing buffered: [peek_char] tells end of input apart from
             input that is still to come. *)
          peek_char
          >>= function
          | Some _ -> m | None -> commit *> return (Cstruct.sub cs 0 !pos) )
      | n ->
          take n
          >>= fun chunk ->
          let n' = min n (Cstruct.len cs - !pos) in
          Cstruct.blit_from_string chunk 0 cs !pos n' ;
          (* Advance by what was copied, not by what was consumed: otherwise
             [pos] would go past the end of [cs] as soon as the object is
             larger than [cs], making the next [blit] and the final
             [Cstruct.sub] fail. *)
          pos := !pos + n' ;
          (* [n] is never 0 in this branch, so always loop. *)
          commit *> m

    let sp = ' '
    let nl = '\000'

    (* [decoder ~result] parses the header then the body. The body goes to
       [buffer] when [result] is [Some buffer], and to a buffer allocated from
       the length announced by the header otherwise. *)
    let decoder ~result =
      let open Angstrom in
      kind
      <* char sp
      >>= fun kind ->
      int64
      <* char nl
      <* commit
      >>= fun length ->
      ( match result with
      | Some result -> to_end result
      | None -> to_end (Cstruct.create (Int64.to_int length)) )
      >>| fun cs -> kind, cs
  end

  (* Inflater used by [read_inflated]: its parser [p] is
     [HeaderAndBody.decoder] without a destination buffer, so the body is
     stored in a buffer allocated from the length found in the header. *)
  module I =
    Helper.MakeInflater
      (Inflate)
      (struct
        include HeaderAndBody

        let p = decoder ~result:None
      end)

  (* [read_inflated ~fs ~root ~window ~ztmp ~dtmp ~raw hash] inflates the
     loose object [hash] with [I] and returns its kind and its body, without
     decoding the body. *)
  let read_inflated ~fs ~root ~window ~ztmp ~dtmp ~raw hash =
    let initial = I.default (window, ztmp, dtmp) in
    gen ~fs ~root initial ~raw (module I) hash

  (* Same as [read_inflated] except that the body is copied into the
     caller-provided [result] buffer: a dedicated inflater is built whose
     parser is [HeaderAndBody.decoder ~result:(Some result)]. *)
  let read_inflated_without_allocation ~fs ~root ~window ~ztmp ~dtmp ~raw
      ~result hash =
    let module Into_result =
      Helper.MakeInflater
        (Inflate)
        (struct
          include HeaderAndBody

          let p = decoder ~result:(Some result)
        end)
    in
    let initial = Into_result.default (window, ztmp, dtmp) in
    gen ~fs ~root initial ~raw (module Into_result) hash

  (* Angstrom parser reading only the header of an inflated loose object: it
     yields the kind and the announced length, and never looks at the body.
     The byte following the kind and the byte following the length are
     skipped without being checked. *)
  module HeaderOnly = struct
    type e = [`Commit | `Blob | `Tag | `Tree] * int64
    type 'a t = 'a Angstrom.t

    let kind = HeaderAndBody.kind
    let int64 = HeaderAndBody.int64

    let p =
      let open Angstrom in
      let header_kind = kind <* take 1 in
      let header_length = int64 <* advance 1 in
      header_kind >>= fun k -> header_length >>| fun len -> k, len
  end

  (* Inflater that parses the header only. Note that it shadows the
     top-level [S] module from here on. *)
  module S = Helper.MakeInflater (Inflate) (HeaderOnly)

  (* [size ~fs ~root ~window ~ztmp ~dtmp ~raw hash] returns the length
     announced by the header of the loose object [hash]; the kind parsed
     along with it is dropped. *)
  let size ~fs ~root ~window ~ztmp ~dtmp ~raw hash =
    let initial = S.default (window, ztmp, dtmp) in
    gen ~fs ~root initial ~raw (module S) hash
    >|= fun outcome ->
    match outcome with
    | Ok (_kind, length) -> Ok length
    | Error _ as failure -> failure

  (* File encoder that deflates an already serialised object: [E] adapts
     [Deflate] to what [Helper.Encoder] expects. *)
  module EDeflated = struct
    module E = struct
      (* [v] is the whole input to deflate, [state] the deflate state. *)
      type state = {state: Deflate.t; v: Cstruct.t}
      type result = unit
      type error = Deflate.error

      (* [eval raw t] runs the deflater from [t.v] into [raw]. When it asks
         for more input, the stream is marked as finished with
         [Deflate.finish] and evaluation resumes; the other outcomes are
         returned with the updated state. *)
      let rec eval raw t =
        match Deflate.eval ~src:t.v ~dst:raw t.state with
        | `Await next -> eval raw {t with state= Deflate.finish next}
        | `Flush next -> Lwt.return (`Flush {t with state= next})
        | `Error (next, error) ->
            Lwt.return (`Error ({t with state= next}, error))
        | `End next -> Lwt.return (`End ({t with state= next}, ()))

      let used {state; _} = Deflate.used_out state
      let flush x y t = {t with state= Deflate.flush x y t.state}
    end

    include Helper.Encoder (E) (FS)
  end

  (* [write_deflated ~fs ~root ~temp_dir ?level ~raw ~kind value] stores the
     serialised object [value] of the given [kind] as a loose object.

     The header (kind name, space, length of [value], NUL byte) is put in
     front of [value]; the hash is computed over the header followed by
     [value]; the same bytes are then deflated at [level] (4 by default) and
     written to [root/objects/<first>/<rest>], where [(first, rest)] is
     [explode hash]. [raw] and [temp_dir] are handed to the file encoder.

     Returns the hash, or an error if the directory can not be created or the
     file can not be encoded or written. *)
  let write_deflated ~fs ~root:dotgit ~temp_dir ?(level = 4) ~raw ~kind value =
    let kind_name =
      match kind with
      | `Commit -> "commit"
      | `Blob -> "blob"
      | `Tree -> "tree"
      | `Tag -> "tag"
    in
    let header =
      Fmt.kstrf Cstruct.of_string "%s %d\000%!" kind_name (Cstruct.len value)
    in
    let payload = Cstruct.concat [header; value] in
    (* The whole payload is given to the deflater at once. *)
    let deflater =
      Deflate.no_flush 0 (Cstruct.len payload) (Deflate.default level)
    in
    let state = {EDeflated.E.v= payload; state= deflater} in
    let hash =
      let ctx = Hash.feed_bigstring (Hash.init ()) (Cstruct.to_bigarray payload) in
      Hash.get ctx
    in
    let first, rest = explode hash in
    let obj_dir = Fpath.(dotgit / "objects" / first) in
    FS.Dir.create fs obj_dir
    >>= fun created ->
    match created with
    | Error err -> Lwt.return (Error.v (Error.FS.err_create obj_dir err))
    | Ok (true | false) -> (
        let target = Fpath.(obj_dir / rest) in
        EDeflated.to_file fs ~temp_dir target raw state
        >|= fun written ->
        match written with
        | Ok () -> Ok hash
        | Error #fs_error as failure -> failure
        | Error (`Encoder err) ->
            Error.v (Error.Def.err_deflate_file target err) )

  (* File encoder for values: the inner [E] adapts the encoder [E] that is in
     scope before this definition (the one used by [write] through
     [E.default]) to what [Helper.Encoder] expects. Inside the inner module,
     [E] still denotes that outer encoder. *)
  module EInflated = struct
    module E = struct
      type state = E.encoder
      type result = int
      type error = E.error

      (* [eval raw encoder] runs one step of the outer encoder and lifts its
         outcome into Lwt. On error, the state given as input is returned
         along with the error. *)
      let eval raw encoder =
        Lwt.return
          ( match E.eval raw encoder with
          | `Flush next -> `Flush next
          | `Error error -> `Error (encoder, error)
          | `End finished -> `End finished )

      let used = E.used
      let flush = E.flush
    end

    include Helper.Encoder (E) (FS)
  end

  (* [write ~fs ~root ~temp_dir ~etmp ?level ~ztmp ~raw value] encodes and
     deflates [value] and stores it as the loose object
     [root/objects/<first>/<rest>], where [(first, rest)] is
     [explode (digest value)].

     [etmp], [level] (4 by default) and [ztmp] are given to the encoder;
     [raw] and [temp_dir] are given to the file encoder.

     Returns the hash of [value] and the integer reported by the encoder, or
     an error if the directory can not be created or the file can not be
     encoded or written. *)
  let write ~fs ~root:dotgit ~temp_dir ~etmp ?(level = 4) ~ztmp
      ~raw value =
    let hash = digest value in
    let first, rest = explode hash in
    let encoder = E.default (etmp, value, level, ztmp) in
    let obj_dir = Fpath.(dotgit / "objects" / first) in
    let path = Fpath.(obj_dir / rest) in
    Log.debug (fun l -> l "Writing a new loose object %a." Fpath.pp path) ;
    FS.Dir.create fs obj_dir
    >>= function
    | Error err ->
        (* Report the directory that could not be created, not the file that
           was going to be written into it, as [write_deflated] does. *)
        Lwt.return (Error.v (Error.FS.err_create obj_dir err))
    | Ok (true | false) -> (
        EInflated.to_file fs ~temp_dir path raw encoder
        >|= function
        | Error #fs_error as err -> err
        | Error (`Encoder err) -> Error.(v @@ Def.with_path path err)
        | Ok r ->
            Log.debug (fun l -> l "Wrote the object %s/%s" first rest) ;
            Ok (hash, r) )
end