Source file get.ml

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
(*********************************************************************************)
(*                Statocaml                                                      *)
(*                                                                               *)
(*    Copyright (C) 2025 INRIA All rights reserved.                              *)
(*    Author: Maxence Guesdon (INRIA Saclay)                                     *)
(*      with Gabriel Scherer (INRIA Paris) and Florian Angeletti (INRIA Paris)   *)
(*                                                                               *)
(*    This program is free software; you can redistribute it and/or modify       *)
(*    it under the terms of the GNU General Public License as                    *)
(*    published by the Free Software Foundation, version 3 of the License.       *)
(*                                                                               *)
(*    This program is distributed in the hope that it will be useful,            *)
(*    but WITHOUT ANY WARRANTY; without even the implied warranty of             *)
(*    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the               *)
(*    GNU General Public License for more details.                               *)
(*                                                                               *)
(*    You should have received a copy of the GNU General Public                  *)
(*    License along with this program; if not, write to the Free Software        *)
(*    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA                   *)
(*    02111-1307  USA                                                            *)
(*                                                                               *)
(*    As a special exception, you have permission to link this program           *)
(*    with the OCaml compiler and distribute executables, as long as you         *)
(*    follow the requirements of the GNU GPL in regard to all of the             *)
(*    software in the executable aside from the OCaml compiler.                  *)
(*                                                                               *)
(*    Contact: Maxence.Guesdon@inria.fr                                          *)
(*                                                                               *)
(*********************************************************************************)

(** *)

module Log = Statocaml.Log
module W = Ocf.Wrapper

module type Http_t = sig
    val conf : Conf.t
    val user : string
    val repo : string
    val cache_policy_ref : Cache.policy ref
    val token : string
    include Ldp.Http.Http_ct with type t = Yojson.Safe.t
  end

let mk_http conf =
  let policy_ref, cache =
      let (policy_ref, cache) = Cache.mk_cache conf.Conf.cache_dir in
      let module C = Ldp.Http.Make_cache (val cache) in
      policy_ref, (module C:Ldp.Http.Cache)
  in
  let%lwt h = Ldp_tls.make
    ~cache_impl:cache
      ~dbg:(fun s -> Statocaml.Log.debug (fun m -> m "%s" s); Lwt.return_unit)
      ()
  in
  let%lwt token =
    let str = conf.token in
    let len = String.length str in
    if len > 0 then
      match String.get str 0 with
      | '$' ->
          let var = String.sub str 1 (len - 1) in
          let token = Option.value ~default:"" (Sys.getenv_opt var) in
          Lwt.return token
      | '.' ->
          (try%lwt Lwt_io.(with_file ~mode:input str read_line)
           with e ->
               Log.err (fun m -> m "While reading token file %S: %s"
                  str (Printexc.to_string e));
               Lwt.return ""
          )
      | _ -> Lwt.return str
    else
      Lwt.return str
  in
  let module H = struct
    let conf = conf
    let user = conf.user
    let repo = conf.repo
    let cache_policy_ref = policy_ref
    let token = token
    module HJ = Ldp.Http.Http_ct (val h)
      (struct
         type t = Yojson.Safe.t
         let ct = Ldp.Types.content_type_of_string "application/json"
         let to_string j = Yojson.Safe.to_string j
         let of_string = function
         | "" -> `Assoc []
         | s -> Yojson.Safe.from_string s
       end)
    include HJ
  end
  in
  Lwt.return (module H : Http_t)

(*
let input_command com =
  let p = Lwt_process.(open_process_in (shell com)) in
  let ic = p#stdout in
  let%lwt str = Lwt_io.read ic in
  match%lwt p#close with
  | Unix.WEXITED 0 -> Lwt.return (Some str)
  | _ -> Lwt.return_none

let issues ?limit () =
  let fields = String.concat "," Types.issue_fields in
  let limit = Option.value ~default:""
    (Option.map (fun d -> Printf.sprintf " --limit=%d" d) limit)
  in
  let com = Printf.sprintf "gh issue list --json %S%s --state all --repo ocaml/ocaml"
    fields limit
  in
  match%lwt input_command com with
  | None -> failwith (Printf.sprintf "No result: %s" com)
  | Some str ->
      let issues = Ocf.(option (Wrapper.list Types.issue_wrapper)) [] in
      let g = Ocf.as_group issues in
      Ocf.from_string g str;
      Lwt.return (Ocf.get issues)
*)

let add_base_headers ?(accept="application/vnd.github+json") (module H:Http_t) h =
  let module He = Cohttp.Header in
  let h = He.add h "authorization" (Printf.sprintf "Bearer %s" H.token) in
  let h = He.add h "X-github-api-version" "2022-11-28" in
  let h = He.add h "accept" accept in
  h

let query_delay = Cache.query_delay

let get (module H:Http_t) ?(headers=Cohttp.Header.init()) iri :
  (H.t option * Ldp.Types.non_rdf_resource, Ldp.Types.error) result Lwt.t =
  let headers = add_base_headers (module H) headers in
  Log.debug (fun m -> m "http-get %a" Iri.pp iri);
  let%lwt res = H.get ~headers iri in
  Lwt.return res

let get_paged_stream http ?(limit=max_int) ?(per_page=100) base_iri wrapper =
  let page_iri n =
    let i = Iri.query_set base_iri "page" (string_of_int n) in
    Iri.query_set i "per_page" (string_of_int per_page)
  in
  let get_page n =
    let iri = page_iri n in
    match%lwt get http iri with
    | Error e -> failwith (Ldp.Types.string_of_error e)
    | Ok (None, _) -> failwith "No json"
    | Ok (Some (`List []), _) -> Lwt.return ([], None)
    | Ok (Some ((`List elts) as _json), _) ->
        (*Log.debug (fun m -> m "json=%s" (Yojson.Safe.pretty_to_string json));*)
        let f json =
          let o = Ocf.list wrapper [] in
          let t = Ocf.as_group o in
          try Ocf.from_json t (`List [json]); List.hd (Ocf.get o)
          with e ->
              Log.debug (fun m -> m "%s\n%s\njson=%s"
                 (Printexc.to_string e)
                   (Printexc.get_backtrace ())
                   (Yojson.Safe.pretty_to_string json));
              raise e
        in
        let l = List.map f elts in
        let next = if List.length l < per_page then None else Some (n+1) in
        Lwt.return (l, next)
    | Ok (Some json, _) ->
        Ocf.invalid_value json
  in
  let returned = ref 0 in
  let items = ref [] in
  let next_page = ref (Some 1) in
  let next () =
    if !returned >= limit then
      Lwt.return_none
    else
      match !items, !next_page with
      | [], None -> Lwt.return_none
      | [], Some n ->
          (
           match%lwt get_page n with
           | [],_ -> next_page := None; Lwt.return_none
           | h :: q, next ->
               items := q ;
               next_page := next ;
               incr returned ;
               Lwt.return_some h
          )
      | h :: q, _ ->
          items := q ;
          incr returned ;
          Lwt.return_some h
  in
  Lwt_stream.from next

let get_paged http ?limit ?per_page base_iri wrapper =
  let stream = get_paged_stream http ?limit ?per_page base_iri wrapper in
  Lwt_stream.to_list stream

let http_user_repo http = let module H = (val http : Http_t) in H.user, H.repo

let commit_iri (user,repo) sha = Iri.of_string
  (Printf.sprintf "https://api.github.com/repos/%s/%s/commits/%s" user repo sha)

let commit_with_comments http (commit : Types.commit) =
  let%lwt comments = get_paged http commit.Types.comments_url Types.comment_wrapper in
  Lwt.return { commit with comments }

let commit http sha =
  let iri = commit_iri (http_user_repo http) sha in
   match%lwt get http iri with
  | Error e -> failwith (Ldp.Types.string_of_error e)
  | Ok (None, _) -> failwith "No json"
  | Ok (Some json, _) ->
      let o = Ocf.option Types.commit_wrapper Types.default_commit in
      let t = Ocf.as_group o in
      (try Ocf.from_json t json
       with e ->
           prerr_endline (Printf.sprintf "commit: json=%s" (Yojson.Safe.pretty_to_string json));
           raise e
      );
      let c = Ocf.get o in
      let%lwt c = commit_with_comments http c in
      Lwt.return c

let commits_iri (user,repo) = Iri.of_string
  (Printf.sprintf "https://api.github.com/repos/%s/%s/commits" user repo)

let commits http ?(iri=commits_iri (http_user_repo http)) ?limit () =
  let%lwt l = get_paged http ?limit iri Types.commit_wrapper in
  let%lwt l = Lwt_list.map_s (fun (c:Types.commit) -> commit http c.sha) l in
  Lwt.return l

let ptime_to_since_date d =
  let ((y,m,d),((h,mi,s),_)) = Ptime.to_date_time d in
  Printf.sprintf "%04d-%02d-%02dT%02d:%02d:%02dZ" y m d h mi s

let iri_add_since_param iri d = Iri.query_set iri "since" (ptime_to_since_date d)

(* beware that the full commits are not fetched and the {!commit} should be
  used to get full information. This stream can be used to fetch the most
  recent commits.*)
let commit_stream ?since ?per_page http =
  let base_iri = commits_iri (http_user_repo http) in
  let base_iri = match since with
    | None -> base_iri
    | Some d -> iri_add_since_param base_iri d
  in
  get_paged_stream http ?per_page base_iri Types.commit_wrapper

let issue_with_comments http (issue : Types.issue) =
  let%lwt comments = get_paged http issue.Types.comments_url Types.comment_wrapper in
  Lwt.return { issue with comments }

let issue_with_events http (issue : Types.issue) =
  let%lwt events = get_paged http issue.Types.events_url Types.event_wrapper in
  Lwt.return { issue with events }

let issue_timeline_iri (user,repo) (i : Types.issue) = Iri.of_string
  (Printf.sprintf "https://api.github.com/repos/%s/%s/issues/%d/timeline" user repo i.number)

let issue_with_timeline http (issue : Types.issue) =
  let iri = issue_timeline_iri (http_user_repo http) issue in
  let%lwt timeline = get_paged http iri Types.event_wrapper in
  Lwt.return { issue with timeline }

let issue_with_duration issue =
  match issue.Types.state, issue.closed_at with
  | `Open, _ -> issue
  | `Closed, None ->
      Log.warn (fun m -> m "Issue %s: closed but with no closed_at" issue.Types.id);
      issue
  | `Closed, Some closed_date ->
      let span = Ptime.diff closed_date issue.created_at in
      { issue with duration = Some span }

let pull_request_with_review_comments http (pr : Types.pull_request) =
  let%lwt review_comments = get_paged http pr.Types.review_comments_url Types.review_comment_wrapper in
  Lwt.return { pr with review_comments }

let pull_request_with_commits http (pr : Types.pull_request) =
  let%lwt commits = commits http ~iri:pr.Types.commits_url () in
  Lwt.return { pr with commits }

let prs_iri (user, repo) =
  Iri.of_string (Printf.sprintf "https://api.github.com/repos/%s/%s/pulls?state=all" user repo)

let issue_with_pull_request http (i:Types.issue) =
  match i.pull_request with
  | None -> Lwt.return i
  | Some pr ->
      match%lwt get http pr.url with
      | Error e -> failwith (Ldp.Types.string_of_error e)
      | Ok (None, _) -> failwith "No json"
      | Ok (Some json, _) ->
          let pr =
            let o = Ocf.option Types.pull_request_wrapper Types.default_pull_request in
            let t = Ocf.as_group o in
            try Ocf.from_json t json; Ocf.get o
            with e ->
                prerr_endline (Printf.sprintf "pull_request: json=%s"
                 (Yojson.Safe.pretty_to_string json));
                raise e
          in
          let%lwt pr = pull_request_with_review_comments http pr in
          let%lwt pr = pull_request_with_commits http pr in
          Lwt.return { i with pull_request = Some pr }

let issue_iri (user,repo) id = Iri.of_string
  (Printf.sprintf "https://api.github.com/repos/%s/%s/issues/%d" user repo id)

let issue http id =
  let iri = issue_iri (http_user_repo http) id in
  match%lwt get http iri with
  | Error e -> failwith (Ldp.Types.string_of_error e)
  | Ok (None, _) -> failwith "No json"
  | Ok (Some json, _) ->
      let i =
        let o = Ocf.option Types.issue_wrapper Types.default_issue in
        let t = Ocf.as_group o in
        try Ocf.from_json t json; Ocf.get o
        with e ->
            prerr_endline (Printf.sprintf "issue %d: json=%s" id
             (Yojson.Safe.pretty_to_string json));
            raise e
      in
      let%lwt i = issue_with_comments http i in
      let%lwt i = issue_with_events http i in
      let%lwt i = issue_with_timeline http i in
      let%lwt i = issue_with_pull_request http i in
      let i = issue_with_duration i in
      Lwt.return i

let issues_iri (user, repo) = Iri.of_string
  (Printf.sprintf "https://api.github.com/repos/%s/%s/issues?state=all" user repo)

let issues http ?since ?limit () =
  let iri =
    let iri = issues_iri (http_user_repo http) in
    match since with
    | None -> iri
    | Some date -> iri_add_since_param iri date
  in
  let%lwt l = get_paged http ?limit iri Types.issue_wrapper in
  let%lwt l = Lwt_list.map_s (issue_with_comments http) l in
  let%lwt l = Lwt_list.map_s (issue_with_events http) l in
  let%lwt l = Lwt_list.map_s (issue_with_timeline http) l in
  let%lwt l = Lwt_list.map_s (issue_with_pull_request http) l in
  let l = List.map issue_with_duration l in
  Lwt.return l

let full_user_by_id_iri id = Iri.of_string
  (Printf.sprintf "https://api.github.com/user/%d" id)

let full_user http (user : Types.user)  =
  let%lwt res =
    match%lwt get http user.url with
    | Error (Ldp.Http.Query_error (_,404,_)) when user.id <> 0 ->
        (* try getting by id, user may have changed its login *)
        let iri = full_user_by_id_iri user.id in
        (match%lwt get http iri with
         | Error e -> Lwt.return_error (Ldp.Types.string_of_error e)
         | Ok (None, _) -> Lwt.return_error "No json"
         | Ok (Some json, _) -> Lwt.return_ok (json,true)
        )
    | Error e -> Lwt.return_error (Ldp.Types.string_of_error e)
    | Ok (None, _) -> Lwt.return_error "No json"
    | Ok (Some json, _) -> Lwt.return_ok (json,false)
  in
  match res with
  | Error msg -> failwith msg
  | Ok (json, renamed) ->
      let o = Ocf.option Types.full_user_wrapper Types.default_full_user in
      let t = Ocf.as_group o in
      (try Ocf.from_json t json
       with e ->
           Log.debug (fun m -> m "full_user: json=%s" (Yojson.Safe.pretty_to_string json));
           raise e
      );
      let fu = Ocf.get o in
      if renamed then
        Log.info (fun m -> m "User %S (%d) seems to have changed its login to %S"
           user.login user.id fu.login);
      Lwt.return fu

let user_iri login = Iri.of_string (Printf.sprintf "https://api.github.com/users/%s" login)
let full_user_of_login http login =
  let (u:Types.user) = { Types.default_user with login ; id = 0 ; url = user_iri login } in
  full_user http u

let gather_users http ?(more=[]) issues commits =
  let module S = Statocaml in
  let aliased = ref S.Sset.empty in
  let add_user map (user:Types.user) =
    match S.Smap.find_opt user.Types.login map with
    | Some _ -> Lwt.return map
    | None ->
        match S.Sset.mem user.login !aliased with
        | true -> (* user already aliased, so it has already been added *)
            Lwt.return map
        | false ->
            match%lwt full_user http user with
            | exception e ->
                Log.err (fun m -> m "%s" (Printexc.to_string e));
                Lwt.return map
            | fu ->
                if fu.login <> user.login then
                  (* add an alias *)
                  aliased := S.Sset.add user.login !aliased;
                let map = S.Smap.add fu.login fu map in
                Lwt.return map
  in
  let%lwt users = Lwt_list.fold_right_s
    (fun login acc ->
       let (u:Types.user) = { Types.default_user with login ; url = user_iri login ; id = 0 } in
       add_user acc u)
      more S.Smap.empty
  in
  let add_from_comment map (c : Types.comment) = add_user map c.Types.user in
  let add_from_review_comment map (c : Types.review_comment) =
    match c.Types.user with
    | None -> Lwt.return map
    | Some user -> add_user map user
  in
  let add_from_commit map (c:Types.commit) =
    match c.author with
    | None -> Lwt.return map
    | Some user -> add_user map user
  in
  let iter map (issue:Types.issue) =
    let%lwt map = add_user map issue.Types.user in
    let%lwt map = Lwt_list.fold_left_s add_from_comment map issue.Types.comments in
    match issue.pull_request with
    | None -> Lwt.return map
    | Some pr ->
        let%lwt map = Lwt_list.fold_left_s add_from_review_comment map pr.Types.review_comments in
        Lwt_list.fold_left_s add_from_commit map pr.commits
  in
  let%lwt users = Lwt_list.fold_left_s iter users issues in
  Lwt_list.fold_left_s add_from_commit users commits

let sort_commit_by_author_date =
  List.sort (fun c1 c2 -> Ptime.compare
     c1.Types.git_commit.author.cu_date
     c2.Types.git_commit.author.cu_date)

let login_by_commit_hash issues =
  let module S = Statocaml in
  let f_event acc e =
    match e.Types.commit_id, e.actor with
    | None, _ | _, None -> acc
    | Some hash, Some user ->
        let v = match e.event with
          | `Referenced -> Some (`Committer user.Types.login)
          | `Merged -> None
          | _ -> None (*Some (`Committer login)*)
        in
        match v with
        | None -> acc
        | Some v -> S.Smap.add hash v acc
  in
  let f_issue acc i = List.fold_left f_event acc i.Types.events in
  List.fold_left f_issue S.Smap.empty issues

let commits_by_issue =
  let f_event acc e =
    match e.Types.commit_id with
    | None -> acc
    | Some sha -> Statocaml.Sset.add sha acc
  in
  fun (issue : Types.issue) ->
    List.fold_left f_event Statocaml.Sset.empty issue.events

let repo_events_iri (user,repo) = Iri.of_string
  (Printf.sprintf "https://api.github.com/repos/%s/%s/events" user repo)

let github_events_stream http =
  get_paged_stream http
    (repo_events_iri (http_user_repo http))
    Types.github_event_wrapper

let timeline_commit_shas =
  let f acc (ev:Types.event) =
    match ev.event, ev.commit_id with
    | `Committed, Some sha -> Statocaml.Sset.add sha acc
    | _ -> acc
  in
  List.fold_left f Statocaml.Sset.empty

let releases_iri (user, repo) = Iri.of_string
  (Printf.sprintf "https://api.github.com/repos/%s/%s/releases" user repo)

let releases http ?(iri=releases_iri (http_user_repo http)) ?limit () =
  let%lwt l = get_paged http ?limit iri Types.release_wrapper in
  Lwt.return l