Source file process.ml

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
(*****************************************************************************)
(*                                                                           *)
(* Open Source License                                                       *)
(* Copyright (c) 2020-2022 Nomadic Labs <contact@nomadic-labs.com>           *)
(* Copyright (c) 2020 Metastate AG <hello@metastate.dev>                     *)
(*                                                                           *)
(* Permission is hereby granted, free of charge, to any person obtaining a   *)
(* copy of this software and associated documentation files (the "Software"),*)
(* to deal in the Software without restriction, including without limitation *)
(* the rights to use, copy, modify, merge, publish, distribute, sublicense,  *)
(* and/or sell copies of the Software, and to permit persons to whom the     *)
(* Software is furnished to do so, subject to the following conditions:      *)
(*                                                                           *)
(* The above copyright notice and this permission notice shall be included   *)
(* in all copies or substantial portions of the Software.                    *)
(*                                                                           *)
(* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR*)
(* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,  *)
(* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL   *)
(* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER*)
(* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING   *)
(* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER       *)
(* DEALINGS IN THE SOFTWARE.                                                 *)
(*                                                                           *)
(*****************************************************************************)

open Base

type hooks = Process_hooks.t = {
  on_log : string -> unit;
  on_spawn : string -> string list -> unit;
}

(* Information which is specific to processes that run on remote runners. *)
type remote = {runner : Runner.t; pid : int Lwt.t}

type t = {
  id : int;
  name : string;
  command : string;
  arguments : string list;
  color : Log.Color.t;
  lwt_process : Lwt_process.process_full;
  mutable handle : unit Lwt.t;
  log_status_on_exit : bool;
  stdout : Echo.t;
  stderr : Echo.t;
  hooks : hooks option;
  remote : remote option;
}

type failed_info = {
  name : string;
  command : string;
  arguments : string list;
  status : Unix.process_status option;
  expect_failure : bool;
  reason : String.t option;
}

exception Failed of failed_info

(** Converts the given [status] into a string explaining
    why the corresponding process has stopped.

    The resulting string is a subject-less sentence that
    assumes that the subject will be prepended. *)
let status_to_reason = function
  | Unix.WEXITED code -> Format.sprintf "exited with code %d" code
  | Unix.WSIGNALED code -> Format.sprintf "was killed by signal %d" code
  | Unix.WSTOPPED code -> Format.sprintf "was stopped by signal %d" code

let () =
  Printexc.register_printer @@ function
  | Failed {name; command; arguments; status; expect_failure; reason} ->
      let reason =
        Option.value
          ~default:
            (match status with
            | Some st -> status_to_reason st
            | None -> Printf.sprintf "exited")
          reason
      in
      Some
        (Printf.sprintf
           "%s%s %s (full command: %s)"
           name
           (if expect_failure then " was expected to fail," else "")
           reason
           (String.concat " " (List.map Log.quote_shell (command :: arguments))))
  | _ -> None

let get_unique_name =
  let name_counts = ref String_map.empty in
  fun name ->
    let index =
      match String_map.find_opt name !name_counts with None -> 0 | Some i -> i
    in
    name_counts := String_map.add name (index + 1) !name_counts ;
    name ^ "#" ^ string_of_int index

let fresh_id =
  let next = ref 0 in
  fun () ->
    let id = !next in
    incr next ;
    id

module ID_map = Map.Make (Int)

let live_processes = ref ID_map.empty

let wait process =
  let* status = process.lwt_process#status and* () = process.handle in
  (* If we already removed [process] from [!live_processes], we already logged
     the exit status. *)
  if ID_map.mem process.id !live_processes then (
    live_processes := ID_map.remove process.id !live_processes ;
    if process.log_status_on_exit then
      match status with
      | WEXITED code -> Log.debug "%s exited with code %d." process.name code
      | WSIGNALED code ->
          Log.debug
            "%s was killed by signal %s."
            process.name
            (Process_common.show_signal code)
      | WSTOPPED code ->
          Log.debug
            "%s was stopped by signal %s."
            process.name
            (Process_common.show_signal code)) ;
  return status

(* Read process outputs and log them.
   Also take care of removing the process from [live_processes] on termination. *)
let handle_process ~log_output process =
  let rec handle_output name ch echo lines =
    let* line = Lwt_io.read_line_opt ch in
    match line with
    | None ->
        Echo.close echo ;
        let* () = Lwt_io.close ch in
        return (List.rev lines)
    | Some line ->
        if log_output then Log.debug ~prefix:name ~color:process.color "%s" line ;
        let lines =
          match process.hooks with None -> lines | Some _ -> line :: lines
        in
        Echo.push echo line ;
        (* TODO: here we assume that all lines end with "\n",
             but it may not always be the case:
           - there may be lines ending with "\r\n";
           - the last line may not end with "\n" before the EOF. *)
        Echo.push echo "\n" ;
        handle_output name ch echo lines
  in
  let promise =
    let* stdout_lines =
      handle_output process.name process.lwt_process#stdout process.stdout []
    and* stderr_lines =
      handle_output process.name process.lwt_process#stderr process.stderr []
    and* _ =
      (* Note: [wait] calls [process.handle], which will eventually be [handle_process]
         (this function), which calls [wait] (right here). So why is there no deadlock?
         Initially, [process.handle] is set to [Lwt.return_unit].
         [handle_process] is called only once, in [spawn_with_stdin], just before
         setting [process.handle] to [handle_process].
         [handle_process] calls [wait] immediately, and [wait] calls [process.handle]
         immediately, so it is still [Lwt.return_unit].
         This is too subtle though, we need to improve this. *)
      wait process
    in
    match process.hooks with
    | None -> unit
    | Some hooks ->
        List.iter hooks.on_log stdout_lines ;
        List.iter hooks.on_log stderr_lines ;
        unit
  in
  (* As long as the process is running, we don't want to stop reading its output.
     So we return a non-cancelable promise.
     This also makes sure that if [wait] is called twice, and one of the two
     instances is canceled, the other instance will not also be canceled.
     Indeed, canceling one instance of [wait] would cancel [process.handle],
     which is also used by the other instance of [wait].
     This can happen in particular if a test is [wait]ing for a process and
     the user presses Ctrl+C, which triggers [clean_up], which calls [wait]. *)
  Lwt.no_cancel promise

(** [to_key_equal_value kv_map], given that kv_map is {K1->V1; K2->V2}
    returns the array ["K1=V1"; "K2=V2"]. See [parse_current_environment]
    for a related function *)
let to_key_equal_value (kv_map : string String_map.t) : string array =
  kv_map |> String_map.to_seq
  |> Seq.map (fun (name, value) -> name ^ "=" ^ value)
  |> Array.of_seq

let spawn_with_stdin ?runner ?(log_command = true) ?(log_status_on_exit = true)
    ?(log_output = true) ?name ?(color = Log.Color.FG.cyan)
    ?(env = String_map.empty) ?hooks command arguments =
  let name = Option.value ~default:(get_unique_name command) name in
  Option.iter (fun hooks -> hooks.on_spawn command arguments) hooks ;
  if log_command then
    Log.command ~color:Log.Color.bold ~prefix:name command arguments ;
  let lwt_command =
    match runner with
    | None -> (command, Array.of_list (command :: arguments))
    | Some runner ->
        let local_env = String_map.bindings env in
        let ssh, ssh_args =
          Runner.wrap_with_ssh_pid runner {local_env; name = command; arguments}
        in
        (ssh, Array.of_list (ssh :: ssh_args))
  in
  let lwt_process =
    match runner with
    | None ->
        let env =
          (* Merge [current_env] and [env], choosing [env] on common keys: *)
          String_map.union
            (fun _ _ new_val -> Some new_val)
            (Process_common.parse_current_environment ())
            env
          |> to_key_equal_value
        in
        Lwt_process.open_process_full ~env lwt_command
    | Some _runner -> Lwt_process.open_process_full lwt_command
  in
  let remote =
    let open Lwt.Infix in
    match runner with
    | None -> None
    | Some runner ->
        let pid =
          Lwt_io.read_line lwt_process#stdout >|= fun pid ->
          match int_of_string_opt pid with
          | Some pid -> pid
          | None ->
              raise
                (Failed
                   {
                     name;
                     command;
                     arguments;
                     status = None;
                     expect_failure = false;
                     reason = Some "unable to read remote process PID";
                   })
        in
        Some {runner; pid}
  in
  let process =
    {
      id = fresh_id ();
      name;
      command;
      arguments;
      color;
      lwt_process;
      handle = Lwt.return_unit;
      log_status_on_exit;
      stdout = Echo.create ();
      stderr = Echo.create ();
      hooks;
      remote;
    }
  in
  live_processes := ID_map.add process.id process !live_processes ;
  process.handle <- handle_process ~log_output process ;
  (process, process.lwt_process#stdin)

let spawn ?runner ?log_command ?log_status_on_exit ?log_output ?name ?color ?env
    ?hooks command arguments =
  let process, stdin =
    spawn_with_stdin
      ?runner
      ?log_command
      ?log_status_on_exit
      ?log_output
      ?name
      ?color
      ?env
      ?hooks
      command
      arguments
  in
  Background.register (Lwt_io.close stdin) ;
  process

(* Propagate the signal in case of remote runner. *)
let signal_remote_if_needed signal process =
  match process.remote with
  | None -> ()
  | Some {pid; runner} ->
      let open Lwt in
      let open Infix in
      Background.register
        ( ( pid >|= fun pid ->
            let command = "kill" in
            let arguments =
              ["-" ^ string_of_int signal; "-P"; string_of_int pid]
            in
            let shell =
              Runner.Shell.(
                redirect_stderr (cmd [] command arguments) "/dev/null")
            in
            Runner.wrap_with_ssh runner shell )
        >>= fun (ssh, ssh_args) ->
          let cmd = (ssh, Array.of_list (ssh :: ssh_args)) in
          Lwt_process.exec cmd >>= fun _ -> Lwt.return_unit )

let kill (process : t) =
  Log.debug "Send SIGKILL to %s." process.name ;
  (* 9 is the code for SIGKILL *)
  signal_remote_if_needed 9 process ;
  process.lwt_process#terminate

let terminate ?timeout (process : t) =
  Log.debug "Send SIGTERM to %s." process.name ;
  (* 15 is the code for SIGTERM *)
  signal_remote_if_needed 15 process ;
  (match process.remote with
  | None ->
      process.lwt_process#kill (if Sys.win32 then Sys.sigkill else Sys.sigterm)
  | Some _ ->
      (* Do not send SIGTERM to SSH. It will die on its own once the
         remote process exits, or we will kill it after a timeout.
         Note that [signal_remote_if_needed] did not send SIGTERM to SSH,
         it sent it (through SSH) to the process that SSH spawned on the remote. *)
      ()) ;
  match timeout with
  | None -> ()
  | Some timeout ->
      let wait_and_ignore_status =
        let* (_ : Unix.process_status) = wait process in
        unit
      in
      let kill_after_timeout =
        let* () = Lwt_unix.sleep timeout in
        kill process ;
        unit
      in
      Background.register
        (Lwt.pick [wait_and_ignore_status; kill_after_timeout])

let pid (process : t) = process.lwt_process#pid

let validate_status ?(expect_failure = false) status =
  match status with
  | Unix.WEXITED n
    when (n = 0 && not expect_failure) || (n <> 0 && expect_failure) ->
      Ok ()
  | _ -> Error (`Invalid_status (status_to_reason status))

let check ?(expect_failure = false) process =
  let* status = wait process in
  match validate_status ~expect_failure status with
  | Ok () -> unit
  | Error (`Invalid_status reason) ->
      raise
        (Failed
           {
             name = process.name;
             command = process.command;
             arguments = process.arguments;
             status = Some status;
             expect_failure;
             reason = Some reason;
           })

let run ?log_status_on_exit ?name ?color ?env ?hooks ?expect_failure command
    arguments =
  spawn ?log_status_on_exit ?name ?color ?env ?hooks command arguments
  |> check ?expect_failure

let clean_up ?(timeout = Cli.Options.cleanup_timeout) () =
  let list = ID_map.bindings !live_processes |> List.map snd in
  List.iter (terminate ~timeout) list ;
  Lwt_list.iter_p
    (fun process ->
      let* _ = wait process in
      unit)
    list

let stdout process = Echo.get_lwt_channel process.stdout

let stderr process = Echo.get_lwt_channel process.stderr

let name (process : t) = process.name

let check_and_read ?expect_failure ~channel_getter process =
  let* () = check ?expect_failure process
  and* output = Lwt_io.read (channel_getter process) in
  return output

let check_and_read_both ?expect_failure process =
  let* () = check ?expect_failure process
  and* out = Lwt_io.read (stdout process)
  and* err = Lwt_io.read (stderr process) in
  return (out, err)

let check_and_read_stdout = check_and_read ~channel_getter:stdout

let check_and_read_stderr = check_and_read ~channel_getter:stderr

let run_and_read_stdout ?log_status_on_exit ?name ?color ?env ?hooks
    ?expect_failure command arguments =
  let process =
    spawn ?log_status_on_exit ?name ?color ?env ?hooks command arguments
  in
  check_and_read_stdout ?expect_failure process

let run_and_read_stderr ?log_status_on_exit ?name ?color ?env ?hooks
    ?expect_failure command arguments =
  let process =
    spawn ?log_status_on_exit ?name ?color ?env ?hooks command arguments
  in
  check_and_read_stdout ?expect_failure process

let check_error ?exit_code ?msg process =
  let* status = wait process in
  let* err_msg = Lwt_io.read (stderr process) in
  let error =
    {
      name = process.name;
      command = process.command;
      arguments = process.arguments;
      status = Some status;
      expect_failure = true;
      reason = None;
    }
  in
  match status with
  | WEXITED n -> (
      match exit_code with
      | None when n = 0 ->
          raise (Failed {error with reason = Some " with any non-zero code"})
      | Some exit_code when n <> exit_code ->
          let reason = sf " with code %d but failed with code %d" exit_code n in
          raise (Failed {error with reason = Some reason})
      | _ ->
          Option.iter
            (fun msg ->
              if err_msg =~! msg then
                raise
                  (Failed
                     {
                       error with
                       reason =
                         Some
                           (sf " but failed with stderr =~! %s" (show_rex msg));
                     }))
            msg ;
          unit)
  | _ -> raise (Failed error)

let program_path program =
  Lwt.catch
    (fun () ->
      let* path =
        if Sys.win32 then run_and_read_stdout "where.exe" [program]
        else run_and_read_stdout "sh" ["-c"; "command -v " ^ program]
      in
      return (Some (String.trim path)))
    (fun _ -> return None)