Source file Textedit.ml

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
(* Nat Mote
 *
 * Copyright (C) 2019-2022 r2c
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * version 2.1 as published by the Free Software Foundation, with the
 * special exception on linking described in file LICENSE.
 *
 * This library is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the file
 * LICENSE for more details.
 *)

(* This module represents and applies edits to text *)

type t = {
  path : Common.filename;
  (* 0-based byte index, inclusive *)
  start : int;
  (* 0-based byte index, exclusive *)
  end_ : int;
  replacement_text : string;
}

type edit_application_result =
  | Success of string
  | Overlap of {
      partial_result : string;
      (* nonempty *)
      discarded_edits : t list;
    }

let remove_overlapping_edits edits =
  let rec f edits discarded_edits = function
    | e1 :: e2 :: tl ->
        if e1.end_ > e2.start then
          let discarded_edits = e2 :: discarded_edits in
          f edits discarded_edits (e1 :: tl)
        else
          let edits = e1 :: edits in
          f edits discarded_edits (e2 :: tl)
    | [ edit ] ->
        let edits = edit :: edits in
        (List.rev edits, List.rev discarded_edits)
    | [] -> (List.rev edits, List.rev discarded_edits)
  in
  f [] [] edits

let apply_edit_to_text text { start; end_; replacement_text; _ } =
  let before = Str.first_chars text start in
  let after = Str.string_after text end_ in
  before ^ replacement_text ^ after

let apply_edits_to_text text edits =
  let edits = List.sort (fun e1 e2 -> e1.start - e2.start) edits in
  let edits, discarded_edits = remove_overlapping_edits edits in
  (* Switch to bottom to top order so that we don't need to track offsets as
   * we apply multiple patches *)
  let edits = List.rev edits in
  let fixed_text =
    (* Apply the fixes. These string operations are inefficient but should
     * be fine. The Python CLI version of this code is even more inefficent. *)
    List.fold_left
      (fun file_text edit -> apply_edit_to_text file_text edit)
      text edits
  in
  if discarded_edits = [] then Success fixed_text
  else Overlap { partial_result = fixed_text; discarded_edits }

let partition_edits_by_file edits =
  (* TODO Consider using Common.group_by if we update it to return edits in
   * order and add a comment specifying that changes to it must maintain that
   * behavior. *)
  let edits_by_file = Hashtbl.create 8 in
  List.iter
    (fun edit ->
      let prev =
        match Hashtbl.find_opt edits_by_file edit.path with
        | Some lst -> lst
        | None -> []
      in
      Hashtbl.replace edits_by_file edit.path (edit :: prev))
    edits;
  (* Restore the original order of the edits as they appeared in the input list.
   * This is important so that we consistently choose to apply the first edit in
   * the original input when there are edits for an identical span. *)
  Hashtbl.filter_map_inplace
    (fun _file edits -> Some (List.rev edits))
    edits_by_file;
  edits_by_file

let apply_edits ~dryrun edits =
  let edits_by_file = partition_edits_by_file edits in
  let all_discarded_edits = ref [] in
  Hashtbl.iter
    (fun file file_edits ->
      let file_text = Common.read_file file in
      let new_text =
        match apply_edits_to_text file_text file_edits with
        | Success x -> x
        | Overlap { partial_result; discarded_edits } ->
            Common.push discarded_edits all_discarded_edits;
            partial_result
      in
      (* TOPORT: when dryrun, report fixed lines *)
      if not dryrun then Common.write_file ~file new_text)
    edits_by_file;
  let modified_files = Hashtbl.to_seq_keys edits_by_file |> List.of_seq in
  let discarded_edits = List.concat !all_discarded_edits in
  (modified_files, discarded_edits)