Source file tsort.ml

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
(* User-friendly topological sort based on Kahn's algorithm.

   Usage example: sort [("foundation", []); ("basement", ["foundation"]);]

   Authors: Daniil Baturin (2019), Martin Jambon (2020).
*)

type 'a sort_result =
  | Sorted of 'a list
  | ErrorCycle of 'a list

(* Finds "isolated" nodes,
   that is, nodes that have no dependencies *)
let find_isolated_nodes hash =
  let aux id deps acc =
    match deps with
    | [] -> id :: acc
    | _  -> acc
  in Hashtbl.fold aux hash []

(* Takes a node name list and removes all those nodes from a hash *)
let remove_nodes nodes hash =
  List.iter (Hashtbl.remove hash) nodes

(* Walks through a node:dependencies hash and removes a dependency
   from all nodes that have it in their dependency lists *)
let remove_dependency hash dep =
  let aux dep hash id =
    let deps = Hashtbl.find hash id in
    let deps =
      if List.exists ((=) dep) deps then
        CCList.remove ~eq:(=) ~key:dep deps
      else deps
    in
    begin
      Hashtbl.remove hash id;
      Hashtbl.add hash id deps
    end
  in
  let ids = CCHashtbl.keys_list hash in
  List.iter (aux dep hash) ids

(* Finds non-existent nodes,
   that is, nodes that are mentiones in the value part of the assoc list,
   but don't exist among the assoc list keys *)
let find_nonexistent_nodes nodes =
  let keys = List.fold_left (fun acc (k, _) -> k :: acc) [] nodes in
  let rec find_aux ns nonexistent =
    match ns with
    | n :: ns ->
      if List.exists ((=) n) keys then find_aux ns nonexistent
      else find_aux ns (n :: nonexistent)
    | [] -> nonexistent |> CCList.uniq ~eq:(=)
  in
  List.fold_left (fun acc (v, vs) ->
    let ns = find_aux vs [] in
    match ns with
    | [] -> acc
    | _ -> (v, ns) :: acc
  ) [] nodes

(*
   Append missing nodes to the graph, in the order in which they were
   encountered. This particular order doesn't have to be guaranteed by the
   API but seems nice to have.
*)
let add_missing_nodes graph_l graph =
  let missing =
    List.fold_left (fun acc (_, vl) ->
      List.fold_left (fun acc v ->
        if not (Hashtbl.mem graph v) then
          (v, []) :: acc
        else
          acc
      ) acc vl
    ) [] graph_l
    |> List.rev
  in
  List.iter (fun (v, vl) -> Hashtbl.replace graph v vl) missing;
  graph_l @ missing

(* The Kahn's algorithm:
    1. Find nodes that have no dependencies ("isolated") and remove them from
       the graph hash.
       Add them to the initial sorted nodes list and the list of isolated
       nodes for the first sorting pass.
    2. For every isolated node, walk through the remaining nodes and
       remove it from their dependency list.
       Nodes that only depended on it now have empty dependency lists.
    3. Find all nodes with empty dependency lists and append them to the sorted
       nodes list _and_ the list of isolated nodes to use for the next step
    4. Repeat until the list of isolated nodes is empty
    5. If the graph hash is still not empty, it means there is a cycle.
 *)
let sort nodes =
  let rec sorting_loop deps hash acc =
    match deps with
    | [] -> acc
    | dep :: deps ->
      let () = remove_dependency hash dep in
      let isolated_nodes = find_isolated_nodes hash in
      let () = remove_nodes isolated_nodes hash in
      sorting_loop
        (List.append deps isolated_nodes) hash (List.append acc isolated_nodes)
  in
  let nodes_hash = CCHashtbl.of_list nodes in
  let _nodes = add_missing_nodes nodes nodes_hash in
  let base_nodes = find_isolated_nodes nodes_hash in
  let () = remove_nodes base_nodes nodes_hash in
  let sorted_node_ids = sorting_loop base_nodes nodes_hash [] in
  let sorted_node_ids = List.append base_nodes sorted_node_ids in
  let remaining_ids = CCHashtbl.keys_list nodes_hash in
  match remaining_ids with
  | [] -> Sorted sorted_node_ids
  | _  -> ErrorCycle remaining_ids

(*
   Deal with cyclic graphs.
*)
module Graph = struct
  type ('a, 'b) t = ('a, 'b list) Hashtbl.t

  let create l : (_, _) t =
    let tbl = Hashtbl.create 100 in
    List.iter (fun (k, v) -> Hashtbl.replace tbl k v) l;
    tbl

  let transpose tbl =
    let tbl2 = Hashtbl.create 100 in
    let init v =
      if not (Hashtbl.mem tbl2 v) then
        Hashtbl.add tbl2 v []
    in
    Hashtbl.iter (fun u vl ->
      init u;
      List.iter (fun v ->
        let ul =
          try Hashtbl.find tbl2 v
          with Not_found -> []
        in
        Hashtbl.replace tbl2 v (u :: ul)
      ) vl
    ) tbl;
    tbl2

  let _to_list tbl =
    Hashtbl.fold (fun u vl acc -> (u, vl) :: acc) tbl []
end

(*
   Sort the results of 'partition' so as to follow the original node
   ordering. If not for the user, it's useful for us for testing.
*)
let sort_partition graph_l clusters =
  let priority = Hashtbl.create 100 in
  List.iteri (fun i (v, _) -> Hashtbl.replace priority v i) graph_l;
  let prio v =
    try Hashtbl.find priority v
    with Not_found -> assert false
  in
  let list_prio vl =
    match vl with
    | [] -> assert false
    | x :: _ -> prio x
  in
  let cmp u v = compare (prio u) (prio v) in
  let cmp_list ul vl = compare (list_prio ul) (list_prio vl) in
  List.map (fun l -> List.sort cmp l) clusters
  |> List.sort cmp_list

(*
   Implementation of Kosaraju's algorithm for partitioning a graph into its
   strongly connected components.
*)
let partition graph_l =
  let graph = Graph.create graph_l in
  let graph_l = add_missing_nodes graph_l graph in
  let tr_graph = Graph.transpose graph in
  let visits = Hashtbl.create 100 in
  let is_visited v = Hashtbl.mem visits v in
  let mark_visited v = Hashtbl.replace visits v () in
  let get_out_neighbors v =
    try Hashtbl.find graph v
    with Not_found -> assert false
  in
  let get_in_neighbors v =
    try Hashtbl.find tr_graph v
    with Not_found -> assert false
  in
  let rec visit acc v =
    if not (is_visited v) then (
      mark_visited v;
      let out_neighbors = get_out_neighbors v in
      let acc =
        List.fold_left (fun acc u -> visit acc u) acc out_neighbors in
      v :: acc
    )
    else
      acc
  in
  let l =
    List.fold_left (fun acc (v, _vl) ->
      visit acc v
    ) [] graph_l
  in
  let assignments = Hashtbl.create 100 in
  let is_assigned v = Hashtbl.mem assignments v in
  let rec assign v root =
    if not (is_assigned v) then (
      Hashtbl.replace assignments v root;
      let in_neighbors = get_in_neighbors v in
      List.iter (fun u ->
        assign u root
      ) in_neighbors
    )
  in
  List.iter (fun v ->
    assign v v
  ) l;
  (* end Kosaraju's algorithm *)

  let clusters = Hashtbl.create 100 in
  Hashtbl.iter (fun v id ->
    let members =
      try Hashtbl.find clusters id
      with Not_found -> []
    in
    Hashtbl.replace clusters id (v :: members)
  ) assignments;
  let partition =
    Hashtbl.fold (fun _id members acc -> members :: acc) clusters []
  in
  graph_l, sort_partition graph_l partition

let find_strongly_connected_components graph_l =
  let _completed_graph_l, components = partition graph_l in
  components

(*
   Algorithm:
   1. Identify the strongly-connected components of the input graph.
   2. Derive a DAG from merging the nodes within each component
      (condensation).
   3. Topologically-sort that DAG.
   4. Re-expand the nodes representing components into the original nodes.
*)
let sort_strongly_connected_components graph_l =
  let graph_l, components = partition graph_l in
  let index = Hashtbl.create 100 in
  let rev_index = Hashtbl.create 100 in
  List.iteri (fun id comp ->
    List.iter (fun v ->
      Hashtbl.add index v id;
      Hashtbl.add rev_index id comp
    ) comp
  ) components;

  let get_comp_id v =
    try Hashtbl.find index v
    with Not_found -> assert false
  in
  let get_comp_members id =
    try Hashtbl.find rev_index id
    with Not_found -> assert false
  in
  let condensation =
    let tbl = Hashtbl.create 100 in
    List.iter (fun (u, vl) ->
      let id = get_comp_id u in
      let idl0 =
        try Hashtbl.find tbl id
        with Not_found -> []
      in
      let idl = List.map get_comp_id vl @ idl0 in
      Hashtbl.replace tbl id idl
    ) graph_l;
    Hashtbl.fold (fun id idl acc ->
      (* Remove v->v edges because they are not supported by tsort.
         Duplicates seem ok. *)
      let filtered = List.filter ((<>) id) idl in
      (id, filtered) :: acc
    ) tbl []
  in
  let sorted_components =
    match sort condensation with
    | Sorted comp_ids -> List.map get_comp_members comp_ids
    | ErrorCycle _ -> assert false
  in
  sorted_components