(* Source file: ConcurrentUnionFind.ml *)
(******************************************************************************)
(*                                                                            *)
(*                                 UnionFind                                  *)
(*                                                                            *)
(*                       François Pottier, Inria Paris                        *)
(*                                                                            *)
(*  Copyright Inria. All rights reserved. This file is distributed under      *)
(*  the terms of the GNU Library General Public License version 2, with a     *)
(*  special exception on linking, as described in the file LICENSE.           *)
(*                                                                            *)
(******************************************************************************)

(* This module offers a concurrent variant of the union-find data structure.

   The data structure is based on disjoint set forests. Path compression is
   performed as usual, using ordinary write instructions (as opposed to CAS
   instructions) because data races during path compression are benign.

   Linking is by random index. Every vertex carries a unique identifier whose
   most significant bits are randomly generated. When two vertices are linked,
   a comparison between their identifiers determines the direction of the new
   link: we maintain the invariant property that the parent always has a
   smaller identifier than the child. This ensures that, even in concurrent
   scenarios, no cycle can appear. *)

(* -------------------------------------------------------------------------- *)

(* The content of a vertex is either
   - a pointer to a parent vertex, or
   - a user value. *)

(* Path compression updates the [parent] field of the [Link] object. Therefore,
   this object must be unique. A [Link] object is installed in [union] by the
   CAS instruction that replaces a [Root] object with a [Link] object;
   thereafter, the atomic reference is no longer modified. Indeed, every CAS
   instruction in the code applies to a [Root] object. *)

type 'a content =
  (* A root vertex. [value] is the user value attached to it. *)
  | Root of { value : 'a }
  (* A non-root vertex. [parent] is the vertex that this vertex points to.
     The field is mutable so that path compression can overwrite it in
     place (see [compress]). *)
  | Link of { mutable parent : 'a elem }

(* The type ['a elem] represents a vertex in the union-find data structure. *)

(* Every vertex (and not just every root vertex) carries an identifier,
   because identifiers are used, during path compression, to prevent the
   creation of cycles. *)

(* [id] is the vertex's identifier; it is immutable, and is obtained from
   [G.fresh] when the vertex is created by [make]. [content] is an atomic
   reference that holds either a [Root] object or a [Link] object. *)

and 'a elem =
  { id : int; content : 'a content Atomic.t }

(* -------------------------------------------------------------------------- *)

(* One way of generating unique identifiers is to use a single generator,
   which is shared by all domains. *)

module SharedGeneratorOfUniqueIds = struct

  (* [next] holds the identifier that the next call to [fresh] will
     return. It is initially zero. *)
  let next : int Atomic.t =
    Atomic.make 0

  (* [fresh()] atomically increments [next] and returns the value that
     [next] held just before the increment. *)
  let fresh () : int =
    let id = Atomic.fetch_and_add next 1 in
    id

end

(* In order to obtain balanced forests, we want the ordering of identifiers
   to be random (while preserving the property that identifiers are unique).

   To ensure this, we combine a unique identifier (in the least significant
   bits) and a random number (in the most significant bits). *)

module G = struct

  include SharedGeneratorOfUniqueIds

  (* The bit layout used by [fresh] below is written for 63-bit
     integers, so we insist on a 64-bit platform. *)
  let () =
    assert (Sys.word_size = 64)

  (* [fresh()] returns a salted identifier. This definition shadows the
     [fresh] function that is inherited from the included module; the
     inner call below refers to the inherited one. *)
  let fresh () =
    (* The width of the random salt, in bits. *)
    let salt_bits = 12 in
    (* First draw a unique counter value, then draw the salt. *)
    let unique = fresh () in
    let salt = Random.int (1 lsl salt_bits) in
    (* The salt goes into the [salt_bits] topmost bits of a 63-bit
       integer; the counter value stays in the low bits. The two do not
       overlap as long as the counter remains below 2^51. *)
    (salt lsl (63 - salt_bits)) lor unique

end

(* -------------------------------------------------------------------------- *)

(* [make v] creates a new root. *)

let make (v : 'a) : 'a elem =
  (* A new vertex starts out as a root that carries [v], under a fresh
     identifier. *)
  { id = G.fresh (); content = Atomic.make (Root { value = v }) }

(* -------------------------------------------------------------------------- *)

(* [find x] attempts to find the representative vertex of the equivalence
   class of [x]. It does so by following the path from [x] towards its
   ancestors. Because of interference with other threads, it does not always
   return a root vertex, but it always returns a vertex [z] that lies in the
   same equivalence class as [x] and such that [x.id >= z.id] holds. *)

let rec find (x : 'a elem) : 'a elem =
  match Atomic.get x.content with
  | Link link ->
      (* [x] has a parent. Keep climbing. *)
      find link.parent
  | Root _ ->
      (* [x] was a root when its content was read. Stop here. *)
      x

(* [compress x z] performs path compression, starting at [x], ending at [z].

   Because the path from [x] to [z] can be concurrently destroyed by another
   thread, there is no guarantee that the vertex [z] is actually reached. Path
   compression continues as long as the invariant can be maintained: a parent
   must have a smaller identifier than its child.

   Once finished, [compress x z] returns [z]. *)

let rec compress x z =
  match Atomic.get x.content with
  | Root _ ->
      (* A root has no outgoing edge to redirect. We are done. *)
      z
  | Link link ->
      (* [y] is the current parent of [x]: there is an edge from [x]
         to [y]. *)
      let y = link.parent in
      assert (x.id > y.id);
      if y.id <= z.id then
        (* [y]'s identifier is not greater than [z]'s, so replacing [y]
           with [z] is not guaranteed to preserve the invariant. Stop. *)
        z
      else begin
        (* [z] has a smaller identifier than [y], hence also a smaller
           identifier than [x]. Replacing the edge from [x] to [y] with
           an edge from [x] to [z] therefore preserves the invariant.
           An ordinary write is used. Then, continue with [y]. *)
        assert (x.id > z.id);
        link.parent <- z;
        compress y z
      end

(* [findc x] behaves like [find x] and performs path compression. *)

(* A simple version of it could be defined as follows:

   let findc (x : 'a elem) : 'a elem =
     let z = find x in
     compress x z

   We optimize the common case where [x] is a root. *)

let[@inline] findc (x : 'a elem) : 'a elem =
  match Atomic.get x.content with
  | Link link ->
      (* Slow path: look for the representative, starting at the parent
         of [x], then compress the path that starts at [x]. *)
      compress x (find link.parent)
  | Root _ ->
      (* Fast path: [x] is a root, so there is nothing to compress. *)
      x

(* -------------------------------------------------------------------------- *)

(* [get x] returns the value stored at [x]'s representative vertex. *)

(* The linearization point is the atomic read whose result is [Root _]. *)

let rec get (x : 'a elem) : 'a =
  let z = findc x in
  match Atomic.get z.content with
  | Link _ ->
      (* [z] is not a root: another thread has interfered. Retry,
         starting at [z]. *)
      get z
  | Root { value } ->
      (* [z] is a root at the time of this read. Return its value. *)
      value

(* -------------------------------------------------------------------------- *)

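(* [set x v] updates the value stored at [x]'s representative vertex to [v].
   The recursive function below takes the new content [cx'] (a [Root]
   object) as its second argument; the wrapper that follows it builds
   [cx'] out of [v]. *)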

(* The linearization point is the successful CAS. *)

let rec set (x : 'a elem) (cx' : 'a content) : unit =
  let r = findc x in
  let seen = Atomic.get r.content in
  (* Attempt to install [cx'], but only if [r] is (still) a root. *)
  let installed =
    match seen with
    | Root _ ->
        Atomic.compare_and_set r.content seen cx'
    | Link _ ->
        false
  in
  (* If [r] is not a root or if the CAS has failed, then there has been
     interference. Retry, starting at [r]. *)
  if not installed then set r cx'

let[@inline] set (x : 'a elem) (v : 'a) : unit =
  (* The new [Root] object is allocated once, here, and is reused by
     every attempt that the recursive function [set] makes. *)
  set x (Root { value = v })

(* -------------------------------------------------------------------------- *)

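(* [update x f] replaces the value [v] stored at [x]'s representative vertex
   with [f v]. Because the CAS can fail, in which case the operation is
   retried, the function [f] may be applied more than once. *)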

(* The linearization point is the successful CAS. *)

let rec update (x : 'a elem) (f : 'a -> 'a) : unit =
  let r = findc x in
  match Atomic.get r.content with
  | Link _ ->
      (* [r] is not a root: there has been interference. Retry. *)
      update r f
  | Root { value } as seen ->
      (* [r] is a root. Compute the new value and attempt to install it.
         If the CAS fails, then the content of [r] has changed in the
         meantime: retry, which applies [f] again. *)
      let desired = Root { value = f value } in
      if not (Atomic.compare_and_set r.content seen desired) then
        update r f

(* -------------------------------------------------------------------------- *)

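(* [union x y] merges the equivalence classes of [x] and [y] by installing a
   link from one root vertex to the other. It returns [None] if [x] and [y]
   are found to lead to the same vertex, in which case nothing is changed.
   Otherwise, it returns [Some v], where [v] is the value that was stored at
   the root vertex that has just been turned into a child. *)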

(* The linearization point is the successful CAS. *)

let rec union (x : 'a elem) (y : 'a elem) : 'a option =
  (* Walk up from [x] and from [y] as far as possible. *)
  let rx = findc x
  and ry = findc y in
  if rx == ry then
    (* Both paths lead to the same vertex. There is nothing to do. *)
    None
  else begin
    (* Distinct vertices carry distinct identifiers. *)
    assert (rx.id <> ry.id);
    (* The vertex whose identifier is larger must become the child, so
       that a parent always has a smaller identifier than its child. *)
    let child, parent = if rx.id > ry.id then (rx, ry) else (ry, rx) in
    match Atomic.get child.content with
    | Root { value } as seen
      when Atomic.compare_and_set child.content seen (Link { parent }) ->
        (* [child] was a root, and the CAS has turned it into a link to
           [parent]. There is no need for [parent] to be a root. The
           value that [child] used to carry is returned. *)
        Some value
    | Root _ | Link _ ->
        (* Either [child] is no longer a root, or the CAS has failed.
           Try again. *)
        union rx ry
  end

let[@inline] union (x : 'a elem) (y : 'a elem) : 'a option =
  (* If [x] and [y] are physically the same vertex, then there is nothing
     to do. Otherwise, defer to the recursive function [union]. *)
  if x != y then union x y else None

(* -------------------------------------------------------------------------- *)

(* [eq x y] determines whether the vertices [x] and [y] belong to the same
   equivalence class. *)

(* We follow Anderson and Woll's algorithm, as presented by Jayanti and
   Tarjan. *)

let rec eq (x : 'a elem) (y : 'a elem) : bool =
  if x == y then
    (* The same vertex has been reached from both sides. *)
    true
  else
    match Atomic.get x.content with
    | Link link ->
        (* [x] is no longer a root: there has been interference.
           Resume the search at its parent. *)
        continue_eq link.parent y
    | Root _ ->
        (* This is the subtle case. [x] and [y] are distinct vertices,
           and each of them was a root at the time where [findc] found
           it. [x] was found first and is still a root now, so [x] has
           been a root throughout. In particular, at the instant where
           [y] was found, [x] and [y] were two distinct roots. The
           operation is linearized at that instant; the answer is
           [false]. *)
        false

and continue_eq (x : 'a elem) (y : 'a elem) : bool =
  (* The order of these two searches matters: [x] must be searched for
     first, and [y] second. *)
  let rx = findc x in
  let ry = findc y in
  eq rx ry

let[@inline] eq (x : 'a elem) (y : 'a elem) : bool =
  (* If [x] and [y] are physically the same vertex, then no search is
     needed. Otherwise, search, then compare. *)
  if x == y then true else continue_eq x y