(* ConcurrentUnionFind.ml *)

(******************************************************************************)
(*                                                                            *)
(*                                 UnionFind                                  *)
(*                                                                            *)
(*                       François Pottier, Inria Paris                        *)
(*                                                                            *)
(*  Copyright Inria. All rights reserved. This file is distributed under      *)
(*  the terms of the GNU Library General Public License version 2, with a     *)
(*  special exception on linking, as described in the file LICENSE.           *)
(*                                                                            *)
(******************************************************************************)

(* This module offers a concurrent variant of the union-find data structure.

   The data structure is based on disjoint set forests.

   Path compression is performed as usual, using ordinary write instructions
   (as opposed to CAS instructions) because data races during path
   compression are benign.

   Linking is by random index. Every vertex carries a unique identifier whose
   most significant bits are randomly generated.
When two vertices are linked, a comparison between their identifiers
   determines the direction of the new link: we maintain the invariant
   property that the parent always has a smaller identifier than the child.
   This ensures that, even in concurrent scenarios, no cycle can appear. *)

(* -------------------------------------------------------------------------- *)

(* A vertex holds either a user value (it is then a root) or a pointer to a
   parent vertex (it has then been linked). *)

(* Path compression overwrites the [parent] field of a [Link] object in
   place; therefore each [Link] object must be unique. Such an object is
   installed by the CAS instruction in [union] that replaces a [Root] object
   with a [Link] object; from then on, the atomic reference itself is never
   modified again. Indeed, every CAS instruction in this file applies to a
   [Root] object. *)

type 'a content =
  | Root of { value : 'a }
  | Link of { mutable parent : 'a elem }

(* ['a elem] is the type of the vertices of the union-find data structure. *)

(* Every vertex, not only a root vertex, carries an identifier: identifiers
   are consulted during path compression to prevent the creation of
   cycles. *)

and 'a elem = {
  id : int;
  content : 'a content Atomic.t;
}

(* -------------------------------------------------------------------------- *)

(* Unique identifiers are drawn from a single generator that is shared by all
   domains. *)

(* To obtain balanced forests, the ordering of identifiers should be random,
   while identifiers must remain unique. To this end, an identifier is the
   combination of a unique identifier (in the least significant bits) and a
   random number (in the most significant bits). *)

module G = struct

  include SharedGeneratorOfUniqueIds

  (* The bit layout used by [fresh] is written for 64-bit platforms. *)
  let () =
    assert (Sys.word_size = 64)

  (* [fresh()] shadows the generator's own [fresh]. It first draws a unique
     identifier from the underlying generator, then draws a 12-bit random
     salt, and places the salt in the 12 uppermost bits of the result. *)
  (* NOTE(review): uniqueness of the result presumably relies on the
     underlying identifier staying below [1 lsl 51], so that it never
     overlaps the salt -- confirm against SharedGeneratorOfUniqueIds. *)
  (* The uppermost salt bit is the sign bit of a 63-bit integer, so an
     identifier can be negative. In this file, identifiers are only compared
     with one another. *)
  let fresh () =
    let unique = fresh () in
    let salt = Random.int (1 lsl 12) in
    (salt lsl (63 - 12)) lor unique

end

(* -------------------------------------------------------------------------- *)

(* [make v] allocates a new root vertex, which carries a fresh identifier and
   the value [v]. *)

let make (v : 'a) : 'a elem =
  { id = G.fresh (); content = Atomic.make (Root { value = v }) }

(* -------------------------------------------------------------------------- *)

(* [find x] attempts to find the representative vertex of the equivalence
   class of [x], by walking from [x] towards its ancestors until a vertex
   that is observed to be a root is met. Because other threads can
   interfere, the vertex that is returned is not necessarily a root any
   more; still, it is a vertex [z] that lies in the same equivalence class
   as [x] and that satisfies [x.id >= z.id]. *)

let rec find (x : 'a elem) : 'a elem =
  match Atomic.get x.content with
  | Link link ->
      find link.parent
  | Root _ ->
      x

(* [compress x z] performs path compression along the path that starts at
   [x], redirecting vertices towards [z]. Another thread can concurrently
   destroy the path from [x] to [z], so there is no guarantee that [z] is
   actually reached. Compression goes on only as long as the invariant (a
   parent has a smaller identifier than its child) can be maintained. In
   every case, the result is [z]. *)

let rec compress x z =
  match Atomic.get x.content with
  | Link link ->
      (* There is an edge from [x] to [y]. *)
      let y = link.parent in
      assert (x.id > y.id);
      if y.id <= z.id then
        (* Redirecting [x] towards [z] would bring no benefit, and going
           further could break the invariant. Stop. *)
        z
      else begin
        (* Replace the edge from [x] to [y] with an edge from [x] to [z].
           This is beneficial (unless another thread interferes) and
           preserves the invariant. Then, continue at [y]. *)
        assert (x.id > z.id);
        link.parent <- z;
        compress y z
      end
  | Root _ ->
      (* [x] is a root. Stop. *)
      z

(* [findc x] behaves like [find x] and performs path compression. *)

(* A simple version of it could be defined as follows:

     let findc (x : 'a elem) : 'a elem =
       let z = find x in
       compress x z

   We optimize the common case where [x] is a root.
*)

(* If [x] is observed to be a root, it is returned immediately. Otherwise,
   the search starts at the parent of [x], and the path out of [x] is then
   compressed towards the vertex that was found. *)

let[@inline] findc (x : 'a elem) : 'a elem =
  match Atomic.get x.content with
  | Link link ->
      compress x (find link.parent)
  | Root _ ->
      x

(* -------------------------------------------------------------------------- *)

(* [get x] returns the value stored at the representative vertex of [x]. *)

(* The linearization point is the atomic read whose result is [Root _]. *)

let rec get (x : 'a elem) : 'a =
  let r = findc x in
  match Atomic.get r.content with
  | Root { value } ->
      (* [r] is a root. Success. *)
      value
  | Link _ ->
      (* [r] has been linked in the meantime. Start again from [r]. *)
      get r

(* -------------------------------------------------------------------------- *)

(* [set x v] replaces the value stored at the representative vertex of [x]
   with [v]. *)

(* The linearization point is the successful CAS. *)

(* This auxiliary function receives the new content [cx'], already
   allocated, so that no allocation takes place when an attempt is retried.
   It is shadowed by the user-facing function [set] below. *)

let rec set (x : 'a elem) (cx' : 'a content) : unit =
  let r = findc x in
  match Atomic.get r.content with
  | Root _ as seen ->
      (* [r] appears to be a root. If the CAS succeeds, the root has been
         updated, and we are done. Otherwise, there has been interference:
         try again. *)
      if not (Atomic.compare_and_set r.content seen cx') then
        set r cx'
  | Link _ ->
      (* There has been interference. Try again. *)
      set r cx'

let[@inline] set (x : 'a elem) (v : 'a) : unit =
  set x (Root { value = v })

(* -------------------------------------------------------------------------- *)

(* [update x f] replaces the value [v] stored at the representative vertex of
   [x] with [f v]. The function [f] is applied once per attempt, so it is
   applied several times if an attempt fails because of interference. *)

(* The linearization point is the successful CAS. *)

let rec update (x : 'a elem) (f : 'a -> 'a) : unit =
  let r = findc x in
  match Atomic.get r.content with
  | Root { value } as seen ->
      (* [r] appears to be a root. Compute the new value and attempt to
         install it. If the CAS fails, there has been interference: try
         again. *)
      if not (Atomic.compare_and_set r.content seen (Root { value = f value }))
      then update r f
  | Link _ ->
      (* There has been interference. Try again. *)
      update r f

(* -------------------------------------------------------------------------- *)

(* [union x y] merges the equivalence classes of [x] and [y] by installing a
   link from one root vertex to the other. *)

(* The linearization point is the successful CAS.
*)

(* The result is [None] if the two paths lead to the same vertex. Otherwise,
   it is [Some v], where [v] is the value that was carried by the root that
   has just become a child. *)

let rec union (x : 'a elem) (y : 'a elem) : 'a option =
  (* Follow the paths out of [x] and [y] as far as possible. *)
  let rx = findc x in
  let ry = findc y in
  if rx == ry then
    (* Both paths lead to the same vertex. *)
    None
  else begin
    (* Distinct vertices must carry distinct identifiers. *)
    assert (rx.id <> ry.id);
    if rx.id > ry.id then begin
      (* The vertex [rx] must become the child. If [rx] is a root, and if we
         are able to create an edge from [rx] to [ry], then declare success.
         There is no need to ensure that [ry] is a root. Otherwise, try
         again. *)
      match Atomic.get rx.content with
      | Root { value } as seen ->
          if Atomic.compare_and_set rx.content seen (Link { parent = ry })
          then Some value
          else union rx ry
      | Link _ ->
          union rx ry
    end
    else begin
      (* The symmetric case: the vertex [ry] must become the child. *)
      match Atomic.get ry.content with
      | Root { value } as seen ->
          if Atomic.compare_and_set ry.content seen (Link { parent = rx })
          then Some value
          else union rx ry
      | Link _ ->
          union rx ry
    end
  end

(* The user-facing function [union] tests for physical equality first, so
   that no path is followed when [x] and [y] are the same vertex. *)

let[@inline] union (x : 'a elem) (y : 'a elem) : 'a option =
  if x == y then None else union x y

(* -------------------------------------------------------------------------- *)

(* [eq x y] determines whether the vertices [x] and [y] belong in the same
   equivalence class. *)

(* We follow Anderson and Woll's algorithm, as presented by Jayanti and
   Tarjan. *)

let rec eq (x : 'a elem) (y : 'a elem) : bool =
  if x == y then
    true
  else
    match Atomic.get x.content with
    | Root _ ->
        (* This case is subtle. [x] and [y] are distinct vertices. Each of
           them was a root at the time where it was found by [findc].
           Furthermore, [x] is still a root now, so it has been a root all
           along. Therefore, at the point in time where [y] was found, both
           [x] and [y] were roots. This operation can be linearized at that
           point in time, and the result is [false]. *)
        false
    | Link link ->
        (* There has been interference. Continue from the parent of [x]. *)
        continue_eq link.parent y

and continue_eq (x : 'a elem) (y : 'a elem) : bool =
  (* Note: find [x] first, then [y]. Order matters here: the argument given
     in [eq] for the case where [x] is still a root depends on [y] being
     found after [x]. *)
  let rx = findc x in
  let ry = findc y in
  eq rx ry

(* The user-facing function [eq] tests for physical equality first, then
   follows the paths out of [x] and [y]. *)

let[@inline] eq (x : 'a elem) (y : 'a elem) : bool =
  if x == y then true else continue_eq x y