123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128(******************************************************************************)(* *)(* Inferno *)(* *)(* François Pottier, Inria Paris *)(* *)(* Copyright Inria. All rights reserved. This file is distributed under the *)(* terms of the MIT License, as described in the file LICENSE. *)(* *)(******************************************************************************)(** This module implements a simple and efficient union/find algorithm.
See Robert E. Tarjan, ``Efficiency of a Good But Not Linear Set
Union Algorithm'', JACM 22(2), 1975. *)(** The abstraction defined by this module is a set of points,
partitioned into equivalence classes. With each equivalence class,
a piece of information, of abstract type ['a], is associated; we
call it a descriptor.
A point is implemented as a cell, whose (mutable) contents consist
of a single link to either information about the equivalence class,
or another point. Thus, points form a graph, which must be acyclic,
and whose connected components are the equivalence classes. In
every equivalence class, exactly one point has no outgoing edge,
and carries information about the class instead. It is the class's
representative element.
Information about a class consists of an integer weight (the number
of elements in the class) and of the class's descriptor. *)type'apoint='alinkrefand'alink=|Infoof(* weight: *)int*(* descriptor: *)'a|Linkof'apoint(** [fresh desc] creates a fresh point and returns it. It forms an
equivalence class of its own, whose descriptor is [desc]. *)letfreshdesc=ref(Info(1,desc))(** [repr point] returns the representative element of [point]'s
equivalence class. It is found by starting at [point] and following
the links. For efficiency, the function performs path compression
at the same time. *)letrecreprpoint=match!pointwith|Linkpoint'->letpoint''=reprpoint'inifpoint''!=point'then(* [point''] is [point']'s representative element. Because we just
invoked [repr point'], [!point'] must be [Link point'']. We
write this value into [point], performing path compression.
Note that we do not perform memory allocation. *)point:=!point';point''|Info_->point(** [find point] returns the descriptor associated with [point]'s
equivalence class. *)letrecfindpoint=(* By not calling [repr] immediately, we optimize the common cases
where the path starting at [point] has length 0 or 1, at the
expense of the general case. *)match!pointwith|Info(_,desc)->desc|Linkpoint'->match!point'with|Info(_,desc)->desc|Link_->find(reprpoint)(** [union f point1 point2] merges the equivalence classes associated
with [point1] and [point2] into a single class. Then, (and only
then,) it sets the descriptor of this class to the one produced by
applying the function [f] to the descriptors of the two original
classes. It has no effect if [point1] and [point2] are already in
the same equivalence class.
The fact that [point1] and [point2] do not originally belong to the
same class guarantees that we do not create a cycle in the graph.
The weights are used to determine whether [point1] should be made
to point to [point2], or vice-versa. By making the representative
of the smaller class point to that of the larger class, we
guarantee that paths remain of logarithmic length (not accounting
for path compression, which makes them yet smaller). *)letunionfpoint1point2=letpoint1=reprpoint1andpoint2=reprpoint2inifpoint1!=point2thenmatch!point1,!point2with|Info(weight1,desc1),Info(weight2,desc2)->(* Be careful: the function [f] must not be invoked before the
equivalence classes have been merged. *)ifweight1>=weight2thenbeginpoint2:=Linkpoint1;point1:=Info(weight1+weight2,fdesc1desc2)endelsebeginpoint1:=Linkpoint2;point2:=Info(weight1+weight2,fdesc1desc2)end|Link_,_|_,Link_->assertfalse(* [repr] guarantees that [link] matches [Info _]. *)(** [equivalent point1 point2] tells whether [point1] and [point2]
belong to the same equivalence class. *)letequivalentpoint1point2=reprpoint1==reprpoint2(** [is_representative] maps exactly one member of each equivalence class
to [true]. *)letis_representativepoint=match!pointwith|Link_->false|Info_->true