123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387openUtilsLib.Utils(** Modules with this module type should provide Union-Find algorithms
and the indexed storage data structure. Note that we take the
opportunity of implementing from scratch such algorithms to allow
the [find] function returns not only the index of the
representative and the values it indexes, but also the storage
data structure, so that the [find] algorithm can modify it, in
particular with path compression.
*)moduleLog=UtilsLib.Xlog.Make(structletname="UnionFind"end)moduletypeS=sigtype'at(** The type of the indexed data structure *)(** The type of the values (content) that are indexed. It is either
an actual value of type ['a] or a link to another indexed
value. If a content at an index [i] points to [i], it is meant
that to be a variable.*)type'acontent=Link_toofint|Valueof'aexceptionUnion_Failure(** Exception raised when a the union of to indexed value can not
happen. It should be raised by the [union] function when it
amounts to make the union between to actual values [Value a] and
[Value b] and [a != b]. *)valcreate:'acontentlist->'at(** [create l [d]] returns the corresponding indexed storage data
structure where each value (or link) is indexed by its position
in [l] (starting at 1). [d] is a data that may or may not be
used to fill at init the indexed data structure. *)valextract:?start:int->int->'at->'acontentlist(** [extract ~start:s i t] returns a list of the [i] first elements
of [t] starting from position [s] (default is 1, first
position) *)valfind:int->'at->(int*'acontent)*'at(** [find i h] returns not only the index of the representative and
the values it indexes, but also the storage data structure, so
that the [find] algorithm can modify it, in particular with path
compression. *)(* the content returned by [find] should not be a link. Can we
enforce this using polymorphic variants and/or GADT? *)valunion:int->int->'at->'at(** [union i j h] returns a new indexed storage data structure where
values indexed by [i] and [j] have been unified (ie one of the
two is now linked to the index of the representative of the
other. It fails and raises the {! UnionFind.Union_Failure}
exception if both [i] and [j] representatives index actual
values [Value a] and [Value b] and [a != b]. *)valinstantiate:int->'a->'at->'at(** [instantiate i t h] returns a new indexed storage data structure
where the value indexed by [i] and [t] have been unified. It
fails and raises the {! UnionFind.Union_Failure} exception if
[i]'s representative indexes an actual values [Value a] such
that [a] differs from [t]. *)valcyclic:int->'at->bool*'at(** [cyclic i h] returns a pair [(b,h')] where [b] is [true] if [h]
has a cycle (following the [Link_to] links) containing [i] and
[false] otherwise, and where [h'] contains the same information
as [h] (possibly differently stored, for instance using path
compression while checking [h] cyclicity. *)valcopy:'at->'atvalpp:?size:int->Format.formatter->'at->unitend(** Modules with this module type should provide an indexed (by [int]
indexes) storage data structure for ['a] type values and access
and update functions.
*)moduletypeStore=sigtype'atexceptionStore_Not_found(*
(** [empty i] should return an empty indexed storage data structure
that will allow indexing {e with values from [1] to [i]}. *)
(* val empty : int -> 'a t *)
*)valmake:int->'a->'at(** [make i data] should return an indexed storage data structure
that will allow indexing {e with value [data] from [1] to
[i]}. *)valget:int->'at->'avalset:int->'a->'at->'atvalcopy:'at->'atvallength:'at->int(** [length s] returns the index [n] such that the indexed storage
data structure allows indexing from [1] to [i]. *)end(** This (functor) module implements a {! UnionFind} data structure. The
[S] parameter is used to try different implementations of indexed
data structure, in particular eventually persistent arrays as
described in {{:
http://www.lri.fr/~filliatr/ftp/publis/puf-wml07.ps}"A Persistent
Union-Find Data Structure" (Sylvain Conchon and Jean-Chrisophe
Filliâtre} *)moduleMake(S:Store):S=struct(** The type of the values (content) that are indexed. It is either
an actual value of type ['a] or a link to another indexed
value. If a content at an index [i] points to [i], it is meant
that to be a variable.*)type'acontent=Link_toofint|Valueof'atype'at={rank:intS.t;parents:'acontentS.t}(** The actual type of the data structure. The rank is used to
implement weighted union. See {{:
http://www.risc.jku.at/education/courses/ss2012/unification/slides/02_Syntactic_Unification_Improved_Algorithms.pdf}
Introduction to Unification Theory. Speeding Up (Temur
Kutsia)} *)exceptionUnion_Failureletpp_content?size?rankparentsfmtindex=letpp_content_cellfmtc=matchcwith|Link_toi->Format.fprintffmt"Linked to %d"i|Value_->Format.fprintffmt"Some value"inletpp_sizefmtsize=matchsizewithNone->()|Somes->Format.fprintffmt"/%d"sinletpp_rankfmtindex=matchrankwith|None->()|Somerk->Format.fprintffmt"\t(%d)"(S.getindexrk)inFormat.fprintffmt"%10d%a <---> %a%a"indexpp_sizesizepp_content_cell(S.getindexparents)pp_rankindexletpp_intern?size?rankfmtparents=leti=ref1inlet()=Format.fprintffmt"@[<v>"intrywhiletruedolet()=Format.fprintffmt"@[%a@]@,"(pp_content?size?rankparents)!iini:=!i+1donewithS.Store_Not_found->Format.fprintffmt"@]"letpp?sizefmtstore=pp_intern?size~rank:store.rankfmtstore.parents(* Indexing starts at 1, not at 0 *)(* TODO: Should we check that indexes belong to the range, or that
links to belong the set of indexes? *)(* TODO: specify the properties of the data structure (no cycle,
coherent numbering, [find] always returns a value, etc. *)letcreatecontents=letln=List.lengthcontentsinletres,_=List.fold_left(fun({rank=r;parents=p},k)content->Log.debug(funm->m"Setting the following content at address %d:"k);matchcontentwith|Link_toiasc->Log.debug(funm->m"Link to %d"i);(* rank is unset for contents that are initially a link *)({rank=(tryletrank=S.getirinS.seti(rank+1)(S.setk0r)withS.Store_Not_found->S.seti1r);parents=S.setkcp;},k+1)|Value_asc->Log.debug(funm->m"Some value");({rank=(trylet_=S.getkrinrwithS.Store_Not_found->S.setk0r);parents=S.setkcp;},k+1))(* ({rank=S.empty ln;parents=S.empty ln},1) *)({rank=S.makeln0;parents=S.makeln(Link_to(-1))},1)contentsinlet()=Log.debug(funm->m"After creation, content is:@,@[<v>@[@[%a@]@]"(pp~size:ln)res)inres(** [find_aux i f] returns a pair [(i',v),f'] where [i'] is the
index of the representative of the data indexed by [i]. [i=i']
means that the [i]-th element is linked to itself: it is meant
to be a variable, not an actual value. It also performs path
compression *)letrecfind_auxif=Log.debug(funm->m"Extracting %d"i);Log.debug(funm->m"find_aux work with the following content:@,@[<v> @[%a@]@]"(pp_intern?size:None?rank:None)f);matchS.getifwith|Value_asv->((i,v),f)(* An actual value was reached at index [i]. So [i] is returned
together with [v] and [f] *)|Link_tonextasvwhennext=i->((i,v),f)(* The content indexed by [i] points to [i]. [i] is then the
representative for the variable it denotes. *)|Link_tonext->(* In the other cases, we follow the links to reach the
representative and the content it indexes *)let(representative_index,representative_value),new_f=find_auxnextfin(* Then we update the storage data structure linking the context
indexed by [i] directly to the representative index *)letupdated_f=S.seti(Link_torepresentative_index)new_finLog.debug(funm->m"the \"UnionFind.find\" function indeed returns a Link_to \
itself: %B"(let()=matchrepresentative_valuewith|Link_tovariable->assert(representative_index=variable)|_->()intrue));((representative_index,representative_value),updated_f)|exceptionS.Store_Not_found->let()=Log.debug(funm->m"Could not find %d in the store."i)inraiseS.Store_Not_found(** [find i h] returns a pair [(i',v),f'] where [i'] is the index of
the representative of the data indexed by [i]. [i=i'] means that
the [i]-th element is linked to itself: it is meant to be a
variable, not an actual value. It also performs path
compression. The difference with [find_aux] is that it applyes
to the whole storage data structure (that includes data for
weighted union). *)letfindih=letrep_i,f=find_auxih.parentsin(rep_i,{hwithparents=f})(** [extract ~start:s i t] returns a list of the [i] first elements
of [t] starting from position [s] (default is 1, first
position). It is ensured that the results only contain the
values of representatives (i.e it follows the [Link_to] links
until the value of the representative before returning it). *)letextract?(start=1)icontent=Log.debug(funm->m"Going to extract %d elements starting at %d...@,@[%a@]"istart(pp?size:None)content);letrecextract_auxkres=matchk-startwith|jwhenj>0->let(_,c),_=find_aux(start-1+j)content.parentsinextract_aux(start+j-1)(c::res)|_->resinextract_aux(start+i)[](** [union i j h] returns a new storage data structure [h'] where
[h'] has an equivalent content as [h] plus the unification
between the elements indexed by [i] and [j] and plus, possibly,
some path compression. *)letunionijh=letrep_i,h'=findihinletrep_j,h''=findjh'inmatch(rep_i,rep_j)with(* in case [rep_i] (rexp. [rep_j]) is a [(i,Link_to i')] we should
have [i=i'], else there is a bug *)|(_,v_i),(_,v_j)whenv_i=v_j->h''|(_,(Value_asv_i)),(rep_j_index,Link_to_)->{h''withparents=S.setrep_j_indexv_ih''.parents}|(rep_i_index,Link_to_),(_,(Value_asv_j))->{h''withparents=S.setrep_i_indexv_jh''.parents}|(rep_i_index,Link_to_),(rep_j_index,Link_to_)->letrk_i=S.getrep_i_indexh''.rankinletrk_j=S.getrep_j_indexh''.rankinifrk_i>rk_jthen{h''withparents=S.setrep_j_index(Link_torep_i_index)h''.parents;}elseifrk_i<rk_jthen{h''withparents=S.setrep_i_index(Link_torep_j_index)h''.parents;}else{rank=S.setrep_i_index(rk_i+1)h''.rank;parents=S.setrep_j_index(Link_torep_i_index)h''.parents;}|(_,Value_),(_,Value_)->raiseUnion_Failure(* v_i=v_j is caught by the first case *)(** [find_and_instantiate_aux i t f] returns a new indexed storage
datastructure [f'] where the content at index [i] (and the ones
it points to) has been set to [Value t]. If [i]'s representative
indexes a variable or a value equal to [Value t] then the
instantiation suceeds, otherwise it raises Union_failure. It
also performs path compression. *)letrecfind_and_instantiate_auxitermf=matchS.getifwith|Valuevwhenv=term->f|Value_->raiseUnion_Failure(* An actual value was reached at index [i] and we're in the case
that it differs from [term]. So the union fails *)|Link_tonextwhennext=i->S.seti(Valueterm)f(* The content indexed by [i] points to [i]. [i] is then the
representative for the variable it denotes and can be unified
with [term]. [f] is updated. *)|Link_tonext->(* In the other cases, we follow the links to reach the
representative and the content it indexes *)letnew_f=find_and_instantiate_auxnexttermfin(* Then we update the storage data structure linking the context
indexed by [i] directly to the representative index. We know
it's safe to do it now since unification succeeded. *)letupdated_f=S.seti(Valueterm)new_finupdated_f(** [instantiate i t h] returns a new indexed storage data structure
where the value indexed by [i] and [t] have been unified. It
fails and raises the {! UnionFind.Union_Failure} exception if
[i]'s representative indexes an actual values [Value a] such
that [a] differs from [t]. *)letinstantiateith=letf=find_and_instantiate_auxith.parentsin{hwithparents=f}(* cyclic_aux includes path compression *)letreccyclic_auxifacc=matchS.getifwith|Value_->(false,i,f)|Link_tonextwhennext=i->(false,i,f)|Link_tonext->ifIntSet.memnextaccthen(true,i,f)elseletcyclic,representative_index,new_f=cyclic_auxnextf(IntSet.addnext(IntSet.addiacc))inletupdated_f=S.seti(Link_torepresentative_index)new_fin(cyclic,representative_index,updated_f)(* the cyclic function, calling cyclic_aux, compress paths
(hence also returns the parents) *)letcyclicih=letres,_,f=cyclic_auxih.parentsIntSet.emptyin(res,{hwithparents=f})letcopy{rank=r;parents=p}={rank=S.copyr;parents=S.copyp}endmoduleStoreAsMap=structtype'at='aIntMap.texceptionStore_Not_found(* let empty _ = IntMap.empty *)letrecmake_auxidacc=ifi<=0thenaccelsemake_aux(i-1)d(IntMap.addidacc)letmakend=make_auxndIntMap.emptyletgetkm=tryIntMap.findkmwithNot_found->raiseStore_Not_foundletsetkvm=IntMap.addkvmletcopym=mletlengths=IntMap.cardinalsend