123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364openUtilsLib.UtilsmoduleLog=UtilsLib.Xlog.Make(structletname="VarUnionFind"end)(** Modules with this module type should provide Union-Find algorithms
and the indexed storage data structure. Note that we take the
opportunity of implementing from scratch such algorithms to allow
the [find] function returns not only the index of the
representative and the values it indexes, but also the storage
data structure, so that the [find] algorithm can modify it, in
particular with path compression.
*)moduleUF(Value:sigtypettypevaluevalunfold:value->t->(int*valuelist)optionvalpp:Format.formatter->value->unitend)=structmoduleStore=structtype'at='aIntMap.texceptionStore_Not_foundletempty_=IntMap.emptyletgetkm=tryIntMap.findkmwithNot_found->raiseStore_Not_foundletsetkvm=IntMap.addkvmletcopym=mletiter=IntMap.iter[@@warning"-32"]end(** The type of the values (content) that are indexed. It is either
an actual value of type ['a] or a link to another indexed
value. If a content at an index [i] points to [i], it is meant
that to be a variable.*)typecontent=|Link_toofint|ValueofValue.value|Constrof(int*intlist)letrecpp_content_cellfmtc=matchcwith|Link_toi->Format.fprintffmt"Linked to %d"i|Valuev->Format.fprintffmt"Value %a"Value.ppv|Constr(i,lst)->Format.fprintffmt"Contructeur %d(%a)"i(pp_list~sep:","(funfmtj->pp_content_cellfmt(Link_toj)))lsttypet={rank:intStore.t;parents:contentStore.t;limit:int}(** The actual type of the data structure. The rank is used to
implement weighted union. See {{:
http://www.risc.jku.at/education/courses/ss2012/unification/slides/02_Syntactic_Unification_Improved_Algorithms.pdf}
Introduction to Unification Theory. Speeding Up (Temur
Kutsia)} *)letempty={rank=Store.empty();parents=Store.empty();limit=0}exceptionUnion_Failureletpp_content?rankparentsfmtindex=letpp_rankfmtindex=matchrankwith|None->()|Somerk->Format.fprintffmt"\t(%d)"(Store.getindexrk)inFormat.fprintffmt"%10d <---> %a%a"indexpp_content_cell(Store.getindexparents)pp_rankindexletpp_intern?rankfmtparents=leti=ref1inlet()=Format.fprintffmt"@[<v>"intrywhiletruedolet()=Format.fprintffmt"@[%a@]@,"(pp_content?rankparents)!iini:=!i+1donewithStore.Store_Not_found->Format.fprintffmt"@]"letppfmtstore=pp_intern~rank:store.rankfmtstore.parentsletgenerate_new_var{rank;parents;limit}=leti=limit+1in(i,{rank=Store.seti0rank;parents=Store.seti(Link_toi)parents;limit=i;})letgenerate_new_constr{rank;parents;limit}c=leti=limit+1in(i,{rank=Store.seti0rank;parents=Store.seti(Constrc)parents;limit=i;})letrank_incrementih={hwithrank=Store.seti(1+tryStore.getih.rankwithStore.Store_Not_found->0)h.rank;}(** [find_and_instantiate_aux i t f] returns a new indexed storage
datastructure [f'] where the content at index [i] (and the ones
it points to) has been set to [Value t]. If [i]'s representative
indexes a variable or a value equal to [Value t] then the
instantiation suceeds, otherwise it raises Union_failure. It
also performs path compression. *)letrecfind_and_instantiate_auxitermtablef=matchStore.getif.parentswith|Valuevwhenv=term->f|Value_->raiseUnion_Failure(* An actual value was reached at index [i] and we're in the case
that it differs from [term]. So the union fails *)|Link_tonextwhennext=i->((* The content indexed by [i] points to [i]. [i] is then the
representative for the variable it denotes and can be unified
with [term]. [f] is updated. *)matchValue.unfoldtermtablewith|None->{fwithparents=Store.seti(Valueterm)f.parents}|Some(c,args)->leti_args,new_content=List.fold_left(fun(acc,cont)arg->letvar,new_cont=generate_new_varcontin(var::acc,find_and_instantiate_auxvarargtable(rank_incrementvarnew_cont)))([],f)argsin{new_contentwithparents=Store.seti(Constr(c,List.revi_args))new_content.parents;})|Link_tonext->(* In the other cases, we follow the links to reach the
representative and the content it indexes *)letnew_f=find_and_instantiate_auxnexttermtablefin(* Then we update the storage data structure linking the context
indexed by [i] directly to the representative index. We know
it's safe to do it now since unification succeeded. *)letupdated_parents=Store.seti(Valueterm)new_f.parentsin{fwithparents=updated_parents}|Constr(c,i_args)->(matchValue.unfoldtermtablewith|None->raiseUnion_Failure|Some(c',args)whenc=c'->(tryList.fold_left2(funcontvararg->find_and_instantiate_auxvarargtablecont)fi_argsargswithInvalid_argument_->raiseUnion_Failure)|Some(_c',_)->raiseUnion_Failure)(** [instantiate i t h] returns a new indexed storage data structure
where the value indexed by [i] and [t] have been unified. It
fails and raises the {! UnionFind.Union_Failure} exception if
[i]'s representative indexes an actual values [Value a] such
that [a] differs from [t]. *)letinstantiateittableh=find_and_instantiate_auxittableh(** [find_aux i f] returns a pair [(i',v),f'] where [i'] is the
index of the representative of the data indexed by [i]. [i=i']
means that the [i]-th element is linked to itself: it is meant
to be a variable, not an actual value. It also performs path
compression *)letrecfind_auxif=matchStore.getifwith|Value_asv->((i,v),f)(* An actual value was reached at index [i]. So [i] is returned
together with [v] and [f] *)|Constr_asv->((i,v),f)(* An almost actual value was reached at index [i]. So [i] is returned
together with [v] and [f] *)|Link_tonextasvwhennext=i->((i,v),f)(* The content indexed by [i] points to [i]. [i] is then the
representative for the variable it denotes. *)|Link_tonext->(* In the other cases, we follow the links to reach the
representative and the content it indexes *)let(representative_index,representative_value),new_f=find_auxnextfin(* Then we update the storage data structure linking the context
indexed by [i] directly to the representative index *)letupdated_f=Store.seti(Link_torepresentative_index)new_finLog.debug(funm->m"the \"UnionFinf.find\" function indeed returns a Link_to \
itself: %B"(let()=matchrepresentative_valuewith|Link_tovariable->assert(representative_index=variable)|_->()intrue));((representative_index,representative_value),updated_f)(** [find i h] returns a pair [(i',v),f'] where [i'] is the index of
the representative of the data indexed by [i]. [i=i'] means that
the [i]-th element is linked to itself: it is meant to be a
variable, not an actual value. It also performs path
compression. The difference with [find_aux] is that it applyes
to the whole storage data structure (that includes data for
weighted union). *)letfindih=letrep_i,f=find_auxih.parentsin(rep_i,{hwithparents=f})(** [extract ~start:s i t] returns a list of the [i] first elements
of [t] starting from position [s] (default is 1, first
position). It is ensured that the results only contain the
values of representatives (i.e it follows the [Link_to] links
until the value of the representative before returning it). *)letextract?(start=1)i{parents=p;_}=Log.debug(funm->m"Going to extract %d elements starting at %d..."istart);letrecextract_auxkres=matchk-startwith|jwhenj>0->let(_,c),_=find_aux(start-1+j)pinextract_aux(start+j-1)(c::res)|_->resinextract_aux(start+i)[](** [union i j h] returns a new storage data structure [h'] where
[h'] has an equivalent content as [h] plus the unification
between the elements indexed by [i] and [j] and plus, possibly,
some path compression. *)letrecunionijh=letrep_i,h'=findihinletrep_j,h''=findjh'inmatch(rep_i,rep_j)with(* in case [rep_i] (rexp. [rep_j]) is a [(i,Link_to i')] we should
have [i=i'], else there is a bug *)|(_,v_i),(_,v_j)whenv_i=v_j->h''|(_rep_i_index,(Value_asv_i)),(rep_j_index,Link_to_)->{h''withparents=Store.setrep_j_indexv_ih''.parents}|(rep_i_index,Link_to_),(_rep_j_index,(Value_asv_j))->{h''withparents=Store.setrep_i_indexv_jh''.parents}|(rep_i_index,Constr_),(rep_j_index,Link_to_)->{h''withparents=Store.setrep_j_index(Link_torep_i_index)h''.parents;}|(rep_i_index,Link_to_),(rep_j_index,Constr_)->{h''withparents=Store.setrep_i_index(Link_torep_j_index)h''.parents;}|(rep_i_index,Constr(c_i,args_i)),(rep_j_index,Constr(c_j,args_j))whenc_i=c_j->leth'''=union_listargs_iargs_jh''inletrk_i=Store.getrep_i_indexh'''.rankinletrk_j=Store.getrep_j_indexh'''.rankinifrk_i>rk_jthen{h'''withparents=Store.setrep_i_index(Constr(c_i,List.revargs_i))(Store.setrep_j_index(Link_torep_i_index)h'''.parents);}elseifrk_i<rk_jthen{h'''withparents=Store.setrep_j_index(Constr(c_i,List.revargs_j))(Store.setrep_i_index(Link_torep_j_index)h'''.parents);}else{h'''withparents=Store.setrep_i_index(Constr(c_i,List.revargs_i))(Store.setrep_j_index(Link_torep_i_index)h'''.parents);rank=Store.setrep_i_index(rk_i+1)h'''.rank;}|(rep_i_index,Link_to_i'),(rep_j_index,Link_to_j')->letrk_i=Store.getrep_i_indexh''.rankinletrk_j=Store.getrep_j_indexh''.rankinifrk_i>rk_jthen{h''withparents=Store.setrep_j_index(Link_torep_i_index)h''.parents;}elseifrk_i<rk_jthen{h''withparents=Store.setrep_i_index(Link_torep_j_index)h''.parents;}else{h''withparents=Store.setrep_j_index(Link_torep_i_index)h''.parents;rank=Store.setrep_i_index(rk_i+1)h''.rank;}|(_,Value_v_i),(_,Value_v_j)->(* v_i=v_j is caught by the first case *)raiseUnion_Failure|(_,Value_),(_,Constr_)->raiseUnion_Failure|(_,Constr_),(_,Value_)->raiseUnion_Failure|(_,Constr_),(_,Constr_)->(* Constr (c,_), Constr (c,_) is caught by the 6th case *)raiseUnion_Failureandunion_listargs_iargs_jh=match(args_i,args_j)with|[],[]->h|i::tl_i,j::tl_j->union_listtl_itl_j(unionijh)|_,_->raiseUnion_Failure(* cyclic_aux includes path compression *)letreccyclic_auxifacc=matchStore.getifwith|Value_v->(false,i,f)|Link_tonextwhennext=i->(false,i,f)|Link_tonext->ifIntSet.memnextaccthen(true,i,f)elseletcyclic,representative_index,new_f=cyclic_auxnextf(IntSet.addnext(IntSet.addiacc))inletupdated_f=Store.seti(Link_torepresentative_index)new_fin(cyclic,representative_index,updated_f)|Constr(_c,args)->letnew_acc=IntSet.addiaccinList.fold_left(fun(c,l_i,l_f)arg->Log.debug(funm->m"Preparing to check cyclicity from %d"arg);ifIntSet.memargnew_accthen(true,l_i,l_f)elseletis_c,_,new_f=cyclic_auxargl_fnew_accin(is_c||c,l_i,new_f))(false,i,f)args(* the cyclic function, calling cyclic_aux, compress paths
(hence also returns the parents) *)letcyclicih=Log.debug(funm->m"Checking cyclicity from %d of:@,@[<v> @[%a@]@]"ipph);(* log_content Logs.Debug h;
Log.debug (fun m -> m "Done."); *)letres,_,f=cyclic_auxih.parentsIntSet.emptyin(res,{hwithparents=f})letcopy{rank=r;parents=p;limit}={rank=Store.copyr;parents=Store.copyp;limit}end