123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634(* This file is free software, part of containers. See file "license" for more details. *)(** {1 Prefix Tree} *)type'aiter=('a->unit)->unittype'aktree=unit->[`Nil|`Nodeof'a*'aktreelist](** {2 Signatures} *)(** {6 A Composite Word}
Words are made of characters, who belong to a total order *)moduletypeWORD=sigtypettypechar_valcompare:char_->char_->intvalto_iter:t->char_itervalof_list:char_list->tendmoduletypeS=sigtypechar_typekeytype'atvalempty:'atvalis_empty:_t->boolvaladd:key->'a->'at->'at(** Add a binding to the trie (possibly erasing the previous one) *)valremove:key->'at->'at(** Remove the key, if present *)valfind:key->'at->'aoption(** Find the value associated with the key, if any *)valfind_exn:key->'at->'a(** Same as {!find} but can fail.
@raise Not_found if the key is not present *)vallongest_prefix:key->'at->key(** [longest_prefix k m] finds the longest prefix of [k] that leads to
at least one path in [m] (it does not mean that the prefix is bound to
a value.
Example: if [m] has keys "abc0" and "abcd", then [longest_prefix "abc2" m]
will return "abc"
@since 0.17 *)valupdate:key->('aoption->'aoption)->'at->'at(** Update the binding for the given key. The function is given
[None] if the key is absent, or [Some v] if [key] is bound to [v];
if it returns [None] the key is removed, otherwise it
returns [Some y] and [key] becomes bound to [y] *)valfold:('b->key->'a->'b)->'b->'at->'b(** Fold on key/value bindings. Will use {!WORD.of_list} to rebuild keys. *)valmapi:(key->'a->'b)->'at->'bt(** Map values, giving both key and value. Will use {!WORD.of_list} to rebuild keys.
@since 0.17 *)valmap:('a->'b)->'at->'bt(** Map values, giving only the value.
@since 0.17 *)valiter:(key->'a->unit)->'at->unit(** Same as {!fold}, but for effectful functions *)valfold_values:('b->'a->'b)->'b->'at->'b(** More efficient version of {!fold}, that doesn't keep keys *)valiter_values:('a->unit)->'at->unitvalmerge:('a->'a->'aoption)->'at->'at->'at(** Merge two tries together. The function is used in
case of conflicts, when a key belongs to both tries *)valsize:_t->int(** Number of bindings *)(** {6 Conversions} *)valto_list:'at->(key*'a)listvalof_list:(key*'a)list->'atvalto_iter:'at->(key*'a)itervalof_iter:(key*'a)iter->'atvalto_iter_values:'at->'aitervalto_tree:'at->[`Charofchar_|`Valof'a|`Switch]ktree(** {6 Ranges} *)valabove:key->'at->(key*'a)iter(** All bindings whose key is bigger or equal to the given key, in
ascending order *)valbelow:key->'at->(key*'a)iter(** All bindings whose key is smaller or equal to the given key,
in decreasing order *)(**/**)valcheck_invariants:_t->bool(**/**)endmoduleMake(W:WORD):Swithtypechar_=W.char_andtypekey=W.t=structtypechar_=W.char_typekey=W.tmoduleM=Map.Make(structtypet=char_letcompare=W.compareend)type'at=|Empty|Consofchar_*'at(* simple case *)|Nodeof'aoption*'atM.t(* invariants:
- for Path(l,t) l is never empty
- for Node (None,map) map always has at least 2 elements
- for Node (Some _,map) map can be anything *)letempty=Emptylet_invariant=function|Node(None,map)whenM.is_emptymap->false|_->trueletreccheck_invariants=function|Empty->true|Cons(_,t)->check_invariantst|Node(None,map)whenM.is_emptymap->false|Node(_,map)->M.for_all(fun_v->check_invariantsv)mapletis_empty=function|Empty->true|_->falselet_idx=x(* fold [f] on [seq] with accumulator [acc], and call [finish]
on the accumulator once [seq] is exhausted *)let_fold_iter_and_thenf~finishaccseq=letacc=refaccinseq(funx->acc:=f!accx);finish!acclet_filter_map_iterfseqk=seq(funx->matchfxwith|None->()|Somey->ky)let_iter_mapfseqk=seq(funx->k(fx))let_iter_append_list_revlseq=letl=reflinseq(funx->l:=x::!l);!llet_iter_append_listlseq=List.rev_append(_iter_append_list_rev[]seq)lletiter_of_mapmapk=M.iter(funkeyv->k(key,v))map(* return common prefix, and disjoint suffixes *)letrec_merge_listsl1l2=matchl1,l2with|[],_|_,[]->[],l1,l2|c1::l1',c2::l2'->ifW.comparec1c2=0then(letpre,rest1,rest2=_merge_listsl1'l2'inc1::pre,rest1,rest2)else[],l1,l2(* sub-tree t prefixed with c *)let_consct=Cons(c,t)(* build a Node value *)let_mk_nodevaluemap=matchvaluewith|Some_->Node(value,map)|None->ifM.is_emptymapthenEmptyelseifM.cardinalmap=1then(letc,sub=M.min_bindingmapin_conscsub)elseNode(value,map)(* remove key [c] from [t] *)let_removect=matchtwith|Empty->t|Cons(c',_)->ifW.comparecc'=0thenEmptyelset|Node(value,map)->ifM.memcmapthen(letmap'=M.removecmapin_mk_nodevaluemap')elsetletupdatekeyft=(* first arg: current subtree and rebuild function; [c]: current char *)letgoto(t,rebuild)c=matchtwith|Empty->empty,funt->rebuild(_consct)|Cons(c',t')->ifW.comparecc'=0thent',funt->rebuild(_consct)else(letrebuild'new_child=rebuild(ifis_emptynew_childthentelse(letmap=M.singletoncnew_childinletmap=M.addc't'mapin_mk_nodeNonemap))inempty,rebuild')|Node(value,map)->(trylett'=M.findcmapin(* rebuild: we modify [t], so we put the new version in [map]
if it's not empty, and make the node again *)letrebuild'new_child=rebuild(ifis_emptynew_childthen_mk_nodevalue(M.removecmap)else_mk_nodevalue(M.addcnew_childmap))int',rebuild'withNot_found->letrebuild'new_child=ifis_emptynew_childthenrebuildt(* ignore *)else(letmap'=M.addcnew_childmapinrebuild(_mk_nodevaluemap'))inempty,rebuild')inletfinish(t,rebuild)=matchtwith|Empty->rebuild(_mk_node(fNone)M.empty)|Cons(c,t')->rebuild(matchfNonewith|None->t|Some_asv->_mk_nodev(M.singletonct'))|Node(value,map)->letvalue'=fvalueinrebuild(_mk_nodevalue'map)inletword=W.to_iterkeyin_fold_iter_and_thengoto~finish(t,_id)wordletaddkvt=updatek(fun_->Somev)tletremovekt=updatek(fun_->None)tletfind_exnkt=(* at subtree [t], and character [c] *)letgototc=matchtwith|Empty->raiseNot_found|Cons(c',t')->ifW.comparecc'=0thent'elseraiseNot_found|Node(_,map)->M.findcmapandfinisht=matchtwith|Node(Somev,_)->v|_->raiseNot_foundinletword=W.to_iterkin_fold_iter_and_thengoto~finishtwordletfindkt=trySome(find_exnkt)withNot_found->Nonetype'adifflist='alist->'alistlet_difflist_add:'adifflist->'a->'adifflist=funfxl'->f(x::l')letlongest_prefixkt=(* at subtree [t], and character [c] *)letgoto(t,prefix)c=matchtwith|Empty->Empty,prefix|Cons(c',t')->ifW.comparecc'=0thent',_difflist_addprefixcelseEmpty,prefix|Node(_,map)->(trylett'=M.findcmapint',_difflist_addprefixcwithNot_found->Empty,prefix)andfinish(_,prefix)=W.of_list(prefix[])inletword=W.to_iterkin_fold_iter_and_thengoto~finish(t,_id)word(* fold that also keeps the path from the root, so as to provide the list
of chars that lead to a value. The path is a difference list, ie
a function that prepends a list to some suffix *)letrec_foldfpathtacc=matchtwith|Empty->acc|Cons(c,t')->_foldf(_difflist_addpathc)t'acc|Node(v,map)->letacc=matchvwith|None->acc|Somev->faccpathvinM.fold(funct'acc->_foldf(_difflist_addpathc)t'acc)mapaccletfoldfacct=_fold(funaccpathv->letkey=W.of_list(path[])infacckeyv)_idtaccletmapift=letrecmap_prefixt=matchtwith|Empty->Empty|Cons(c,t')->Cons(c,map_(_difflist_addprefixc)t')|Node(v,map)->letv'=matchvwith|None->None|Somev->Some(f(W.of_list(prefix[]))v)inletmap'=M.mapi(funct'->letprefix'=_difflist_addprefixcinmap_prefix't')mapinNode(v',map')inmap__idtletmapft=letrecmap_=function|Empty->Empty|Cons(c,t')->Cons(c,map_t')|Node(v,map)->letv'=matchvwith|None->None|Somev->Some(fv)inletmap'=M.mapmap_mapinNode(v',map')inmap_tletiterft=_fold(fun()pathy->f(W.of_list(path[]))y)_idt()let_iter_prefix~prefixft=_fold(fun()pathy->letkey=W.of_list(prefix(path[]))infkeyy)_idt()letrecfold_valuesfacct=matchtwith|Empty->acc|Cons(_,t')->fold_valuesfacct'|Node(v,map)->letacc=matchvwith|None->acc|Somev->faccvinM.fold(fun_ct'acc->fold_valuesfacct')mapaccletiter_valuesft=fold_values(fun()x->fx)()tletrecmergeft1t2=matcht1,t2with|Empty,_->t2|_,Empty->t1|Cons(c1,t1'),Cons(c2,t2')->ifW.comparec1c2=0then_consc1(mergeft1't2')else(letmap=M.addc1t1'M.emptyinletmap=M.addc2t2'mapin_mk_nodeNonemap)|Cons(c1,t1'),Node(value,map)->(try(* collision *)lett2'=M.findc1mapinletnew_t=mergeft1't2'inletmap'=ifis_emptynew_tthenM.removec1mapelseM.addc1new_tmapin_mk_nodevaluemap'withNot_found->(* no collision *)assert(not(is_emptyt1'));Node(value,M.addc1t1'map))|Node_,Cons_->mergeft2t1(* previous case *)|Node(v1,map1),Node(v2,map2)->letv=matchv1,v2with|None,_->v2|_,None->v1|Somev1,Somev2->fv1v2inletmap'=M.merge(fun_ct1t2->matcht1,t2with|None,None->assertfalse|Somet,None|None,Somet->Somet|Somet1,Somet2->letnew_t=mergeft1t2inifis_emptynew_tthenNoneelseSomenew_t)map1map2in_mk_nodevmap'letrecsizet=matchtwith|Empty->0|Cons(_,t')->sizet'|Node(v,map)->lets=matchvwith|None->0|Some_->1inM.fold(fun_t'acc->sizet'+acc)mapsletto_listt=fold(funacckv->(k,v)::acc)[]tletof_listl=List.fold_left(funacc(k,v)->addkvacc)emptylletto_itertk=iter(funkeyv->k(key,v))tletto_iter_valuestk=iter_valuesktletof_iterseq=_fold_iter_and_then(funacc(k,v)->addkvacc)~finish:_idemptyseqletrecto_treet()=let_tree_nodexl()=`Node(x,l)inmatchtwith|Empty->`Nil|Cons(c,t')->`Node(`Charc,[to_treet'])|Node(v,map)->letx=matchvwith|None->`Switch|Somev->`Valvinletl=M.bindingsmapin`Node(x,List.map(fun(c,t')->_tree_node(`Charc)[to_treet'])l)(* stack of actions for [above] and [below] *)type'aalternative=|Yieldof'a*char_difflist|Exploreof'at*char_difflisttypedirection=|Above|Belowletrecexplore~dirkalt=matchaltwith|Yield(v,prefix)->k(W.of_list(prefix[]),v)|Explore(Empty,_)->()|Explore(Cons(c,t),prefix)->explore~dirk(Explore(t,_difflist_addprefixc))|Explore(Node(o,map),prefix)->(* if above, yield value now *)(matcho,dirwith|Somev,Above->k(W.of_list(prefix[]),v)|_->());letseq=iter_of_mapmapinletseq=_iter_map(fun(c,t')->Explore(t',_difflist_addprefixc))seqinletl'=matcho,dirwith|_,Above->_iter_append_list[]seq|None,Below->_iter_append_list_rev[]seq|Somev,Below->_iter_append_list_rev[Yield(v,prefix)]seqinList.iter(explore~dirk)l'let_list_eql1l2=tryList.for_all2(funxy->W.comparexy=0)l1l2withInvalid_argument_->falselet_key_to_listkey=List.rev(_iter_append_list_rev[](W.to_iterkey))(* range above (if [above = true]) or below a threshold .
[p c c'] must return [true] if [c'], in the tree, meets some criterion
w.r.t [c] which is a part of the key. *)let_half_range~dir~pkeytk=(* at subtree [cur = Some (t,trail)] or [None], alternatives above
[alternatives], and char [c] in [key]. *)leton_char(cur,alternatives)c=matchcurwith|None->None,alternatives|Some(Empty,_)->None,alternatives|Some(Cons(c',t'),trail)->ifW.comparecc'=0thenSome(t',_difflist_addtrailc),alternativeselseNone,alternatives|Some(Node(o,map),trail)->(* if [dir=Below], [o]'s key is below [key] and the other
alternatives in [map] *)letalternatives=matcho,dirwith|Somev,Below->Yield(v,trail)::alternatives|_->alternativesinletalternatives=letseq=iter_of_mapmapinletseq=_filter_map_iter(fun(c',t')->ifp~cur:c~other:c'thenSome(Explore(t',_difflist_addtrailc'))elseNone)seqin(* ordering:
- Above: explore alternatives in increasing order
- Below: explore alternatives in decreasing order *)matchdirwith|Above->_iter_append_listalternativesseq|Below->_iter_append_list_revalternativesseqin(trylett'=M.findcmapinSome(t',_difflist_addtrailc),alternativeswithNot_found->None,alternatives)(* run through the current path (if any) and alternatives *)andfinish(cur,alternatives)=(matchcur,dirwith|Some(t,prefix),Above->(* subtree prefixed by input key, therefore above key *)_iter_prefix~prefix(funkey'v->k(key',v))t|Some(Node(Somev,_),prefix),Below->(* yield the value for key *)assert(_list_eq(prefix[])(_key_to_listkey));k(key,v)|Some_,_|None,_->());List.iter(explore~dirk)alternativesinletword=W.to_iterkeyin_fold_iter_and_thenon_char~finish(Some(t,_id),[])wordletabovekeyt=_half_range~dir:Above~p:(fun~cur~other->W.comparecurother<0)keytletbelowkeyt=_half_range~dir:Below~p:(fun~cur~other->W.comparecurother>0)keytendmoduletypeORDERED=sigtypetvalcompare:t->t->intendmoduleMakeArray(X:ORDERED)=Make(structtypet=X.tarraytypechar_=X.tletcompare=X.compareletto_iterak=Array.iterkaletof_list=Array.of_listend)moduleMakeList(X:ORDERED)=Make(structtypet=X.tlisttypechar_=X.tletcompare=X.compareletto_iterak=List.iterkaletof_listl=lend)moduleString=Make(structtypet=stringtypechar_=charletcompare=Char.compareletto_itersk=String.iterksletof_listl=letbuf=Buffer.create(List.lengthl)inList.iter(func->Buffer.add_charbufc)l;Buffer.contentsbufend)