123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767(* This file is free software, part of containers. See file "license" for more details. *)(** {1 Prefix Tree} *)type'asequence=('a->unit)->unittype'aktree=unit->[`Nil|`Nodeof'a*'aktreelist](** {2 Signatures} *)(** {6 A Composite Word}
Words are made of characters, who belong to a total order *)moduletypeWORD=sigtypettypechar_valcompare:char_->char_->intvalto_seq:t->char_sequencevalof_list:char_list->tendmoduletypeS=sigtypechar_typekeytype'atvalempty:'atvalis_empty:_t->boolvaladd:key->'a->'at->'at(** Add a binding to the trie (possibly erasing the previous one) *)valremove:key->'at->'at(** Remove the key, if present *)valfind:key->'at->'aoption(** Find the value associated with the key, if any *)valfind_exn:key->'at->'a(** Same as {!find} but can fail.
@raise Not_found if the key is not present *)vallongest_prefix:key->'at->key(** [longest_prefix k m] finds the longest prefix of [k] that leads to
at least one path in [m] (it does not mean that the prefix is bound to
a value.
Example: if [m] has keys "abc0" and "abcd", then [longest_prefix "abc2" m]
will return "abc"
@since 0.17 *)valupdate:key->('aoption->'aoption)->'at->'at(** Update the binding for the given key. The function is given
[None] if the key is absent, or [Some v] if [key] is bound to [v];
if it returns [None] the key is removed, otherwise it
returns [Some y] and [key] becomes bound to [y] *)valfold:('b->key->'a->'b)->'b->'at->'b(** Fold on key/value bindings. Will use {!WORD.of_list} to rebuild keys. *)valmapi:(key->'a->'b)->'at->'bt(** Map values, giving both key and value. Will use {!WORD.of_list} to rebuild keys.
@since 0.17 *)valmap:('a->'b)->'at->'bt(** Map values, giving only the value.
@since 0.17 *)valiter:(key->'a->unit)->'at->unit(** Same as {!fold}, but for effectful functions *)valfold_values:('b->'a->'b)->'b->'at->'b(** More efficient version of {!fold}, that doesn't keep keys *)valiter_values:('a->unit)->'at->unitvalmerge:('a->'a->'aoption)->'at->'at->'at(** Merge two tries together. The function is used in
case of conflicts, when a key belongs to both tries *)valsize:_t->int(** Number of bindings *)(** {6 Conversions} *)valto_list:'at->(key*'a)listvalof_list:(key*'a)list->'atvalto_seq:'at->(key*'a)sequencevalof_seq:(key*'a)sequence->'atvalto_seq_values:'at->'asequencevalto_tree:'at->[`Charofchar_|`Valof'a|`Switch]ktree(** {6 Ranges} *)valabove:key->'at->(key*'a)sequence(** All bindings whose key is bigger or equal to the given key, in
ascending order *)valbelow:key->'at->(key*'a)sequence(** All bindings whose key is smaller or equal to the given key,
in decreasing order *)(**/**)valcheck_invariants:_t->bool(**/**)end(*$inject
module T = MakeList(CCInt)
module S = String
let l1 = [ [1;2], "12"; [1], "1"; [2;1], "21"; [1;2;3], "123"; [], "[]" ]
let t1 = T.of_list l1
let small_l l = List.fold_left (fun acc (k,v) -> List.length k+acc) 0 l
let s1 = String.of_list ["cat", 1; "catogan", 2; "foo", 3]
*)(*$T
String.of_list ["a", 1; "b", 2] |> String.size = 2
String.of_list ["a", 1; "b", 2; "a", 3] |> String.size = 2
String.of_list ["a", 1; "b", 2] |> String.find_exn "a" = 1
String.of_list ["a", 1; "b", 2] |> String.find_exn "b" = 2
String.of_list ["a", 1; "b", 2] |> String.find "c" = None
s1 |> String.find_exn "cat" = 1
s1 |> String.find_exn "catogan" = 2
s1 |> String.find_exn "foo" = 3
s1 |> String.find "cato" = None
*)moduleMake(W:WORD):Swithtypechar_=W.char_andtypekey=W.t=structtypechar_=W.char_typekey=W.tmoduleM=Map.Make(structtypet=char_letcompare=W.compareend)type'at=|Empty|Consofchar_*'at(* simple case *)|Nodeof'aoption*'atM.t(* invariants:
- for Path(l,t) l is never empty
- for Node (None,map) map always has at least 2 elements
- for Node (Some _,map) map can be anything *)letempty=Emptylet_invariant=function|Node(None,map)whenM.is_emptymap->false|_->trueletreccheck_invariants=function|Empty->true|Cons(_,t)->check_invariantst|Node(None,map)whenM.is_emptymap->false|Node(_,map)->M.for_all(fun_v->check_invariantsv)mapletis_empty=function|Empty->true|_->falselet_idx=x(* fold [f] on [seq] with accumulator [acc], and call [finish]
on the accumulator once [seq] is exhausted *)let_fold_seq_and_thenf~finishaccseq=letacc=refaccinseq(funx->acc:=f!accx);finish!acclet_filter_map_seqfseqk=seq(funx->matchfxwith|None->()|Somey->ky)let_seq_mapfseqk=seq(funx->k(fx))let_seq_append_list_revlseq=letl=reflinseq(funx->l:=x::!l);!llet_seq_append_listlseq=List.rev_append(_seq_append_list_rev[]seq)lletseq_of_mapmapk=M.iter(funkeyv->k(key,v))map(* return common prefix, and disjoint suffixes *)letrec_merge_listsl1l2=matchl1,l2with|[],_|_,[]->[],l1,l2|c1::l1',c2::l2'->ifW.comparec1c2=0thenletpre,rest1,rest2=_merge_listsl1'l2'inc1::pre,rest1,rest2else[],l1,l2(* sub-tree t prefixed with c *)let_consct=Cons(c,t)(* build a Node value *)let_mk_nodevaluemap=matchvaluewith|Some_->Node(value,map)|None->ifM.is_emptymapthenEmptyelseifM.cardinalmap=1thenletc,sub=M.min_bindingmapin_conscsubelseNode(value,map)(* remove key [c] from [t] *)let_removect=matchtwith|Empty->t|Cons(c',_)->ifW.comparecc'=0thenEmptyelset|Node(value,map)->ifM.memcmapthenletmap'=M.removecmapin_mk_nodevaluemap'elsetletupdatekeyft=(* first arg: current subtree and rebuild function; [c]: current char *)letgoto(t,rebuild)c=matchtwith|Empty->empty,funt->rebuild(_consct)|Cons(c',t')->ifW.comparecc'=0thent',(funt->rebuild(_consct))elseletrebuild'new_child=rebuild(ifis_emptynew_childthentelseletmap=M.singletoncnew_childinletmap=M.addc't'mapin_mk_nodeNonemap)inempty,rebuild'|Node(value,map)->trylett'=M.findcmapin(* rebuild: we modify [t], so we put the new version in [map]
if it's not empty, and make the node again *)letrebuild'new_child=rebuild(ifis_emptynew_childthen_mk_nodevalue(M.removecmap)else_mk_nodevalue(M.addcnew_childmap))int',rebuild'withNot_found->letrebuild'new_child=ifis_emptynew_childthenrebuildt(* ignore *)elseletmap'=M.addcnew_childmapinrebuild(_mk_nodevaluemap')inempty,rebuild'inletfinish(t,rebuild)=matchtwith|Empty->rebuild(_mk_node(fNone)M.empty)|Cons(c,t')->rebuild(matchfNonewith|None->t|Some_asv->_mk_nodev(M.singletonct'))|Node(value,map)->letvalue'=fvalueinrebuild(_mk_nodevalue'map)inletword=W.to_seqkeyin_fold_seq_and_thengoto~finish(t,_id)wordletaddkvt=updatek(fun_->Somev)tletremovekt=updatek(fun_->None)t(*$T
T.add [3] "3" t1 |> T.find_exn [3] = "3"
T.add [3] "3" t1 |> T.find_exn [1;2] = "12"
T.remove [1;2] t1 |> T.find [1;2] = None
T.remove [1;2] t1 |> T.find [1] = Some "1"
T.remove [1;2] t1 |> T.find [] = Some "[]"
*)letfind_exnkt=(* at subtree [t], and character [c] *)letgototc=matchtwith|Empty->raiseNot_found|Cons(c',t')->ifW.comparecc'=0thent'elseraiseNot_found|Node(_,map)->M.findcmapandfinisht=matchtwith|Node(Somev,_)->v|_->raiseNot_foundinletword=W.to_seqkin_fold_seq_and_thengoto~finishtwordletfindkt=trySome(find_exnkt)withNot_found->Nonetype'adifflist='alist->'alistlet_difflist_add:'adifflist->'a->'adifflist=funfx->funl'->f(x::l')letlongest_prefixkt=(* at subtree [t], and character [c] *)letgoto(t,prefix)c=matchtwith|Empty->Empty,prefix|Cons(c',t')->ifW.comparecc'=0thent',_difflist_addprefixcelseEmpty,prefix|Node(_,map)->trylett'=M.findcmapint',_difflist_addprefixcwithNot_found->Empty,prefixandfinish(_,prefix)=W.of_list(prefix[])inletword=W.to_seqkin_fold_seq_and_thengoto~finish(t,_id)word(*$= & ~printer:CCFun.id
"ca" (String.longest_prefix "carte" s1)
"" (String.longest_prefix "yolo" s1)
"cat" (String.longest_prefix "cat" s1)
"catogan" (String.longest_prefix "catogan" s1)
*)(*$Q
Q.(pair (list (pair (printable_string_of_size Gen.(0 -- 30)) int)) printable_string) (fun (l,s) -> \
let m = String.of_list l in \
let s' = String.longest_prefix s m in \
CCString.prefix ~pre:s' s)
*)(* fold that also keeps the path from the root, so as to provide the list
of chars that lead to a value. The path is a difference list, ie
a function that prepends a list to some suffix *)letrec_foldfpathtacc=matchtwith|Empty->acc|Cons(c,t')->_foldf(_difflist_addpathc)t'acc|Node(v,map)->letacc=matchvwith|None->acc|Somev->faccpathvinM.fold(funct'acc->_foldf(_difflist_addpathc)t'acc)mapaccletfoldfacct=_fold(funaccpathv->letkey=W.of_list(path[])infacckeyv)_idtacc(*$T
T.fold (fun acc k v -> (k,v) :: acc) [] t1 \
|> List.sort Stdlib.compare = List.sort Stdlib.compare l1
*)letmapift=letrecmap_prefixt=matchtwith|Empty->Empty|Cons(c,t')->Cons(c,map_(_difflist_addprefixc)t')|Node(v,map)->letv'=matchvwith|None->None|Somev->Some(f(W.of_list(prefix[]))v)inletmap'=M.mapi(funct'->letprefix'=_difflist_addprefixcinmap_prefix't')mapinNode(v',map')inmap__idt(*$= & ~printer:Q.Print.(list (pair (list int) string))
(List.map (fun (k, v) -> (k, v ^ "!")) l1 |> List.sort Stdlib.compare) \
(T.mapi (fun k v -> v ^ "!") t1 \
|> T.to_list |> List.sort Stdlib.compare)
*)letmapft=letrecmap_=function|Empty->Empty|Cons(c,t')->Cons(c,map_t')|Node(v,map)->letv'=matchvwith|None->None|Somev->Some(fv)inletmap'=M.mapmap_mapinNode(v',map')inmap_t(*$= & ~printer:Q.Print.(list (pair (list int) string))
(List.map (fun (k, v) -> (k, v ^ "!")) l1 |> List.sort Stdlib.compare) \
(T.map (fun v -> v ^ "!") t1 \
|> T.to_list |> List.sort Stdlib.compare)
*)letiterft=_fold(fun()pathy->f(W.of_list(path[]))y)_idt()let_iter_prefix~prefixft=_fold(fun()pathy->letkey=W.of_list(prefix(path[]))infkeyy)_idt()letrecfold_valuesfacct=matchtwith|Empty->acc|Cons(_,t')->fold_valuesfacct'|Node(v,map)->letacc=matchvwith|None->acc|Somev->faccvinM.fold(fun_ct'acc->fold_valuesfacct')mapaccletiter_valuesft=fold_values(fun()x->fx)()tletrecmergeft1t2=matcht1,t2with|Empty,_->t2|_,Empty->t1|Cons(c1,t1'),Cons(c2,t2')->ifW.comparec1c2=0then_consc1(mergeft1't2')elseletmap=M.addc1t1'M.emptyinletmap=M.addc2t2'mapin_mk_nodeNonemap|Cons(c1,t1'),Node(value,map)->begintry(* collision *)lett2'=M.findc1mapinletnew_t=mergeft1't2'inletmap'=ifis_emptynew_tthenM.removec1mapelseM.addc1new_tmapin_mk_nodevaluemap'withNot_found->(* no collision *)assert(not(is_emptyt1'));Node(value,M.addc1t1'map)end|Node_,Cons_->mergeft2t1(* previous case *)|Node(v1,map1),Node(v2,map2)->letv=matchv1,v2with|None,_->v2|_,None->v1|Somev1,Somev2->fv1v2inletmap'=M.merge(fun_ct1t2->matcht1,t2with|None,None->assertfalse|Somet,None|None,Somet->Somet|Somet1,Somet2->letnew_t=mergeft1t2inifis_emptynew_tthenNoneelseSomenew_t)map1map2in_mk_nodevmap'(*$QR & ~count:30
Q.(let p = list_of_size Gen.(0--100) (pair printable_string small_int) in pair p p)
(fun (l1,l2) ->
let t1 = S.of_list l1 and t2 = S.of_list l2 in
let t = S.merge (fun a _ -> Some a) t1 t2 in
S.to_seq t |> Iter.for_all
(fun (k,v) -> S.find k t1 = Some v || S.find k t2 = Some v) &&
S.to_seq t1 |> Iter.for_all (fun (k,v) -> S.find k t <> None) &&
S.to_seq t2 |> Iter.for_all (fun (k,v) -> S.find k t <> None))
*)letrecsizet=matchtwith|Empty->0|Cons(_,t')->sizet'|Node(v,map)->lets=matchvwithNone->0|Some_->1inM.fold(fun_t'acc->sizet'+acc)maps(*$T
T.size t1 = List.length l1
*)letto_listt=fold(funacckv->(k,v)::acc)[]tletof_listl=List.fold_left(funacc(k,v)->addkvacc)emptylletto_seqtk=iter(funkeyv->k(key,v))tletto_seq_valuestk=iter_valuesktletof_seqseq=_fold_seq_and_then(funacc(k,v)->addkvacc)~finish:_idemptyseqletrecto_treet()=let_tree_nodexl()=`Node(x,l)inmatchtwith|Empty->`Nil|Cons(c,t')->`Node(`Charc,[to_treet'])|Node(v,map)->letx=matchvwith|None->`Switch|Somev->`Valvinletl=M.bindingsmapin`Node(x,List.map(fun(c,t')->_tree_node(`Charc)[to_treet'])l)(** {6 Ranges} *)(* stack of actions for [above] and [below] *)type'aalternative=|Yieldof'a*char_difflist|Exploreof'at*char_difflisttypedirection=|Above|Belowletrecexplore~dirkalt=matchaltwith|Yield(v,prefix)->k(W.of_list(prefix[]),v)|Explore(Empty,_)->()|Explore(Cons(c,t),prefix)->explore~dirk(Explore(t,_difflist_addprefixc))|Explore(Node(o,map),prefix)->(* if above, yield value now *)beginmatcho,dirwith|Somev,Above->k(W.of_list(prefix[]),v)|_->()end;letseq=seq_of_mapmapinletseq=_seq_map(fun(c,t')->Explore(t',_difflist_addprefixc))seqinletl'=matcho,dirwith|_,Above->_seq_append_list[]seq|None,Below->_seq_append_list_rev[]seq|Somev,Below->_seq_append_list_rev[Yield(v,prefix)]seqinList.iter(explore~dirk)l'let_list_eql1l2=tryList.for_all2(funxy->W.comparexy=0)l1l2withInvalid_argument_->falselet_key_to_listkey=List.rev(_seq_append_list_rev[](W.to_seqkey))(* range above (if [above = true]) or below a threshold .
[p c c'] must return [true] if [c'], in the tree, meets some criterion
w.r.t [c] which is a part of the key. *)let_half_range~dir~pkeytk=(* at subtree [cur = Some (t,trail)] or [None], alternatives above
[alternatives], and char [c] in [key]. *)leton_char(cur,alternatives)c=matchcurwith|None->(None,alternatives)|Some(Empty,_)->(None,alternatives)|Some(Cons(c',t'),trail)->ifW.comparecc'=0thenSome(t',_difflist_addtrailc),alternativeselseNone,alternatives|Some(Node(o,map),trail)->(* if [dir=Below], [o]'s key is below [key] and the other
alternatives in [map] *)letalternatives=matcho,dirwith|Somev,Below->Yield(v,trail)::alternatives|_->alternativesinletalternatives=letseq=seq_of_mapmapinletseq=_filter_map_seq(fun(c',t')->ifp~cur:c~other:c'thenSome(Explore(t',_difflist_addtrailc'))elseNone)seqin(* ordering:
- Above: explore alternatives in increasing order
- Below: explore alternatives in decreasing order *)matchdirwith|Above->_seq_append_listalternativesseq|Below->_seq_append_list_revalternativesseqinbegintrylett'=M.findcmapinSome(t',_difflist_addtrailc),alternativeswithNot_found->None,alternativesend(* run through the current path (if any) and alternatives *)andfinish(cur,alternatives)=beginmatchcur,dirwith|Some(t,prefix),Above->(* subtree prefixed by input key, therefore above key *)_iter_prefix~prefix(funkey'v->k(key',v))t|Some(Node(Somev,_),prefix),Below->(* yield the value for key *)assert(_list_eq(prefix[])(_key_to_listkey));k(key,v)|Some_,_|None,_->()end;List.iter(explore~dirk)alternativesinletword=W.to_seqkeyin_fold_seq_and_thenon_char~finish(Some(t,_id),[])wordletabovekeyt=_half_range~dir:Above~p:(fun~cur~other->W.comparecurother<0)keytletbelowkeyt=_half_range~dir:Below~p:(fun~cur~other->W.comparecurother>0)keyt(*$= & ~printer:CCFormat.(to_string (list (pair (list int) string)))
[ [1], "1"; [1;2], "12"; [1;2;3], "123"; [2;1], "21" ] \
(T.above [1] t1 |> Iter.to_list)
[ [1;2], "12"; [1;2;3], "123"; [2;1], "21" ] \
(T.above [1;1] t1 |> Iter.to_list)
[ [1;2], "12"; [1], "1"; [], "[]" ] \
(T.below [1;2] t1 |> Iter.to_list)
[ [1], "1"; [], "[]" ] \
(T.below [1;1] t1 |> Iter.to_list)
*)(* NOTE: Regression test. See #158 *)(*$T
let module TPoly = Make (struct \
type t = (unit -> char) list \
type char_ = char \
let compare = compare \
let to_seq a k = List.iter (fun c -> k (c ())) a \
let of_list l = List.map (fun c -> (fun () -> c)) l \
end) \
in \
let trie = TPoly.of_list [[fun () -> 'a'], 1; [fun () -> 'b'], 2] in \
ignore (TPoly.below [fun () -> 'a'] trie |> Iter.to_list); \
true
*)(*$Q & ~count:30
Q.(list_of_size Gen.(0--100) (pair printable_string small_int)) (fun l -> \
let t = S.of_list l in \
S.check_invariants t)
*)(*$inject
let rec sorted ~rev = function
| [] | [_] -> true
| x :: ((y ::_) as tl) ->
(if rev then x >= y else x <= y) && sorted ~rev tl
let gen_str = Q.small_printable_string
*)(*$Q & ~count:200
Q.(list_of_size Gen.(1 -- 20) (pair gen_str small_int)) \
(fun l -> let t = String.of_list l in \
List.for_all (fun (k,_) -> \
String.above k t |> Iter.for_all (fun (k',v) -> k' >= k)) \
l)
Q.(list_of_size Gen.(1 -- 20) (pair gen_str small_int)) \
(fun l -> let t = String.of_list l in \
List.for_all (fun (k,_) -> \
String.below k t |> Iter.for_all (fun (k',v) -> k' <= k)) \
l)
Q.(list_of_size Gen.(1 -- 20) (pair gen_str small_int)) \
(fun l -> let t = String.of_list l in \
List.for_all (fun (k,_) -> \
String.above k t |> Iter.to_list |> sorted ~rev:false) \
l)
Q.(list_of_size Gen.(1 -- 20) (pair gen_str small_int)) \
(fun l -> let t = String.of_list l in \
List.for_all (fun (k,_) -> \
String.below k t |> Iter.to_list |> sorted ~rev:true) \
l)
*)endmoduletypeORDERED=sigtypetvalcompare:t->t->intendmoduleMakeArray(X:ORDERED)=Make(structtypet=X.tarraytypechar_=X.tletcompare=X.compareletto_seqak=Array.iterkaletof_list=Array.of_listend)moduleMakeList(X:ORDERED)=Make(structtypet=X.tlisttypechar_=X.tletcompare=X.compareletto_seqak=List.iterkaletof_listl=lend)moduleString=Make(structtypet=stringtypechar_=charletcompare=Char.compareletto_seqsk=String.iterksletof_listl=letbuf=Buffer.create(List.lengthl)inList.iter(func->Buffer.add_charbufc)l;Buffer.contentsbufend)