123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663(* This file is free software, part of containers. See file "license" for more details. *)(** {1 Persistent hash-table on top of OCaml's hashtables} *)type'asequence=('a->unit)->unittype'aprinter=Format.formatter->'a->unittype'aequal='a->'a->boolmoduletypeHashedType=sigtypetvalequal:t->t->boolvalhash:t->intend(** {2 Signature of such a hashtable} *)moduletypeS=sigtypekeytype'atvalempty:unit->'at(** Empty table. The table will be allocated at the first binding *)valcreate:int->'at(** Create a new hashtable, with the given initial capacity *)valis_empty:'at->bool(** Is the table empty? *)valfind:'at->key->'a(** Find the value for this key, or fails
@raise Not_found if the key is not present in the table *)valget_exn:key->'at->'a(** Synonym to {!find} with flipped arguments *)valget:key->'at->'aoption(** Safe version of !{get_exn} *)valmem:'at->key->bool(** Is the key bound? *)vallength:_t->int(** Number of bindings *)valadd:'at->key->'a->'at(** Add the binding to the table, returning a new table. The old binding
for this key, if it exists, is shadowed and will be restored upon
[remove tbl k].
@since 0.14 *)valreplace:'at->key->'a->'at(** Add the binding to the table, returning a new table. This erases
the current binding for [key], if any. *)valupdate:'at->key->('aoption->'aoption)->'at(** [update tbl key f] calls [f None] if [key] doesn't belong in [tbl],
[f (Some v)] if [key -> v] otherwise; If [f] returns [None] then
[key] is removed, else it returns [Some v'] and [key -> v'] is added. *)valremove:'at->key->'at(** Remove the key *)valcopy:'at->'at(** Fresh copy of the table; the underlying structure is not shared
anymore, so using both tables alternatively will be efficient *)valmerge:f:(key->[`Leftof'a|`Rightof'b|`Bothof'a*'b]->'coption)->'at->'bt->'ct(** Merge two tables together into a new table. The function's argument
correspond to values associated with the key (if present); if the
function returns [None] the key will not appear in the result. *)valiter:'at->(key->'a->unit)->unit(** Iterate over bindings *)valfold:('b->key->'a->'b)->'b->'at->'b(** Fold over bindings *)valmap:(key->'a->'b)->'at->'bt(** Map all values *)valfilter:(key->'a->bool)->'at->'atvalfilter_map:(key->'a->'boption)->'at->'btvalfor_all:(key->'a->bool)->'at->boolvalexists:(key->'a->bool)->'at->bool(** {3 Conversions} *)valof_seq:(key*'a)sequence->'at(** Add (replace) bindings from the sequence to the table *)valof_list:(key*'a)list->'atvaladd_seq:'at->(key*'a)sequence->'atvaladd_list:'at->(key*'a)list->'atvalto_seq:'at->(key*'a)sequence(** Iter of the bindings of the table *)valto_list:'at->(key*'a)list(** {3 Misc} *)valequal:'aequal->'atequalvalpp:?sep:string->?arrow:string->keyprinter->'aprinter->'atprintervalstats:_t->Hashtbl.statistics(** Statistics on the internal table.
@since 0.14 *)end(*$inject
module H = Make(CCInt)
let my_list =
[ 1, "a";
2, "b";
3, "c";
4, "d";
]
let my_seq = Iter.of_list my_list
let _list_uniq = CCList.sort_uniq
~cmp:(fun a b -> Stdlib.compare (fst a) (fst b))
let _list_int_int = Q.(
map_same_type _list_uniq
(list_of_size Gen.(0 -- 40) (pair small_int small_int))
)
*)(** {2 Implementation} *)moduleMake(H:HashedType):Swithtypekey=H.t=structtypekey=H.t(* main hashtable *)type'at={mutablearr:'ap_array;(* invariant: length is a power of 2 *)length:int;}(* piece of a persistent array *)and'ap_array=|Arrof'abucketarray|Setofint*'abucket*'at(* bucket of the hashtbl *)and'abucket=|Nil|Consofkey*'a*'abucket(* first power of two that is bigger than [than], starting from [n] *)letrecpower_two_larger~thann=ifn>=thanthennelsepower_two_larger~than(2*n)letcreatei=leti=power_two_larger~than:i16in{length=0;arr=Arr(Array.makeiNil)}letempty()=create16letrecreroot_rec_tk=matcht.arrwith|Arra->ka|Set(i,v,t')->reroot_rec_t'(funa->letv'=a.(i)ina.(i)<-v;t.arr<-Arra;t'.arr<-Set(i,v',t);ka)(* obtain the array *)letreroot_t=matcht.arrwith|Arra->a|_->reroot_rec_t(funx->x)letis_emptyt=t.length=0letlengtht=t.length(* find index of [h] in [a] *)letfind_idx_a~h=(* bitmask 00001111 if length(a) = 10000 *)hland(Array.lengtha-1)letrecfind_rec_kl=matchlwith|Nil->raiseNot_found|Cons(k',v',l')->ifH.equalkk'thenv'elsefind_rec_kl'letfindtk=leta=reroot_tin(* unroll like crazy *)matcha.(find_idx_~h:(H.hashk)a)with|Nil->raiseNot_found|Cons(k1,v1,l1)->ifH.equalkk1thenv1elsematchl1with|Nil->raiseNot_found|Cons(k2,v2,l2)->ifH.equalkk2thenv2elsematchl2with|Nil->raiseNot_found|Cons(k3,v3,l3)->ifH.equalkk3thenv3elsematchl3with|Nil->raiseNot_found|Cons(k4,v4,l4)->ifH.equalkk4thenv4elsefind_rec_kl4(*$R
let h = H.of_seq my_seq in
OUnit.assert_equal "a" (H.find h 1);
OUnit.assert_raises Not_found (fun () -> H.find h 5);
let h' = H.replace h 5 "e" in
OUnit.assert_equal "a" (H.find h' 1);
OUnit.assert_equal "e" (H.find h' 5);
OUnit.assert_equal "a" (H.find h 1);
OUnit.assert_raises Not_found (fun () -> H.find h 5);
*)(*$R
let n = 10000 in
let seq = Iter.map (fun i -> i, string_of_int i) Iter.(0--n) in
let h = H.of_seq seq in
Iter.iter
(fun (k,v) ->
OUnit.assert_equal ~printer:(fun x -> x) v (H.find h k))
seq;
OUnit.assert_raises Not_found (fun () -> H.find h (n+1));
*)(*$QR
_list_int_int
(fun l ->
let h = H.of_list l in
List.for_all
(fun (k,v) ->
try
H.find h k = v
with Not_found -> false)
l
)
*)letget_exnkt=findtkletgetkt=trySome(findtk)withNot_found->Noneletmemtk=tryignore(findtk);truewithNot_found->false(*$R
let h = H.of_seq
Iter.(map (fun i -> i, string_of_int i)
(0 -- 200)) in
OUnit.assert_equal 201 (H.length h);
*)(*$QR
_list_int_int (fun l ->
let h = H.of_list l in
H.length h = List.length l
)
*)letrecbuck_rev_iter_~fl=matchlwith|Nil->()|Cons(k,v,l')->buck_rev_iter_~fl';fkv(* resize [a] so it has capacity [new_size], and insert [k,v] in it *)letresize_kvhanew_size=assert(new_size>Array.lengtha);leta'=Array.makenew_sizeNilin(* preserve order of elements by iterating on each bucket in rev order *)Array.iter(buck_rev_iter_~f:(funkv->leti=find_idx_~h:(H.hashk)a'ina'.(i)<-Cons(k,v,a'.(i))))a;leti=find_idx_~ha'ina'.(i)<-Cons(k,v,a'.(i));a'(* insert [k,v] in [l] and returns new list and boolean flag indicating
whether it's a new element *)letrecreplace_rec_kvl=matchlwith|Nil->Cons(k,v,Nil),true|Cons(k',v',l')->ifH.equalkk'thenCons(k,v,l'),falseelseletl',is_new=replace_rec_kvl'inCons(k',v',l'),is_newletreplacetkv=leta=reroot_tinleth=H.hashkinleti=find_idx_~hainmatcha.(i)with|Nil->ift.length>(Array.lengtha)lsl1then((* resize *)letnew_size=min(2*(Array.lengtha))Sys.max_array_lengthinleta=resize_kvhanew_sizein{length=t.length+1;arr=Arra})else(a.(i)<-Cons(k,v,Nil);lett'={length=t.length+1;arr=Arra}int.arr<-Set(i,Nil,t');t')|Cons_asl->letl',is_new=replace_rec_kvlinifis_new&&t.length>(Array.lengtha)lsl1then((* resize and insert [k,v] (again, it's new anyway) *)letnew_size=min(2*(Array.lengtha))Sys.max_array_lengthinleta=resize_kvhanew_sizein{length=t.length+1;arr=Arra})else((* no resize *)a.(i)<-l';lett'={length=ifis_newthent.length+1elset.length;arr=Arra;}int.arr<-Set(i,l,t');t')letaddtkv=leta=reroot_tinleth=H.hashkinleti=find_idx_~hainift.length>(Array.lengtha)lsl1then((* resize *)letnew_size=min(2*(Array.lengtha))Sys.max_array_lengthinleta=resize_kvhanew_sizein{length=t.length+1;arr=Arra})else((* prepend *)letold=a.(i)ina.(i)<-Cons(k,v,old);lett'={length=t.length+1;arr=Arra}int.arr<-Set(i,old,t');t')(*$R
let h = H.of_seq my_seq in
OUnit.assert_equal "a" (H.find h 1);
OUnit.assert_raises Not_found (fun () -> H.find h 5);
let h1 = H.add h 5 "e" in
OUnit.assert_equal "a" (H.find h1 1);
OUnit.assert_equal "e" (H.find h1 5);
OUnit.assert_equal "a" (H.find h 1);
let h2 = H.add h1 5 "ee" in
OUnit.assert_equal "ee" (H.find h2 5);
OUnit.assert_raises Not_found (fun () -> H.find h 5);
let h3 = H.remove h2 1 in
OUnit.assert_equal "ee" (H.find h3 5);
OUnit.assert_raises Not_found (fun () -> H.find h3 1);
let h4 = H.remove h3 5 in
OUnit.assert_equal "e" (H.find h4 5);
OUnit.assert_equal "ee" (H.find h3 5);
*)(* return [Some l'] if [l] changed into [l'] by removing [k] *)letrecremove_rec_kl=matchlwith|Nil->None|Cons(k',v',l')->ifH.equalkk'thenSomel'elsematchremove_rec_kl'with|None->None|Somel'->Some(Cons(k',v',l'))letremovetk=leta=reroot_tinleti=find_idx_~h:(H.hashk)ainmatcha.(i)with|Nil->t|Cons_asl->matchremove_rec_klwith|None->t|Somel'->a.(i)<-l';lett'={length=t.length-1;arr=Arra}int.arr<-Set(i,l,t');t'(*$R
let h = H.of_seq my_seq in
OUnit.assert_equal (H.find h 2) "b";
OUnit.assert_equal (H.find h 3) "c";
OUnit.assert_equal (H.find h 4) "d";
OUnit.assert_equal (H.length h) 4;
let h = H.remove h 2 in
OUnit.assert_equal (H.find h 3) "c";
OUnit.assert_equal (H.length h) 3;
OUnit.assert_raises Not_found (fun () -> H.find h 2)
*)(*$R
let open Iter.Infix in
let n = 10000 in
let seq = Iter.map (fun i -> i, string_of_int i) (0 -- n) in
let h = H.of_seq seq in
OUnit.assert_equal (n+1) (H.length h);
let h = Iter.fold (fun h i -> H.remove h i) h (0 -- 500) in
OUnit.assert_equal (n-500) (H.length h);
OUnit.assert_bool "is_empty" (H.is_empty (H.create 16));
*)(*$QR
_list_int_int (fun l ->
let h = H.of_list l in
let h = List.fold_left (fun h (k,_) -> H.remove h k) h l in
H.is_empty h)
*)letupdatetkf=letv=getktinmatchv,fvwith|None,None->t(* no change *)|Some_,None->removetk|_,Somev'->replacetkv'letcopyt=leta=Array.copy(reroot_t)in{twitharr=Arra}letrecbuck_iter_~fl=matchlwith|Nil->()|Cons(k,v,l')->fkv;buck_iter_~fl'letitertf=leta=reroot_tinArray.iter(buck_iter_~f)aletrecbuck_fold_faccl=matchlwith|Nil->acc|Cons(k,v,l')->letacc=facckvinbuck_fold_faccl'letfoldfacct=leta=reroot_tinArray.fold_left(buck_fold_f)accaletmapft=letrecbuck_map_fl=matchlwith|Nil->Nil|Cons(k,v,l')->letv'=fkvinCons(k,v',buck_map_fl')inleta=reroot_tinleta'=Array.map(buck_map_f)ain{length=t.length;arr=Arra'}letrecbuck_filter_~fl=matchlwith|Nil->Nil|Cons(k,v,l')->letl'=buck_filter_~fl'iniffkvthenCons(k,v,l')elsel'letbuck_length_b=buck_fold_(funn__->n+1)0bletfilterpt=leta=reroot_tinletlength=ref0inleta'=Array.map(funb->letb'=buck_filter_~f:pbinlength:=!length+(buck_length_b');b')ain{length=!length;arr=Arra'}letrecbuck_filter_map_~fl=matchlwith|Nil->Nil|Cons(k,v,l')->letl'=buck_filter_map_~fl'inmatchfkvwith|None->l'|Somev'->Cons(k,v',l')letfilter_mapft=leta=reroot_tinletlength=ref0inleta'=Array.map(funb->letb'=buck_filter_map_~fbinlength:=!length+(buck_length_b');b')ain{length=!length;arr=Arra'}exceptionExitPTblletfor_allpt=tryitert(funkv->ifnot(pkv)thenraiseExitPTbl);truewithExitPTbl->falseletexistspt=tryitert(funkv->ifpkvthenraiseExitPTbl);falsewithExitPTbl->trueletmerge~ft1t2=lettbl=create(max(lengtht1)(lengtht2))inlettbl=fold(funtblkv1->letcomb=try`Both(v1,findt2k)withNot_found->`Leftv1inmatchfkcombwith|None->tbl|Somev'->replacetblkv')tblt1infold(funtblkv2->ifmemt1kthentblelsematchfk(`Rightv2)with|None->tbl|Somev'->replacetblkv')tblt2(*$R
let t1 = H.of_list [1, "a"; 2, "b1"] in
let t2 = H.of_list [2, "b2"; 3, "c"] in
let t = H.merge
~f:(fun _ -> function
| `Right v2 -> Some v2
| `Left v1 -> Some v1
| `Both (s1,s2) -> if s1 < s2 then Some s1 else Some s2)
t1 t2
in
OUnit.assert_equal ~printer:string_of_int 3 (H.length t);
OUnit.assert_equal "a" (H.find t 1);
OUnit.assert_equal "b1" (H.find t 2);
OUnit.assert_equal "c" (H.find t 3);
*)letadd_seqinitseq=lettbl=refinitinseq(fun(k,v)->tbl:=replace!tblkv);!tblletof_seqseq=add_seq(empty())seqletadd_listinitl=add_seqinit(funk->List.iterkl)(*$QR
_list_int_int (fun l ->
let l1, l2 = List.partition (fun (x,_) -> x mod 2 = 0) l in
let h1 = H.of_list l1 in
let h2 = H.add_list h1 l2 in
List.for_all
(fun (k,v) -> H.find h2 k = v)
l
&&
List.for_all
(fun (k,v) -> H.find h1 k = v)
l1
&&
List.length l1 = H.length h1
&&
List.length l = H.length h2
)
*)letof_listl=add_list(empty())lletto_listt=fold(funacckv->(k,v)::acc)[]t(*$R
let h = H.of_seq my_seq in
let l = Iter.to_list (H.to_seq h) in
OUnit.assert_equal my_list (List.sort compare l)
*)letto_seqt=funk->itert(funxy->k(x,y))(*$R
let h = H.of_seq my_seq in
OUnit.assert_equal "b" (H.find h 2);
OUnit.assert_equal "a" (H.find h 1);
OUnit.assert_raises Not_found (fun () -> H.find h 42);
*)letequaleqt1t2=lengtht1=lengtht2&&for_all(funkv->matchgetkt2with|None->false|Somev'->eqvv')t1letpp?(sep=",")?(arrow="->")pp_kpp_vfmtt=letfirst=reftrueinitert(funkv->if!firstthenfirst:=falseelse(Format.pp_print_stringfmtsep;Format.pp_print_cutfmt());Format.fprintffmt"%a %s %a"pp_kkarrowpp_vv);()letstatst=leta=reroot_tinletmax_bucket_length=Array.fold_left(funnb->maxn(buck_length_b))0ainletbucket_histogram=Array.make(max_bucket_length+1)0inArray.iter(funb->letl=buck_length_binbucket_histogram.(l)<-bucket_histogram.(l)+1)a;{Hashtbl.num_bindings=t.length;num_buckets=Array.lengtha;max_bucket_length;bucket_histogram;}end