123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770(* This file is free software, part of containers. See file "license" for more details. *)(*$inject
module M = Make(CCInt) ;;
let _listuniq =
let g = Q.(list (pair small_int small_int)) in
Q.map_same_type
(fun l ->
CCList.sort_uniq ~cmp:(fun a b -> Stdlib.compare (fst a)(fst b)) l
) g
;;
*)(** {1 Hash Tries} *)type'asequence=('a->unit)->unittype'agen=unit->'aoptiontype'aprinter=Format.formatter->'a->unittype'aktree=unit->[`Nil|`Nodeof'a*'aktreelist](** {2 Transient IDs} *)moduleTransient=structtypet={mutablefrozen:bool}letempty={frozen=true}(* special value *)letequalab=Stdlib.(==)abletcreate()={frozen=false}letactivest=notst.frozenletfrozenst=st.frozenletfreezest=st.frozen<-trueletwith_f=letr=create()intryletx=frinfreezer;xwithe->freezer;raiseeexceptionFrozenendmoduletypeS=sigtypekeytype'atvalempty:'atvalis_empty:_t->boolvalsingleton:key->'a->'atvaladd:key->'a->'at->'atvalmem:key->_t->boolvalget:key->'at->'aoptionvalget_exn:key->'at->'a(** @raise Not_found if key not present *)valremove:key->'at->'at(** Remove the key, if present. *)valupdate:key->f:('aoption->'aoption)->'at->'at(** [update k ~f m] calls [f (Some v)] if [get k m = Some v], [f None]
otherwise. Then, if [f] returns [Some v'] it binds [k] to [v'],
if [f] returns [None] it removes [k] *)valadd_mut:id:Transient.t->key->'a->'at->'at(** [add_mut ~id k v m] behaves like [add k v m], except it will mutate
in place whenever possible. Changes done with an [id] might affect all
versions of the structure obtained with the same [id] (but not
other versions).
@raise Transient.Frozen if [id] is frozen *)valremove_mut:id:Transient.t->key->'at->'at(** Same as {!remove}, but modifies in place whenever possible
@raise Transient.Frozen if [id] is frozen *)valupdate_mut:id:Transient.t->key->f:('aoption->'aoption)->'at->'at(** Same as {!update} but with mutability
@raise Transient.Frozen if [id] is frozen *)valcardinal:_t->intvalchoose:'at->(key*'a)optionvalchoose_exn:'at->key*'a(** @raise Not_found if not pair was found *)valiter:f:(key->'a->unit)->'at->unitvalfold:f:('b->key->'a->'b)->x:'b->'at->'b(** {6 Conversions} *)valto_list:'at->(key*'a)listvaladd_list:'at->(key*'a)list->'atvaladd_list_mut:id:Transient.t->'at->(key*'a)list->'at(** @raise Frozen if the ID is frozen *)valof_list:(key*'a)list->'atvaladd_seq:'at->(key*'a)sequence->'atvaladd_seq_mut:id:Transient.t->'at->(key*'a)sequence->'at(** @raise Frozen if the ID is frozen *)valof_seq:(key*'a)sequence->'atvalto_seq:'at->(key*'a)sequencevaladd_gen:'at->(key*'a)gen->'atvaladd_gen_mut:id:Transient.t->'at->(key*'a)gen->'at(** @raise Frozen if the ID is frozen *)valof_gen:(key*'a)gen->'atvalto_gen:'at->(key*'a)gen(** {6 IO} *)valpp:keyprinter->'aprinter->'atprintervalas_tree:'at->[`Lofint*(key*'a)list|`N]ktree(** For debugging purpose: explore the structure of the tree,
with [`L (h,l)] being a leaf (with shared hash [h])
and [`N] an inner node *)endmoduletypeKEY=sigtypetvalequal:t->t->boolvalhash:t->intend(*
from https://en.wikipedia.org/wiki/Hamming_weight
//This uses fewer arithmetic operations than any other known
//implementation on machines with slow multiplication.
//It uses 17 arithmetic operations.
int popcount_2(uint64_t x) {
x -= (x >> 1) & m1; //put count of each 2 bits into those 2 bits
x = (x & m2) + ((x >> 2) & m2); //put count of each 4 bits into those 4 bits
x = (x + (x >> 4)) & m4; //put count of each 8 bits into those 8 bits
x += x >> 8; //put count of each 16 bits into their lowest 8 bits
x += x >> 16; //put count of each 32 bits into their lowest 8 bits
x += x >> 32; //put count of each 64 bits into their lowest 8 bits
return x & 0x7f;
}
m1 = 0x5555555555555555
m2 = 0x3333333333333333
m4 = 0x0f0f0f0f0f0f0f0f
We use Int64 for our 64-bits popcount.
*)moduleI64=structtypet=Int64.tlet(+)=Int64.addlet(-)=Int64.sublet(lsl)=Int64.shift_leftlet(lsr)=Int64.shift_right_logicallet(land)=Int64.logandlet(lor)=Int64.logorletlnot=Int64.lognotendletpopcount(b:I64.t):int=letopenI64inletb=b-((blsr1)land0x5555555555555555L)inletb=(bland0x3333333333333333L)+((blsr2)land0x3333333333333333L)inletb=(b+(blsr4))land0x0f0f0f0f0f0f0f0fLinletb=b+(blsr8)inletb=b+(blsr16)inletb=b+(blsr32)inInt64.to_int(bland0x7fL)(*$T
popcount 5L = 2
popcount 256L = 1
popcount 255L = 8
popcount 0xFFFFL = 16
popcount 0xFF1FL = 13
popcount 0xFFFFFFFFL = 32
popcount 0xFFFFFFFFFFFFFFFFL = 64
*)(*$Q
Q.int (fun i -> let i = Int64.of_int i in popcount i <= 64)
*)(* sparse array, using a bitfield and POPCOUNT *)moduleA_SPARSE=structtype'at={bits:int64;arr:'aarray;id:Transient.t;}letlength_log=6letlength=1lsllength_loglet()=assert(length=64)letcreate~id={bits=0L;arr=[||];id;}letowns~ida=Transient.activeid&&Transient.equalida.idletget~defaultai=letopenI64inletidx=1Llsliinifa.bitslandidx=0Lthen(default)else(letreal_idx=popcount(a.bitsland(idx-1L))ina.arr.(real_idx))letset~mutaix=letopenI64inletidx=1Llsliinletreal_idx=popcount(a.bitsland(idx-1L))inif(a.bitslandidx=0L)then((* insert at [real_idx] in a new array *)letbits=a.bitsloridxinletn=Array.lengtha.arrinletarr=Array.makeStdlib.(n+1)xinarr.(real_idx)<-x;ifreal_idx>0then(Array.blita.arr0arr0real_idx;);ifreal_idx<nthen(letopenStdlibinArray.blita.arrreal_idxarr(real_idx+1)(n-real_idx););{awithbits;arr})else((* replace element at [real_idx] *)ifmutthen(a.arr.(real_idx)<-x;a)else(letarr=ifmutthena.arrelseArray.copya.arrinarr.(real_idx)<-x;{awitharr}))letupdate~mut~defaultaif=letopenI64inletidx=1Llsliinletreal_idx=popcount(a.bitsland(idx-1L))inifa.bitslandidx=0Lthen((* not present *)letx=fdefaultin(* insert at [real_idx] in a new array *)letbits=a.bitsloridxinletn=Array.lengtha.arrinletarr=Array.makeStdlib.(n+1)xinifreal_idx>0then(Array.blita.arr0arr0real_idx;);ifreal_idx<nthen(letopenStdlibinArray.blita.arrreal_idxarr(real_idx+1)(n-real_idx););{awithbits;arr})else(letx=fa.arr.(real_idx)in(* replace element at [real_idx] *)letarr=ifmutthena.arrelseArray.copya.arrinarr.(real_idx)<-x;{awitharr})letremoveai=letopenI64inletidx=1Llsliinletreal_idx=popcount(a.bitsland(idx-1L))inifa.bitslandidx=0Lthen(a(* not present *))else((* remove at [real_idx] *)letbits=a.bitsland(lnotidx)inletn=Array.lengtha.arrinletarr=ifn=1then[||]elseArray.makeStdlib.(n-1)a.arr.(0)inletopenStdlibinifreal_idx>0then(Array.blita.arr0arr0real_idx;);ifreal_idx+1<nthen(Array.blita.arr(real_idx+1)arrreal_idx(n-real_idx-1););{awithbits;arr})letiterfa=Array.iterfa.arrletfoldfacca=Array.fold_leftfacca.arrend(** {2 Functors} *)moduleMake(Key:KEY):Swithtypekey=Key.t=structmoduleA=A_SPARSElet()=assert(A.length=1lslA.length_log)moduleHash:sigtypet=privateintvalmake:Key.t->tvalzero:t(* special "hash" *)valis_0:t->boolvalequal:t->t->boolvalrem:t->int(* [A.length_log] last bits *)valquotient:t->t(* remove [A.length_log] last bits *)end=structtypet=intletmake=Key.hashletzero=0letis_0h=h=0letequal:int->int->bool=Stdlib.(=)letremh=hland(A.length-1)letquotienth=hlsrA.length_logendlethash_=Hash.maketypekey=Key.t(* association list, without duplicates *)type'aleaf=|Nil|Oneofkey*'a|Twoofkey*'a*key*'a|Consofkey*'a*'aleaftype'at=|E|SofHash.t*key*'a(* single pair *)|LofHash.t*'aleaf(* same hash for all elements *)|Nof'aleaf*'atA.t(* leaf for hash=0, subnodes *)(* invariants:
L [] --> E
N [E, E,...., E] -> E
*)letempty=Eletis_empty=function|E->true|L(_,Nil)->assertfalse|S_|L_|N_->false(*$T
M.is_empty M.empty
*)letleaf_kv~h=L(h,Cons(k,v,Nil))letsingletonkv=leaf_kv~h:(hash_k)(*$T
not (M.is_empty (M.singleton 1 2))
M.cardinal (M.singleton 1 2) = 1
*)letrecget_exn_list_kl=matchlwith|Nil->raiseNot_found|One(k',v')->ifKey.equalkk'thenv'elseraiseNot_found|Two(k1,v1,k2,v2)->ifKey.equalkk1thenv1elseifKey.equalkk2thenv2elseraiseNot_found|Cons(k',v',tail)->ifKey.equalkk'thenv'elseget_exn_list_ktailletrecget_exn_k~hm=matchmwith|E->raiseNot_found|S(_,k',v')->ifKey.equalkk'thenv'elseraiseNot_found|L(_,l)->get_exn_list_kl|N(leaf,a)->ifHash.is_0hthenget_exn_list_kleafelse(leti=Hash.remhinleth'=Hash.quotienthinget_exn_k~h:h'(A.get~default:Eai))letget_exnkm=get_exn_k~h:(hash_k)m(*$Q
_listuniq (fun l -> \
let m = M.of_list l in \
List.for_all (fun (x,y) -> M.get_exn x m = y) l)
*)letgetkm=trySome(get_exn_k~h:(hash_k)m)withNot_found->Noneletmemkm=tryignore(get_exn_k~h:(hash_k)m);truewithNot_found->false(* TODO: use Hash.combine if array only has one non-empty LEAF element? *)(* add [k,v] to the list [l], removing old binding if any *)letrecadd_list_kvl=matchlwith|Nil->One(k,v)|One(k1,v1)->ifKey.equalkk1thenOne(k,v)elseTwo(k,v,k1,v1)|Two(k1,v1,k2,v2)->ifKey.equalkk1thenTwo(k,v,k2,v2)elseifKey.equalkk2thenTwo(k,v,k1,v1)elseCons(k,v,l)|Cons(k',v',tail)->ifKey.equalkk'thenCons(k,v,tail)(* replace *)elseCons(k',v',add_list_kvtail)letnode_leafa=N(leaf,a)(* [h]: hash, with the part required to reach this leaf removed
[id] is the transient ID used for mutability *)letrecadd_~idkv~hm=matchmwith|E->S(h,k,v)|S(h',k',v')->ifHash.equalhh'then(ifKey.equalkk'thenS(h,k,v)(* replace *)elseL(h,Cons(k,v,Cons(k',v',Nil))))else(make_array_~id~leaf:(Cons(k',v',Nil))~h_leaf:h'kv~h)|L(h',l)->ifHash.equalhh'thenL(h,add_list_kvl)else(* split into N *)make_array_~id~leaf:l~h_leaf:h'kv~h|N(leaf,a)->ifHash.is_0hthennode_(add_list_kvleaf)aelse(letmut=A.owns~idain(* can we modify [a] in place? *)node_leaf(add_to_array_~id~mutkv~ha))(* make an array containing a leaf, and insert (k,v) in it *)andmake_array_~id~leaf~h_leaf:h'kv~h=leta=A.create~idinleta,leaf=ifHash.is_0h'thena,leafelse((* put leaf in the right bucket *)leti=Hash.remh'inleth''=Hash.quotienth'inA.set~mut:trueai(L(h'',leaf)),Nil)in(* then add new node *)leta,leaf=ifHash.is_0hthena,add_list_kvleafelseadd_to_array_~id~mut:truekv~ha,leafinN(leaf,a)(* add k->v to [a] *)andadd_to_array_~id~mutkv~ha=(* insert in a bucket *)leti=Hash.remhinleth'=Hash.quotienthinA.update~default:E~mutai(funx->add_~idkv~h:h'x)letaddkvm=add_~id:Transient.emptykv~h:(hash_k)m(*$Q
_listuniq (fun l -> \
let m = List.fold_left (fun m (x,y) -> M.add x y m) M.empty l in \
List.for_all (fun (x,y) -> M.get_exn x m = y) l)
*)letadd_mut~idkvm=ifTransient.frozenidthenraiseTransient.Frozen;add_~idkv~h:(hash_k)m(*$R
let lsort = List.sort Stdlib.compare in
let m = M.of_list [1, 1; 2, 2] in
let id = Transient.create() in
let m' = M.add_mut ~id 3 3 m in
let m' = M.add_mut ~id 4 4 m' in
assert_equal [1, 1; 2, 2] (M.to_list m |> lsort);
assert_equal [1, 1; 2, 2; 3,3; 4,4] (M.to_list m' |> lsort);
Transient.freeze id;
assert_bool "must raise"
(try ignore(M.add_mut ~id 5 5 m'); false with Transient.Frozen -> true)
*)exceptionLocalExitletis_empty_arr_a=tryA.iter(funt->ifnot(is_emptyt)thenraiseLocalExit)a;truewithLocalExit->falseletis_empty_list_=function|Nil->true|One_|Two_|Cons_->falseletrecremove_list_kl=matchlwith|Nil->Nil|One(k',_)->ifKey.equalkk'thenNilelsel|Two(k1,v1,k2,v2)->ifKey.equalkk1thenOne(k2,v2)elseifKey.equalkk2thenOne(k1,v1)elsel|Cons(k',v',tail)->ifKey.equalkk'thentailelseCons(k',v',remove_list_ktail)letrecremove_rec_~idk~hm=matchmwith|E->E|S(_,k',_)->ifKey.equalkk'thenEelsem|L(h,l)->letl=remove_list_klinifis_empty_list_lthenEelseL(h,l)|N(leaf,a)->letleaf,a=ifHash.is_0hthenremove_list_kleaf,aelse(leti=Hash.remhinleth'=Hash.quotienthinletnew_t=remove_rec_~idk~h:h'(A.get~default:Eai)inifis_emptynew_tthenleaf,A.removeai(* remove sub-tree *)else(letmut=A.owns~idainleaf,A.set~mutainew_t))inifis_empty_list_leaf&&is_empty_arr_athenEelseN(leaf,a)letremovekm=remove_rec_~id:Transient.emptyk~h:(hash_k)mletremove_mut~idkm=ifTransient.frozenidthenraiseTransient.Frozen;remove_rec_~idk~h:(hash_k)m(*$QR
_listuniq (fun l ->
let m = M.of_list l in
List.for_all
(fun (x,_) ->
let m' = M.remove x m in
not (M.mem x m') &&
M.cardinal m' = M.cardinal m - 1 &&
List.for_all
(fun (y,v) -> y = x || M.get_exn y m' = v)
l
) l
)
*)letupdate_~idkfm=leth=hash_kinletopt_v=trySome(get_exn_k~hm)withNot_found->Noneinbeginmatchopt_v,fopt_vwith|None,None->m|Some_,Somev|None,Somev->add_~idkv~hm|Some_,None->remove_rec_~idk~hmendletupdatek~fm=update_~id:Transient.emptykfmletupdate_mut~idk~fm=ifTransient.frozenidthenraiseTransient.Frozen;update_~idkfm(*$R
let m = M.of_list [1, 1; 2, 2; 5, 5] in
let m' = M.update 4
(function
| None -> Some 4
| Some _ -> Some 0
) m
in
assert_equal [1,1; 2,2; 4,4; 5,5] (M.to_list m' |> List.sort Stdlib.compare);
*)letiter~ft=letrecaux=function|E->()|S(_,k,v)->fkv|L(_,l)->aux_listl|N(l,a)->aux_listl;A.iterauxaandaux_list=function|Nil->()|One(k,v)->fkv|Two(k1,v1,k2,v2)->fk1v1;fk2v2|Cons(k,v,tl)->fkv;aux_listtlinauxtletfold~f~x:acct=letrecauxacct=matchtwith|E->acc|S(_,k,v)->facckv|L(_,l)->aux_listaccl|N(l,a)->letacc=aux_listacclinA.foldauxaccaandaux_listaccl=matchlwith|Nil->acc|One(k,v)->facckv|Two(k1,v1,k2,v2)->f(facck1v1)k2v2|Cons(k,v,tl)->letacc=facckvinaux_listacctlinauxacct(*$T
let l = CCList.(1 -- 10 |> map (fun x->x,x)) in \
M.of_list l \
|> M.fold ~f:(fun acc x y -> (x,y)::acc) ~x:[] \
|> List.sort Stdlib.compare = l
*)letcardinalm=fold~f:(funn__->n+1)~x:0mletto_listm=fold~f:(funacckv->(k,v)::acc)~x:[]mletadd_list_mut~idml=List.fold_left(funacc(k,v)->add_mut~idkvacc)mlletadd_listml=Transient.with_(funid->add_list_mut~idml)letof_listl=add_listemptylletadd_seq_mut~idmseq=letm=refminseq(fun(k,v)->m:=add_mut~idkv!m);!mletadd_seqmseq=Transient.with_(funid->add_seq_mut~idmseq)letof_seqs=add_seqemptysletto_seqmyield=iter~f:(funkv->yield(k,v))m(*$Q
_listuniq (fun l -> \
(List.sort Stdlib.compare l) = \
(l |> Iter.of_list |> M.of_seq |> M.to_seq |> Iter.to_list \
|> List.sort Stdlib.compare) )
*)letrecadd_gen_mut~idmg=matchg()with|None->m|Some(k,v)->add_gen_mut~id(add_mut~idkvm)gletadd_genmg=Transient.with_(funid->add_gen_mut~idmg)letof_geng=add_genemptyg(* traverse the tree by increasing hash order, where the order compares
hashes lexicographically by A.length_log-wide chunks of bits,
least-significant chunks first *)letto_genm=letst=Stack.create()inStack.pushmst;letrecnext()=ifStack.is_emptystthenNoneelsematchStack.popstwith|E->next()|S(_,k,v)->Some(k,v)|L(_,Nil)->next()|L(_,One(k,v))->Some(k,v)|L(h,Two(k1,v1,k2,v2))->Stack.push(L(h,One(k2,v2)))st;Some(k1,v1)|L(h,Cons(k,v,tl))->Stack.push(L(h,tl))st;(* tail *)Some(k,v)|N(l,a)->A.iter(funsub->Stack.pushsubst)a;Stack.push(L(Hash.zero,l))st;(* leaf *)next()innext(*$Q
_listuniq (fun l -> \
(List.sort Stdlib.compare l) = \
(l |> Gen.of_list |> M.of_gen |> M.to_gen |> Gen.to_list \
|> List.sort Stdlib.compare) )
*)letchoosem=to_genm()(*$T
M.choose M.empty = None
M.choose M.(of_list [1,1; 2,2]) <> None
*)letchoose_exnm=matchchoosemwith|None->raiseNot_found|Some(k,v)->k,vletppppkppvoutm=letfirst=reftrueiniterm~f:(funkv->if!firstthenfirst:=falseelseFormat.fprintfout";@ ";ppkoutk;Format.pp_print_stringout" -> ";ppvoutv)letrecas_treem()=matchmwith|E->`Nil|S(h,k,v)->`Node(`L((h:>int),[k,v]),[])|L(h,l)->`Node(`L((h:>int),list_as_tree_l),[])|N(l,a)->`Node(`N,as_tree(L(Hash.zero,l))::array_as_tree_a)andlist_as_tree_l=matchlwith|Nil->[]|One(k,v)->[k,v]|Two(k1,v1,k2,v2)->[k1,v1;k2,v2]|Cons(k,v,tail)->(k,v)::list_as_tree_tailandarray_as_tree_a=A.fold(funacct->as_treet::acc)[]aend(*$R
let m = M.of_list CCList.( (501 -- 1000) @ (500 -- 1) |> map (fun i->i,i)) in
assert_equal ~printer:CCInt.to_string 1000 (M.cardinal m);
assert_bool "check all get"
(Iter.for_all (fun i -> i = M.get_exn i m) Iter.(1 -- 1000));
let m = Iter.(501 -- 1000 |> fold (fun m i -> M.remove i m) m) in
assert_equal ~printer:CCInt.to_string 500 (M.cardinal m);
assert_bool "check all get after remove"
(Iter.for_all (fun i -> i = M.get_exn i m) Iter.(1 -- 500));
assert_bool "check all get after remove"
(Iter.for_all (fun i -> None = M.get i m) Iter.(501 -- 1000));
*)