123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686(**************************************************************************)(* *)(* OCaml *)(* *)(* Damien Doligez, projet Para, INRIA Rocquencourt *)(* *)(* Copyright 1997 Institut National de Recherche en Informatique et *)(* en Automatique. *)(* *)(* All rights reserved. This file is distributed under the terms of *)(* the GNU Lesser General Public License version 2.1, with the *)(* special exception on linking described in the file LICENSE. *)(* *)(**************************************************************************)moduletypeSeededS=sigincludeHashtbl.SeededSvalclean:'at->unitvalstats_alive:'at->Hashtbl.statistics(** same as {!stats} but only count the alive bindings *)endmoduletypeS=sigincludeHashtbl.Svalclean:'at->unitvalstats_alive:'at->Hashtbl.statistics(** same as {!stats} but only count the alive bindings *)endmoduleGenHashTable=structtypeequal=|ETrue|EFalse|EDead(** the garbage collector reclaimed the data *)moduleMakeSeeded(H:sigtypettype'acontainervalcreate:t->'a->'acontainervalhash:int->t->intvalequal:'acontainer->t->equalvalget_data:'acontainer->'aoptionvalget_key:'acontainer->toptionvalset_key_data:'acontainer->t->'a->unitvalcheck_key:'acontainer->boolend):SeededSwithtypekey=H.t=structtype'at={mutablesize:int;(* number of entries *)mutabledata:'abucketlistarray;(* the buckets *)mutableseed:int;(* for randomization *)initial_size:int;(* initial array size *)}and'abucketlist=|Empty|Consofint(* hash of the key *)*'aH.container*'abucketlist(** the hash of the key is kept in order to test the equality of the hash
before the key. Same reason as for Weak.Make *)typekey=H.tletrecpower_2_abovexn=ifx>=nthenxelseifx*2>Sys.max_array_lengththenxelsepower_2_above(x*2)nletprng=lazy(Random.State.make_self_init())letcreate?(random=(Hashtbl.is_randomized()))initial_size=lets=power_2_above16initial_sizeinletseed=ifrandomthenRandom.State.bits(Lazy.forceprng)else0in{initial_size=s;size=0;seed=seed;data=Array.makesEmpty}letclearh=h.size<-0;letlen=Array.lengthh.datainfori=0tolen-1doh.data.(i)<-Emptydoneletreseth=letlen=Array.lengthh.datainiflen=h.initial_sizethenclearhelsebeginh.size<-0;h.data<-Array.makeh.initial_sizeEmptyendletcopyh={hwithdata=Array.copyh.data}letkey_indexhhkey=hkeyland(Array.lengthh.data-1)letcleanh=letrecdo_bucket=function|Empty->Empty|Cons(_,c,rest)whennot(H.check_keyc)->h.size<-h.size-1;do_bucketrest|Cons(hkey,c,rest)->Cons(hkey,c,do_bucketrest)inletd=h.datainfori=0toArray.lengthd-1dod.(i)<-do_bucketd.(i)done(** resize is the only function to do the actual cleaning of dead keys
(remove does it just because it could).
The goal is to:
- not resize infinitely when the actual number of alive keys is
bounded but keys are continuously added. That would happen if
this function always resize.
- not call this function after each addition, that would happen if this
function don't resize even when only one key is dead.
So the algorithm:
- clean the keys before resizing
- if the number of remaining keys is less than half the size of the
array, don't resize.
- if it is more, resize.
The second problem remains if the table reaches {!Sys.max_array_length}.
*)letresizeh=letodata=h.datainletosize=Array.lengthodatainletnsize=osize*2incleanh;ifnsize<Sys.max_array_length&&h.size>=osizelsr1thenbeginletndata=Array.makensizeEmptyinh.data<-ndata;(* so that key_index sees the new bucket count *)letrecinsert_bucket=functionEmpty->()|Cons(hkey,data,rest)->insert_bucketrest;(* preserve original order of elements *)letnidx=key_indexhhkeyinndata.(nidx)<-Cons(hkey,data,ndata.(nidx))infori=0toosize-1doinsert_bucketodata.(i)doneendletaddhkeyinfo=lethkey=H.hashh.seedkeyinleti=key_indexhhkeyinletcontainer=H.createkeyinfoinletbucket=Cons(hkey,container,h.data.(i))inh.data.(i)<-bucket;h.size<-h.size+1;ifh.size>Array.lengthh.datalsl1thenresizehletremovehkey=lethkey=H.hashh.seedkeyinletrecremove_bucket=function|Empty->Empty|Cons(hk,c,next)whenhkey=hk->beginmatchH.equalckeywith|ETrue->h.size<-h.size-1;next|EFalse->Cons(hk,c,remove_bucketnext)|EDead->(* The dead key is automatically removed. It is acceptable
for this function since it already removes a binding *)h.size<-h.size-1;remove_bucketnextend|Cons(hk,c,next)->Cons(hk,c,remove_bucketnext)inleti=key_indexhhkeyinh.data.(i)<-remove_bucketh.data.(i)(** {!find} don't remove dead keys because it would be surprising for
the user that a read-only function mutates the state (eg. concurrent
access). Same for {!iter}, {!fold}, {!mem}.
*)letrecfind_reckeyhkey=function|Empty->raiseNot_found|Cons(hk,c,rest)whenhkey=hk->beginmatchH.equalckeywith|ETrue->beginmatchH.get_datacwith|None->(* This case is not impossible because the gc can run between
H.equal and H.get_data *)find_reckeyhkeyrest|Somed->dend|EFalse->find_reckeyhkeyrest|EDead->find_reckeyhkeyrestend|Cons(_,_,rest)->find_reckeyhkeyrestletfindhkey=lethkey=H.hashh.seedkeyin(* TODO inline 3 iterations *)find_reckeyhkey(h.data.(key_indexhhkey))letrecfind_rec_optkeyhkey=function|Empty->None|Cons(hk,c,rest)whenhkey=hk->beginmatchH.equalckeywith|ETrue->beginmatchH.get_datacwith|None->(* This case is not impossible because the gc can run between
H.equal and H.get_data *)find_rec_optkeyhkeyrest|Some_asd->dend|EFalse->find_rec_optkeyhkeyrest|EDead->find_rec_optkeyhkeyrestend|Cons(_,_,rest)->find_rec_optkeyhkeyrestletfind_opthkey=lethkey=H.hashh.seedkeyin(* TODO inline 3 iterations *)find_rec_optkeyhkey(h.data.(key_indexhhkey))letfind_allhkey=lethkey=H.hashh.seedkeyinletrecfind_in_bucket=function|Empty->[]|Cons(hk,c,rest)whenhkey=hk->beginmatchH.equalckeywith|ETrue->beginmatchH.get_datacwith|None->find_in_bucketrest|Somed->d::find_in_bucketrestend|EFalse->find_in_bucketrest|EDead->find_in_bucketrestend|Cons(_,_,rest)->find_in_bucketrestinfind_in_bucketh.data.(key_indexhhkey)letreplacehkeyinfo=lethkey=H.hashh.seedkeyinletrecreplace_bucket=function|Empty->raiseNot_found|Cons(hk,c,next)whenhkey=hk->beginmatchH.equalckeywith|ETrue->H.set_key_datackeyinfo|EFalse|EDead->replace_bucketnextend|Cons(_,_,next)->replace_bucketnextinleti=key_indexhhkeyinletl=h.data.(i)intryreplace_bucketlwithNot_found->letcontainer=H.createkeyinfoinh.data.(i)<-Cons(hkey,container,l);h.size<-h.size+1;ifh.size>Array.lengthh.datalsl1thenresizehletmemhkey=lethkey=H.hashh.seedkeyinletrecmem_in_bucket=function|Empty->false|Cons(hk,c,rest)whenhk=hkey->beginmatchH.equalckeywith|ETrue->true|EFalse|EDead->mem_in_bucketrestend|Cons(_hk,_c,rest)->mem_in_bucketrestinmem_in_bucketh.data.(key_indexhhkey)letiterfh=letrecdo_bucket=function|Empty->()|Cons(_,c,rest)->beginmatchH.get_keyc,H.get_datacwith|None,_|_,None->()|Somek,Somed->fkdend;do_bucketrestinletd=h.datainfori=0toArray.lengthd-1dodo_bucketd.(i)doneletfoldfhinit=letrecdo_bucketbaccu=matchbwithEmpty->accu|Cons(_,c,rest)->letaccu=beginmatchH.get_keyc,H.get_datacwith|None,_|_,None->accu|Somek,Somed->fkdaccuendindo_bucketrestaccuinletd=h.datainletaccu=refinitinfori=0toArray.lengthd-1doaccu:=do_bucketd.(i)!accudone;!acculetfilter_map_inplacefh=letrecdo_bucket=function|Empty->Empty|Cons(hk,c,rest)->matchH.get_keyc,H.get_datacwith|None,_|_,None->do_bucketrest|Somek,Somed->matchfkdwith|None->do_bucketrest|Somenew_d->H.set_key_datacknew_d;Cons(hk,c,do_bucketrest)inletd=h.datainfori=0toArray.lengthd-1dod.(i)<-do_bucketd.(i)doneletlengthh=h.sizeletrecbucket_lengthaccu=function|Empty->accu|Cons(_,_,rest)->bucket_length(accu+1)restletstatsh=letmbl=Array.fold_left(funmb->maxm(bucket_length0b))0h.datainlethisto=Array.make(mbl+1)0inArray.iter(funb->letl=bucket_length0binhisto.(l)<-histo.(l)+1)h.data;{Hashtbl.num_bindings=h.size;num_buckets=Array.lengthh.data;max_bucket_length=mbl;bucket_histogram=histo}letrecbucket_length_aliveaccu=function|Empty->accu|Cons(_,c,rest)whenH.check_keyc->bucket_length_alive(accu+1)rest|Cons(_,_,rest)->bucket_length_aliveaccurestletstats_aliveh=letsize=ref0inletmbl=Array.fold_left(funmb->maxm(bucket_length_alive0b))0h.datainlethisto=Array.make(mbl+1)0inArray.iter(funb->letl=bucket_length_alive0binsize:=!size+l;histo.(l)<-histo.(l)+1)h.data;{Hashtbl.num_bindings=!size;num_buckets=Array.lengthh.data;max_bucket_length=mbl;bucket_histogram=histo}letto_seqtbl=(* capture current array, so that even if the table is resized we
keep iterating on the same array *)lettbl_data=tbl.datain(* state: index * next bucket to traverse *)letrecauxibuck()=matchbuckwith|Empty->ifi=Array.lengthtbl_datathenSeq.Nilelseaux(i+1)tbl_data.(i)()|Cons(_,c,next)->beginmatchH.get_keyc,H.get_datacwith|None,_|_,None->auxinext()|Somekey,Somedata->Seq.Cons((key,data),auxinext)endinaux0Emptyletto_seq_keysm=Seq.mapfst(to_seqm)letto_seq_valuesm=Seq.mapsnd(to_seqm)letadd_seqtbli=Seq.iter(fun(k,v)->addtblkv)iletreplace_seqtbli=Seq.iter(fun(k,v)->replacetblkv)iletof_seqi=lettbl=create16inreplace_seqtbli;tblendendmoduleObjEph=Obj.Ephemeronlet_obj_opt:Obj.toption->'aoption=funx->matchxwith|None->x|Somev->Some(Obj.objv)(** The previous function is typed so this one is also correct *)letobj_opt:Obj.toption->'aoption=funx->Obj.magicxmoduleK1=structtype('k,'d)t=ObjEph.tletcreate():('k,'d)t=ObjEph.create1letget_key(t:('k,'d)t):'koption=obj_opt(ObjEph.get_keyt0)letget_key_copy(t:('k,'d)t):'koption=obj_opt(ObjEph.get_key_copyt0)letset_key(t:('k,'d)t)(k:'k):unit=ObjEph.set_keyt0(Obj.reprk)letunset_key(t:('k,'d)t):unit=ObjEph.unset_keyt0letcheck_key(t:('k,'d)t):bool=ObjEph.check_keyt0letblit_key(t1:('k,'d)t)(t2:('k,'d)t):unit=ObjEph.blit_keyt10t201letget_data(t:('k,'d)t):'doption=obj_opt(ObjEph.get_datat)letget_data_copy(t:('k,'d)t):'doption=obj_opt(ObjEph.get_data_copyt)letset_data(t:('k,'d)t)(d:'d):unit=ObjEph.set_datat(Obj.reprd)letunset_data(t:('k,'d)t):unit=ObjEph.unset_datatletcheck_data(t:('k,'d)t):bool=ObjEph.check_datatletblit_data(t1:(_,'d)t)(t2:(_,'d)t):unit=ObjEph.blit_datat1t2moduleMakeSeeded(H:Hashtbl.SeededHashedType)=GenHashTable.MakeSeeded(structtype'acontainer=(H.t,'a)ttypet=H.tletcreatekd=letc=create()inset_datacd;set_keyck;clethash=H.hashletequalck=(* {!get_key_copy} is not used because the equality of the user can be
the physical equality *)matchget_keycwith|None->GenHashTable.EDead|Somek'->ifH.equalkk'thenGenHashTable.ETrueelseGenHashTable.EFalseletget_data=get_dataletget_key=get_keyletset_key_datackd=unset_datac;set_keyck;set_datacdletcheck_key=check_keyend)moduleMake(H:Hashtbl.HashedType):(Swithtypekey=H.t)=structincludeMakeSeeded(structtypet=H.tletequal=H.equallethash(_seed:int)x=H.hashxend)letcreatesz=create~random:falseszletof_seqi=lettbl=create16inreplace_seqtbli;tblendendmoduleK2=structtype('k1,'k2,'d)t=ObjEph.tletcreate():('k1,'k2,'d)t=ObjEph.create2letget_key1(t:('k1,'k2,'d)t):'k1option=obj_opt(ObjEph.get_keyt0)letget_key1_copy(t:('k1,'k2,'d)t):'k1option=obj_opt(ObjEph.get_key_copyt0)letset_key1(t:('k1,'k2,'d)t)(k:'k1):unit=ObjEph.set_keyt0(Obj.reprk)letunset_key1(t:('k1,'k2,'d)t):unit=ObjEph.unset_keyt0letcheck_key1(t:('k1,'k2,'d)t):bool=ObjEph.check_keyt0letget_key2(t:('k1,'k2,'d)t):'k2option=obj_opt(ObjEph.get_keyt1)letget_key2_copy(t:('k1,'k2,'d)t):'k2option=obj_opt(ObjEph.get_key_copyt1)letset_key2(t:('k1,'k2,'d)t)(k:'k2):unit=ObjEph.set_keyt1(Obj.reprk)letunset_key2(t:('k1,'k2,'d)t):unit=ObjEph.unset_keyt1letcheck_key2(t:('k1,'k2,'d)t):bool=ObjEph.check_keyt1letblit_key1(t1:('k1,_,_)t)(t2:('k1,_,_)t):unit=ObjEph.blit_keyt10t201letblit_key2(t1:(_,'k2,_)t)(t2:(_,'k2,_)t):unit=ObjEph.blit_keyt11t211letblit_key12(t1:('k1,'k2,_)t)(t2:('k1,'k2,_)t):unit=ObjEph.blit_keyt10t202letget_data(t:('k1,'k2,'d)t):'doption=obj_opt(ObjEph.get_datat)letget_data_copy(t:('k1,'k2,'d)t):'doption=obj_opt(ObjEph.get_data_copyt)letset_data(t:('k1,'k2,'d)t)(d:'d):unit=ObjEph.set_datat(Obj.reprd)letunset_data(t:('k1,'k2,'d)t):unit=ObjEph.unset_datatletcheck_data(t:('k1,'k2,'d)t):bool=ObjEph.check_datatletblit_data(t1:(_,_,'d)t)(t2:(_,_,'d)t):unit=ObjEph.blit_datat1t2moduleMakeSeeded(H1:Hashtbl.SeededHashedType)(H2:Hashtbl.SeededHashedType)=GenHashTable.MakeSeeded(structtype'acontainer=(H1.t,H2.t,'a)ttypet=H1.t*H2.tletcreate(k1,k2)d=letc=create()inset_datacd;set_key1ck1;set_key2ck2;clethashseed(k1,k2)=H1.hashseedk1+H2.hashseedk2*65599letequalc(k1,k2)=matchget_key1c,get_key2cwith|None,_|_,None->GenHashTable.EDead|Somek1',Somek2'->ifH1.equalk1k1'&&H2.equalk2k2'thenGenHashTable.ETrueelseGenHashTable.EFalseletget_data=get_dataletget_keyc=matchget_key1c,get_key2cwith|None,_|_,None->None|Somek1',Somek2'->Some(k1',k2')letset_key_datac(k1,k2)d=unset_datac;set_key1ck1;set_key2ck2;set_datacdletcheck_keyc=check_key1c&&check_key2cend)moduleMake(H1:Hashtbl.HashedType)(H2:Hashtbl.HashedType):(Swithtypekey=H1.t*H2.t)=structincludeMakeSeeded(structtypet=H1.tletequal=H1.equallethash(_seed:int)x=H1.hashxend)(structtypet=H2.tletequal=H2.equallethash(_seed:int)x=H2.hashxend)letcreatesz=create~random:falseszletof_seqi=lettbl=create16inreplace_seqtbli;tblendendmoduleKn=structtype('k,'d)t=ObjEph.tletcreaten:('k,'d)t=ObjEph.createnletlength(k:('k,'d)t):int=ObjEph.lengthkletget_key(t:('k,'d)t)(n:int):'koption=obj_opt(ObjEph.get_keytn)letget_key_copy(t:('k,'d)t)(n:int):'koption=obj_opt(ObjEph.get_key_copytn)letset_key(t:('k,'d)t)(n:int)(k:'k):unit=ObjEph.set_keytn(Obj.reprk)letunset_key(t:('k,'d)t)(n:int):unit=ObjEph.unset_keytnletcheck_key(t:('k,'d)t)(n:int):bool=ObjEph.check_keytnletblit_key(t1:('k,'d)t)(o1:int)(t2:('k,'d)t)(o2:int)(l:int):unit=ObjEph.blit_keyt1o1t2o2lletget_data(t:('k,'d)t):'doption=obj_opt(ObjEph.get_datat)letget_data_copy(t:('k,'d)t):'doption=obj_opt(ObjEph.get_data_copyt)letset_data(t:('k,'d)t)(d:'d):unit=ObjEph.set_datat(Obj.reprd)letunset_data(t:('k,'d)t):unit=ObjEph.unset_datatletcheck_data(t:('k,'d)t):bool=ObjEph.check_datatletblit_data(t1:(_,'d)t)(t2:(_,'d)t):unit=ObjEph.blit_datat1t2moduleMakeSeeded(H:Hashtbl.SeededHashedType)=GenHashTable.MakeSeeded(structtype'acontainer=(H.t,'a)ttypet=H.tarrayletcreatekd=letc=create(Array.lengthk)inset_datacd;fori=0toArray.lengthk-1doset_keycik.(i);done;clethashseedk=leth=ref0infori=0toArray.lengthk-1doh:=H.hashseedk.(i)*65599+!h;done;!hletequalck=letlen=Array.lengthkinletlen'=lengthciniflen!=len'thenGenHashTable.EFalseelseletrecequal_arraykci=ifi<0thenGenHashTable.ETrueelsematchget_keyciwith|None->GenHashTable.EDead|Someki->ifH.equalk.(i)kithenequal_arraykc(i-1)elseGenHashTable.EFalseinequal_arraykc(len-1)letget_data=get_dataletget_keyc=letlen=lengthciniflen=0thenSome[||]elsematchget_keyc0with|None->None|Somek0->letrecfillai=ifi<1thenSomeaelsematchget_keyciwith|None->None|Someki->a.(i)<-ki;filla(i-1)inleta=Array.makelenk0infilla(len-1)letset_key_datackd=unset_datac;fori=0toArray.lengthk-1doset_keycik.(i);done;set_datacdletcheck_keyc=letreccheckci=i<0||(check_keyci&&checkc(i-1))incheckc(lengthc-1)end)moduleMake(H:Hashtbl.HashedType):(Swithtypekey=H.tarray)=structincludeMakeSeeded(structtypet=H.tletequal=H.equallethash(_seed:int)x=H.hashxend)letcreatesz=create~random:falseszletof_seqi=lettbl=create16inreplace_seqtbli;tblendend