123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614[@@@warning"-32"]typebigstring=(char,Bigarray.int8_unsigned_elt,Bigarray.c_layout)Bigarray.Array1.texternalget_int32:bigstring->int->int32="%caml_bigstring_get32"externalget_int64:bigstring->int->int64="%caml_bigstring_get64"externalget_int16:bigstring->int->int="%caml_bigstring_get16"externalswap32:int32->int32="%bswap_int32"externalswap64:int64->int64="%bswap_int64"externalswap16:int->int="%bswap16"letget_int16_be=ifSys.big_endianthenfunbufoff->get_int16bufoffelsefunbufoff->swap16(get_int16bufoff)letget_int64_be=ifSys.big_endianthenfunbufoff->get_int64bufoffelsefunbufoff->swap64(get_int64bufoff)externalstring_get_int16:string->int->int="%caml_string_get16"externalstring_get_int32:string->int->int32="%caml_string_get32"externalstring_get_int64:string->int->int64="%caml_string_get64"letstring_get_int16_be=ifSys.big_endianthenfunbufoff->string_get_int16bufoffelsefunbufoff->swap16(string_get_int16bufoff)letstring_get_int32_be=ifSys.big_endianthenfunbufoff->string_get_int32bufoffelsefunbufoff->swap32(string_get_int32bufoff)letstring_get_int64_be=ifSys.big_endianthenfunbufoff->string_get_int64bufoffelsefunbufoff->swap64(string_get_int64bufoff)letstring_get_int8si=Char.codes.[i]letget_int32_be=ifSys.big_endianthenfunbufoff->get_int32bufoffelsefunbufoff->swap32(get_int32bufoff)type'uididx={mp:bigstring;n:int;uid_ln:int;uid_rw:'uid->string;uid_wr:string->'uid;}andsub={off:int;len:int}andoptint=Optint.tletmake:bigstring->uid_ln:int->uid_rw:('uid->string)->uid_wr:(string->'uid)->'uididx=funmp~uid_ln~uid_rw~uid_wr->leti=get_int32_bemp0inletv=get_int32_bemp4inletn=get_int32_bemp(8+(255*4))inifi<>0xff744f63lthenFmt.invalid_arg"Invalid IDX file (header: %lx <> %lx)"i0xff744f63l;ifv<>0x2lthenFmt.invalid_arg"Invalid version of IDX file";{mp;n=Int32.to_intn;uid_ln;uid_rw;uid_wr}letcompare_bigstringidxahash=letps=ref0inletc1=ref0inletc2=ref0inletexceptionEqualintrywhilec1:=get_int16_beidx.mp(a.off+!ps);c2:=string_get_int16_behash!ps;!c1==!c2dops:=!ps+2;if!ps==idx.uid_lnthenraise_notraceEqualdone;letres0=(!c1land0xff)-(!c2land0xff)inletres1=(!c1asr8)-(!c2asr8)inifres1==0thenres0elseres1withEqual->0let(<->)ab=Int32.subabletfanout_offset=8lethashes_offset=8+(256*4)letbsearchidxhash=letn=string_get_int8hash0inleta=ifn=0then0lelseget_int32_beidx.mp(fanout_offset+(4*(n-1)))inletb=get_int32_beidx.mp(fanout_offset+(4*n))inletabs_off=hashes_offset+(Int32.to_inta*idx.uid_ln)inletlen=Int32.to_int(b<->a)*idx.uid_lninletrecgosub_offsub_len=letlen=sub_len/(2*idx.uid_ln)*idx.uid_lnin(* XXX(dinosaure): prevent a wrong comparison with something outside the
hashes table. *)ifsub_off+len=hashes_offset+(idx.uid_ln*idx.n)thenraise_notraceNot_found;letcmp=compare_bigstringidx{off=sub_off+len;len}hashinifcmp==0then{off=sub_off+len;len}elseifsub_len<=idx.uid_lnthenraise_notraceNot_foundelseifcmp>0then(go[@tailcall])sub_offlenelse(go[@tailcall])(sub_off+len)(sub_len-len)inlet{off;_}=goabs_offleninInt32.to_inta+((off-abs_off)/idx.uid_ln)(* XXX(dinosaure): FIXME! It does not work at some points. *)letisearchidxhash=letn=string_get_int8hash0inleta=ifn=0then0lelseget_int32_beidx.mp(fanout_offset+(4*(n-1)))inletb=get_int32_beidx.mp(fanout_offset+(4*n))inletabs_off=hashes_offset+(Int32.to_inta*idx.uid_ln)inletlen=Int32.to_int(b<->a<->1l)*idx.uid_lninlethashf=Int64.to_float(string_get_int64_behash0)inletuid_lnf=float_of_intidx.uid_lninletrecgolowhigh=iflow>highthenraise_notraceNot_found;iflow==high||low+idx.uid_ln==highthenletcmp=compare_bigstringidx{off=low;len=idx.uid_ln}hashinifcmp==0then{off=low;len=idx.uid_ln}elseraise_notraceNot_foundelseletlef=Int64.to_float(get_int64_beidx.mplow)inlethef=Int64.to_float(get_int64_beidx.mphigh)inletlowf=float_of_intlowinlethighf=float_of_inthighinletinterpolation=floor((highf-.lowf)*.(hashf-.lef)/.(hef-.lef))inletoff=lowf+.interpolation-.mod_floatinterpolationuid_lnfinletoff=int_of_floatoffinletcmp=compare_bigstringidx{off;len=idx.uid_ln}hashinifcmp==0then{off;len=idx.uid_ln}elseifcmp>0then(go[@tailcall])low(off-idx.uid_ln)else(go[@tailcall])(off+idx.uid_ln)highiniflen<0thenraise_notraceNot_found;let{off;_}=goabs_off(abs_off+len)inInt32.to_inta+((off-abs_off)/idx.uid_ln)letfindidxhash=lethash=idx.uid_rwhashinmatchbsearchidxhashwith|n->letcrcs_offset=8+(256*4)+(idx.n*idx.uid_ln)inletvalues_offset=8+(256*4)+(idx.n*idx.uid_ln)+(idx.n*4)inletcrc=get_int32_beidx.mp(crcs_offset+(n*4))inletoff=get_int32_beidx.mp(values_offset+(n*4))inSome(Optint.of_int32crc,Int64.of_int32off)|exceptionNot_found->Noneletexistsidxuid=letuid=idx.uid_rwuidinmatchbsearchidxuidwith_->true|exceptionNot_found->falseletget_uididxn=letres=Bytes.createidx.uid_lninBigstringaf.blit_to_bytesidx.mp~src_off:(hashes_offset+(n*idx.uid_ln))res~dst_off:0~len:idx.uid_ln;idx.uid_wr(Bytes.unsafe_to_stringres)letget_offsetidxn=letvalues_offset=8+(256*4)+(idx.n*idx.uid_ln)+(idx.n*4)inInt64.of_int32(get_int32_beidx.mp(values_offset+(n*4)))letget_crcidxn=letcrcs_offset=8+(256*4)+(idx.n*idx.uid_ln)inOptint.of_int32(get_int32_beidx.mp(crcs_offset+(n*4)))letmax{n;_}=nletiter~fidx=letrecgon=ifn==idx.nthen()elseletuid=get_uididxninletoffset=get_offsetidxninletcrc=get_crcidxninf~uid~offset~crc;go(succn)ingo0moduletypeUID=sigtypettypectxvalempty:ctxvalfeed:ctx->?off:int->?len:int->bigstring->ctxvalget:ctx->tvalcompare:t->t->intvallength:intvalto_raw_string:t->stringvalpp:tFmt.tendtype'uidentry={crc:optint;offset:int64;uid:'uid}moduleN(Uid:UID):sigtypeencodertypedst=[`Channelofout_channel|`BufferofBuffer.t|`Manual]valencoder:dst->pack:Uid.t->Uid.tentryarray->encodervalencode:encoder->[`Await]->[`Partial|`Ok]valdst_rem:encoder->intvaldst:encoder->Bigstringaf.t->int->int->unitend=structtypedst=[`Channelofout_channel|`BufferofBuffer.t|`Manual]typeencoder={dst:dst;mutableo:bigstring;mutableo_off:int;mutableo_pos:int;mutableo_max:int;t:bigstring;mutablet_pos:int;mutablet_max:int;mutablen:int;fanout:intarray;index:Uid.tentryarray;pack:Uid.t;mutablectx:Uid.ctx;mutablek:encoder->[`Await]->[`Partial|`Ok];}letdstesjl=ifj<0||l<0||j+l>Bigstringaf.lengthsthenFmt.invalid_arg"Out of bounds (off: %d, len: %d)"jl;e.o<-s;e.o_off<-j;e.o_pos<-j;e.o_max<-j+l-1letpartialke=function`Await->keletflush_with_ctxke=matche.dstwith|`Manual->letctx=Uid.feede.ctx~off:e.o_off~len:e.o_pose.oine.ctx<-ctx;e.k<-partialk;`Partial|`Channeloc->letraw=Bigstringaf.substringe.o~off:0~len:e.o_posinletctx=Uid.feede.ctx~off:e.o_off~len:e.o_pose.oinoutput_stringocraw;e.o_pos<-0;e.ctx<-ctx;ke|`Bufferb->letraw=Bigstringaf.substringe.o~off:0~len:e.o_posinletctx=Uid.feede.ctx~off:e.o_off~len:e.o_pose.oinBuffer.add_stringbraw;e.o_pos<-0;e.ctx<-ctx;keletflush_without_ctxke=matche.dstwith|`Manual->e.k<-partialk;`Partial|`Channeloc->letraw=Bigstringaf.substringe.o~off:e.o_off~len:e.o_posinoutput_stringocraw;e.o_pos<-0;ke|`Bufferb->letraw=Bigstringaf.substringe.o~off:e.o_off~len:e.o_posinBuffer.add_stringbraw;e.o_pos<-0;keleto_reme=e.o_max-e.o_pos+1lett_rangeem=e.t_pos<-0;e.t_max<-mletrect_flush?(with_ctx=true)ke=letblitel=Bigstringaf.blite.t~src_off:e.t_pose.o~dst_off:e.o_pos~len:l;e.o_pos<-e.o_pos+l;e.t_pos<-e.t_pos+linletrem=o_remeinletlen=e.t_max-e.t_pos+1inletflush=ifwith_ctxthenflush_with_ctxelseflush_without_ctxinifrem<lenthen(bliterem;flush(t_flushk)e)else(blitelen;ke)letoke=e.k<-(fun_`Await->`Ok);`Okletencode_traile`Await=letk2e=flush_without_ctxokeinletk1e=letrem=o_remeinlets,j,k=ifrem<Uid.lengththen(t_rangee(Uid.length-1);e.t,0,t_flush~with_ctx:falsek2)elseletj=e.o_posine.o_pos<-e.o_pos+Uid.length;e.o,j,k2inletuid=Uid.gete.ctxinletuid=Uid.to_raw_stringuidinBigstringaf.blit_from_stringuid~src_off:0s~dst_off:j~len:Uid.length;keinletk0e=flush_with_ctxk1einletrem=o_remeinlets,j,k=ifrem<Uid.lengththen(t_rangee(Uid.length-1);e.t,0,t_flushk0)elseletj=e.o_posine.o_pos<-e.o_pos+Uid.length;e.o,j,k0inletuid=Uid.to_raw_stringe.packinBigstringaf.blit_from_stringuid~src_off:0s~dst_off:j~len:Uid.length;keletrecencode_offsete`Await=letke=ife.n+1==Array.lengthe.indexthen(e.n<-0;encode_traile`Await)else(e.n<-succe.n;encode_offsete`Await)inletrem=o_remeinlets,j,k=ifrem<4then(t_rangee3;e.t,0,t_flushk)elseletj=e.o_posine.o_pos<-e.o_pos+4;e.o,j,kinlet{offset;_}=e.index.(e.n)inBigstringaf.set_int32_besj(Int64.to_int32offset);keletrecencode_crce`Await=letke=ife.n+1==Array.lengthe.indexthen(e.n<-0;encode_offsete`Await)else(e.n<-succe.n;encode_crce`Await)inletrem=o_remeinlets,j,k=ifrem<4then(t_rangee3;e.t,0,t_flushk)elseletj=e.o_posine.o_pos<-e.o_pos+4;e.o,j,kinlet{crc;_}=e.index.(e.n)inBigstringaf.set_int32_besj(Optint.to_int32crc);keletrecencode_hashe`Await=letke=ife.n+1==Array.lengthe.indexthen(e.n<-0;encode_crce`Await)else(e.n<-succe.n;encode_hashe`Await)inletrem=o_remeinlets,j,k=ifrem<Uid.lengththen(t_rangee(Uid.length-1);e.t,0,t_flushk)elseletj=e.o_posine.o_pos<-e.o_pos+Uid.length;e.o,j,kinlet{uid;_}=e.index.(e.n)inBigstringaf.blit_from_string(Uid.to_raw_stringuid)~src_off:0s~dst_off:j~len:Uid.length;keletrecencode_fanoute`Await=letke=ife.n+1==256then(e.n<-0;ifArray.lengthe.index>0thenencode_hashe`Awaitelseencode_traile`Await)else(e.n<-succe.n;encode_fanoute`Await)inletrem=o_remeinlets,j,k=ifrem<4then(t_rangee3;e.t,0,t_flushk)elseletj=e.o_posine.o_pos<-e.o_pos+4;e.o,j,kinletx=letacc=ref0infori=0toe.ndoacc:=!acc+e.fanout.(i)done;!accinBigstringaf.set_int32_besj(Int32.of_intx);keletencode_headere`Await=letke=e.n<-0;encode_fanoute`Awaitinletrem=o_remeinlets,j,k=ifrem<8then(t_rangee8;e.t,0,t_flushk)elseletj=e.o_posine.o_pos<-e.o_pos+8;e.o,j,kinBigstringaf.set_int32_besj0xff744f63l;Bigstringaf.set_int32_bes(j+4)0x2l;keletio_buffer_size=65536letencoderdst~packindex=Array.sort(fun{uid=a;_}{uid=b;_}->Uid.compareab)index;letfanout=Array.make2560inArray.iter(fun{uid;_}->letn=Char.code(Uid.to_raw_stringuid).[0]infanout.(n)<-fanout.(n)+1)index;leto,o_pos,o_max=matchdstwith|`Manual->Bigstringaf.empty,1,0|`Buffer_|`Channel_->Bigstringaf.createio_buffer_size,0,io_buffer_size-1in{dst;o;o_off=0;o_pos;o_max;t=Bigstringaf.createUid.length;t_pos=1;t_max=0;n=0;fanout;index;pack;ctx=Uid.empty;k=encode_header;}letdst_rem=o_remletencodee=e.keendmoduleDevice=structtype'uidvalue=Bigstringaf.ttypekey=Keytypeuid=keyreftype'uidt=(keyref,'uidvalueref)Ephemeron.K1.tletdevice()=Ephemeron.K1.create()letcreatetbl=letkey=refKeyinEphemeron.K1.set_keytblkey;Ephemeron.K1.set_datatbl(refBigstringaf.empty);keyletprojecttbluid=assert(Ephemeron.K1.get_keytbl=Someuid);matchStdlib.Option.get(Ephemeron.K1.get_datatbl)with|{contents=v}->vendmoduleM(IO:sigtype+'atvalbind:'at->('a->'bt)->'btvalreturn:'a->'atend)(Uid:sigincludeUIDvalof_raw_string:string->tvalnull:tend)=structopenIOlet(>>=)xf=bindxftypefd={mutablebuffer:Bigstringaf.t;mutablecapacity:int;mutablelength:int;}letenlargefdmore=let_old_length=fd.lengthinletold_capacity=fd.capacityinletnew_capacity=refold_capacityinwhileold_capacity+more>!new_capacitydonew_capacity:=2*!new_capacitydone;if!new_capacity>Sys.max_string_lengththenifold_capacity+more<=Sys.max_string_lengththennew_capacity:=Sys.max_string_lengthelsefailwith"Too big buffer";letnew_buffer=Bigstringaf.create!new_capacityinBigstringaf.blitfd.buffer~src_off:0new_buffer~dst_off:0~len:fd.length;fd.buffer<-new_buffer;fd.capacity<-!new_capacity;(* XXX(dinosaure): these asserts wants to rely on some assumptions
even if we use [enlarge] into a preemptive thread as [Stdlib.Buffer].
However, with [lwt], it should be fine to use it and avoid these
assertions. *)(* assert (fd.position + more <= fd.capacity) ; *)(* assert (old_length + more <= fd.capacity) ; *)()typet=Uid.tDevice.ttypeuid=Device.uidtypeerror=[`Already_computed]letpp_errorppf=function|`Already_computed->Fmt.stringppf"IDX already computed"letcreatetbluid=assert(Ephemeron.K1.get_keytbl=Someuid);(* Ephemeron.K1.set_data tbl (ref Bigstringaf.empty); *)return(Ok{buffer=Bigstringaf.create0x1000;capacity=0x1000;length=0})letappend_fdstr=letlen=String.lengthstrinletnew_length=fd.length+leninifnew_length>fd.capacitythenenlargefdlen;Bigstringaf.blit_from_stringstr~src_off:0fd.buffer~dst_off:fd.length~len;fd.length<-new_length;IO.return()letclosetblfd=letresult=Bigstringaf.subfd.buffer~off:0~len:fd.lengthin(matchEphemeron.K1.get_datatblwith|Somevalue->value:=result|None->assertfalse);IO.return(Ok())end