(*****************************************************************************)
(*                                                                           *)
(* Open Source License                                                       *)
(* Copyright (c) 2022 Nomadic Labs <contact@nomadic-labs.com>                *)
(*                                                                           *)
(* Permission is hereby granted, free of charge, to any person obtaining a   *)
(* copy of this software and associated documentation files (the "Software"),*)
(* to deal in the Software without restriction, including without limitation *)
(* the rights to use, copy, modify, merge, publish, distribute, sublicense,  *)
(* and/or sell copies of the Software, and to permit persons to whom the     *)
(* Software is furnished to do so, subject to the following conditions:      *)
(*                                                                           *)
(* The above copyright notice and this permission notice shall be included   *)
(* in all copies or substantial portions of the Software.                    *)
(*                                                                           *)
(* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR*)
(* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,  *)
(* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL   *)
(* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER*)
(* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING   *)
(* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER       *)
(* DEALINGS IN THE SOFTWARE.                                                 *)
(*                                                                           *)
(*****************************************************************************)

(** Minimal monad interface over which the traversal functions of the skip
    list are parameterised. *)
module type MONAD = sig
  type 'a t

  val bind : 'a t -> ('a -> 'b t) -> 'b t

  val return : 'a -> 'a t
end

module type S = sig
  (** A cell of the skip list, carrying some ['content] and back pointers
      of type ['ptr]. *)
  type ('content, 'ptr) cell

  (** Pretty-printer for cells. *)
  val pp :
    pp_ptr:(Format.formatter -> 'ptr -> unit) ->
    pp_content:(Format.formatter -> 'content -> unit) ->
    Format.formatter ->
    ('content, 'ptr) cell ->
    unit

  (** [equal equal_ptr equal_content cell1 cell2] holds when both cells have
      equal contents, equal indices and pointwise-equal back pointers. *)
  val equal :
    ('ptr -> 'ptr -> bool) ->
    ('content -> 'content -> bool) ->
    ('content, 'ptr) cell ->
    ('content, 'ptr) cell ->
    bool

  (** Encoding of a cell as an object with fields ["index"], ["content"]
      and ["back_pointers"]. *)
  val encoding :
    'ptr Data_encoding.t ->
    'content Data_encoding.t ->
    ('content, 'ptr) cell Data_encoding.t

  (** [index cell] is the position of [cell] in the sequence; the genesis
      cell has index zero. *)
  val index : (_, _) cell -> Z.t

  (** [content cell] is the content stored in [cell]. *)
  val content : ('content, 'ptr) cell -> 'content

  (** [back_pointer cell i] is the [i]-th back pointer of [cell], or [None]
      when [cell] has no back pointer at position [i]. *)
  val back_pointer : ('content, 'ptr) cell -> int -> 'ptr option

  (** [back_pointers cell] is the list of the back pointers of [cell], in
      increasing order of their position. *)
  val back_pointers : ('content, 'ptr) cell -> 'ptr list

  (** [genesis content] is the first cell of a sequence: its index is zero
      and it has no back pointer. *)
  val genesis : 'content -> ('content, 'ptr) cell

  (** [next ~prev_cell ~prev_cell_ptr content] is the cell following
      [prev_cell], where [prev_cell_ptr] is a pointer to [prev_cell]. *)
  val next :
    prev_cell:('content, 'ptr) cell ->
    prev_cell_ptr:'ptr ->
    'content ->
    ('content, 'ptr) cell

  (** Outcome of {!MONADIC.search} regarding the last visited cell:
      - [Found cell]: [compare] returned [0] on the content of [cell];
      - [Nearest {lower; upper}]: no cell matched exactly; [lower] is a
        cell on which [compare] was negative and [upper], if any, the cell
        from which [lower] was reached;
      - [No_exact_or_lower_ptr]: the genesis cell was reached while
        [compare] was still positive;
      - [Deref_returned_none]: a back pointer could not be dereferenced. *)
  type ('content, 'ptr) search_cell_result =
    | Found of ('content, 'ptr) cell
    | Nearest of {
        lower : ('content, 'ptr) cell;
        upper : ('content, 'ptr) cell option;
      }
    | No_exact_or_lower_ptr
    | Deref_returned_none

  (** Result of {!MONADIC.search}: the visited cells, most recent first,
      together with the outcome for the last cell. *)
  type ('content, 'ptr) search_result = {
    rev_path : ('content, 'ptr) cell list;
    last_cell : ('content, 'ptr) search_cell_result;
  }

  (** Pretty-printer for search results. *)
  val pp_search_result :
    pp_cell:(Format.formatter -> ('content, 'ptr) cell -> unit) ->
    Format.formatter ->
    ('content, 'ptr) search_result ->
    unit

  (** Traversal functions, parameterised by the effect ['a result] in which
      pointers are dereferenced. *)
  module type MONADIC = sig
    type 'a result

    (** [find ~deref ~cell_ptr ~target_index] follows back pointers from
        [cell_ptr] and returns the cell whose index is [target_index].
        It returns [None] if a pointer cannot be dereferenced or if a
        visited cell has an index lower than [target_index]. *)
    val find :
      deref:('ptr -> ('content, 'ptr) cell option result) ->
      cell_ptr:'ptr ->
      target_index:Z.t ->
      ('content, 'ptr) cell option result

    (** [back_path ~deref ~cell_ptr ~target_index] is the list of pointers
        visited from [cell_ptr] (first) down to the cell of index
        [target_index] (last), or [None] under the same conditions as
        {!find}. *)
    val back_path :
      deref:('ptr -> ('content, 'ptr) cell option result) ->
      cell_ptr:'ptr ->
      target_index:Z.t ->
      'ptr list option result

    (** [valid_back_path ~equal_ptr ~deref ~cell_ptr ~target_ptr path]
        checks that [path] starts with [cell_ptr], that each step follows
        the back pointer selected for the index of the target cell, and
        that it ends on [target_ptr]. It returns [false] as soon as a
        pointer cannot be dereferenced. *)
    val valid_back_path :
      equal_ptr:('ptr -> 'ptr -> bool) ->
      deref:('ptr -> ('content, 'ptr) cell option result) ->
      cell_ptr:'ptr ->
      target_ptr:'ptr ->
      'ptr list ->
      bool result

    (** [search ~deref ~compare ~cell] walks back from [cell] looking for
        a cell on whose content [compare] returns [0]. [compare] must
        return a positive integer on contents located after the target
        and a negative integer on contents located before it. *)
    val search :
      deref:('ptr -> ('content, 'ptr) cell option result) ->
      compare:('content -> int) ->
      cell:('content, 'ptr) cell ->
      ('content, 'ptr) search_result result
  end

  include MONADIC with type 'a result := 'a

  module Lwt : MONADIC with type 'a result := 'a Lwt.t

  module Make_monadic (M : MONAD) : MONADIC with type 'a result := 'a M.t
end

module Make (Parameters : sig
  val basis : int
end) : S = struct
  (* A basis lower than 2 would make the successive powers computed in
     [next] and [list_powers] stop growing. *)
  let () = assert (Compare.Int.(Parameters.basis >= 2))

  open Parameters

  (*

      A cell of a skip list with some [`content] and back pointers of
      type [`ptr].

      Invariants
      ----------

      - back_pointers[i]
        = Some (pointer to (index - (index mod (basis ** i)) - 1))
        (for all i < length back_pointers)
      - length back_pointers = 0 for the genesis cell (index = 0)
      - length back_pointers = floor (log basis index) + 1 otherwise

      Notes
      -----

      - The [index] field is not strictly required but helps in making
        the data structure more robust. Indeed, otherwise, we should
        also ask the client to provide the index of the cell to be
        built, which can be error-prone.

      - The back pointers of a cell are chosen from the back pointers of
        its predecessor (except for the genesis cell) and a pointer to this
        predecessor. This locality makes the insertion of a new cell very
        efficient in practice.

  *)
  type ('content, 'ptr) cell = {
    content : 'content;
    back_pointers : 'ptr option FallbackArray.t;
    index : Z.t;
  }

  (* Structural equality on cells, given equalities on pointers and
     contents. Back pointers are compared position by position. *)
  let equal equal_ptr equal_content cell1 cell2 =
    let equal_back_pointers b1 b2 =
      let open FallbackArray in
      Compare.Int.(length b1 = length b2)
      && fst
         @@ fold
              (fun (equal, i) h1 ->
                (equal && Option.equal equal_ptr h1 (get b2 i), i + 1))
              b1
              (true, 0)
    in
    let {content; back_pointers; index} = cell1 in
    equal_content content cell2.content
    && Compare.Z.equal index cell2.index
    && equal_back_pointers back_pointers cell2.back_pointers

  let index cell = cell.index

  (* Converts the array of back pointers into a list, preserving the order
     of the positions. *)
  let back_pointers_to_list a =
    FallbackArray.fold
      (fun l -> function
        | Some ptr -> ptr :: l
        | None -> (* By [cell] invariants. *) assert false)
      a
      []
    |> List.rev

  let pp ~pp_ptr ~pp_content fmt {content; back_pointers; index} =
    Format.fprintf
      fmt
      "content: %a@,index: %s@,@[<hv 2>back_pointers:@ %a@]"
      pp_content
      content
      (Z.to_string index)
      (Format.pp_print_list
         ~pp_sep:(fun fmt () -> Format.pp_print_string fmt "; ")
         pp_ptr)
      (back_pointers_to_list back_pointers)

  let encoding ptr_encoding content_encoding =
    let of_list =
      FallbackArray.of_list ~fallback:None ~proj:(fun c -> Some c)
    in
    let to_list = back_pointers_to_list in
    let open Data_encoding in
    conv
      (fun {index; content; back_pointers} ->
        (index, content, to_list back_pointers))
      (fun (index, content, back_pointers) ->
        {index; content; back_pointers = of_list back_pointers})
      (obj3
         (req "index" n)
         (req "content" content_encoding)
         (req "back_pointers" (list ptr_encoding)))

  let content cell = cell.content

  let back_pointers cell = back_pointers_to_list cell.back_pointers

  let genesis content =
    {index = Z.zero; content; back_pointers = FallbackArray.make 0 None}

  let back_pointer cell i = FallbackArray.get cell.back_pointers i

  (* Precondition: i < length cell.back_pointers *)
  let back_pointer_unsafe cell i =
    match FallbackArray.get cell.back_pointers i with
    | Some ptr -> ptr
    | None -> (* By precondition and invariants of cells. *) assert false

  let next ~prev_cell ~prev_cell_ptr content =
    let index = Z.succ prev_cell.index in
    let back_pointers =
      (* [power] is [basis ** i]; pointers are accumulated in reverse order
         until [power] exceeds [index]. *)
      let rec aux power accu i =
        if Compare.Z.(index < power) then List.rev accu
        else
          let back_pointer_i =
            if Compare.Z.(Z.rem index power = Z.zero) then prev_cell_ptr
            else
              (* The following call is valid because of
                 - [i < List.length prev_cell.back_pointer]
                   because [log_basis index = log_basis prev_cell.index]
                 - the invariants of [prev_cell] *)
              back_pointer_unsafe prev_cell i
          in
          let accu = back_pointer_i :: accu in
          aux Z.(mul power (of_int basis)) accu (i + 1)
      in
      aux Z.one [] 0
    in
    let back_pointers =
      FallbackArray.of_list ~fallback:None ~proj:Option.some back_pointers
    in
    {index; content; back_pointers}

  (* returns the array of [basis^i] forall [i < len (back_pointers cell)] *)
  let list_powers cell =
    let rec aux n prev p =
      if Compare.Int.(n <= 0) then List.rev p
      else aux (n - 1) (basis * prev) (prev :: p)
    in
    FallbackArray.of_list
      ~fallback:0
      ~proj:(fun x -> x)
      (aux (FallbackArray.length cell.back_pointers) 1 [])

  (*
    [back_pointers] are sorted in decreasing order of their pointing cell index
    in the list. So we can do a [binary_search] to find the [cell] with the
    smallest index that is greater than or equal to [target] in the list.

    More formally, min({c : cell | c.index >= target.index}) where [c] is one of
    the pointed cells in the array of back pointers of the [cell] parameter.

    [powers] is expected to hold [basis ** i] at position [i], at least for
    every position of a back pointer of [cell] (see [list_powers]).
  *)
  let best_skip cell target_index powers =
    let open FallbackArray in
    let pointed_cell_index i =
      Z.(pred @@ sub cell.index (rem cell.index (of_int (get powers i))))
    in
    (* cell.index - (cell.index mod get powers i) - 1 in *)
    let rec binary_search start_idx end_idx =
      if Compare.Int.(start_idx >= end_idx) then Some start_idx
      else
        let mid_idx = start_idx + ((end_idx - start_idx) / 2) in
        let mid_cell_index = pointed_cell_index mid_idx in
        if Compare.Z.(mid_cell_index = target_index) then Some mid_idx
        else if Compare.Z.(mid_cell_index < target_index) then
          binary_search start_idx (mid_idx - 1)
        else
          let prev_mid_cell_index = pointed_cell_index (mid_idx + 1) in
          if Compare.Z.(prev_mid_cell_index = target_index) then
            Some (mid_idx + 1)
          else if Compare.Z.(prev_mid_cell_index < target_index) then
            (*
              If (mid_cell_index > target_index) &&
                 (prev_mid_cell_index < target_index)
              then we found the closest cell to the target, which is mid_cell,
              so we return its index [mid_idx] in the array of back_pointers.
            *)
            Some mid_idx
          else binary_search (mid_idx + 1) end_idx
    in
    binary_search 0 (length cell.back_pointers - 1)

  type ('content, 'ptr) search_cell_result =
    | Found of ('content, 'ptr) cell
    | Nearest of {
        lower : ('content, 'ptr) cell;
        upper : ('content, 'ptr) cell option;
      }
    | No_exact_or_lower_ptr
    | Deref_returned_none

  type ('content, 'ptr) search_result = {
    rev_path : ('content, 'ptr) cell list;
    last_cell : ('content, 'ptr) search_cell_result;
  }

  let pp_rev_path ~pp_cell =
    Format.pp_print_list ~pp_sep:Format.pp_print_space pp_cell

  let pp_search_cell_result ~pp_cell fmt = function
    | Found cell -> Format.fprintf fmt "Found(%a)" pp_cell cell
    | Nearest {lower; upper} ->
        Format.fprintf
          fmt
          "Nearest(lower=%a;upper=%a)"
          pp_cell
          lower
          (Format.pp_print_option pp_cell)
          upper
    | No_exact_or_lower_ptr -> Format.fprintf fmt "No_exact_or_lower_ptr"
    | Deref_returned_none -> Format.fprintf fmt "Deref_returned_none"

  let pp_search_result ~pp_cell fmt {rev_path; last_cell} =
    Format.fprintf
      fmt
      "{rev_path = %a; last_point = %a}"
      (pp_rev_path ~pp_cell)
      rev_path
      (pp_search_cell_result ~pp_cell)
      last_cell

  module type MONADIC = sig
    type 'a result

    val find :
      deref:('ptr -> ('content, 'ptr) cell option result) ->
      cell_ptr:'ptr ->
      target_index:Z.t ->
      ('content, 'ptr) cell option result

    val back_path :
      deref:('ptr -> ('content, 'ptr) cell option result) ->
      cell_ptr:'ptr ->
      target_index:Z.t ->
      'ptr list option result

    val valid_back_path :
      equal_ptr:('ptr -> 'ptr -> bool) ->
      deref:('ptr -> ('content, 'ptr) cell option result) ->
      cell_ptr:'ptr ->
      target_ptr:'ptr ->
      'ptr list ->
      bool result

    val search :
      deref:('ptr -> ('content, 'ptr) cell option result) ->
      compare:('content -> int) ->
      cell:('content, 'ptr) cell ->
      ('content, 'ptr) search_result result
  end

  module Make_monadic (M : MONAD) : MONADIC with type 'a result := 'a M.t =
  struct
    module Monad_syntax = struct
      include M

      let ( let* ) = bind

      (* Syntax for computations of type ['a option M.t]: [let*] binds a
         monadic option, [let*?] binds a plain option; both short-circuit
         to [M.return None] on [None]. *)
      module Option = struct
        let (return [@ocaml.inline "always"]) = fun x -> M.return (Some x)

        let ( let* ) lo f =
          M.bind lo (function None -> M.return None | Some x -> f x)

        let ( let*? ) o f = match o with Some x -> f x | None -> M.return None
      end
    end

    (* Pointers visited from [cell_ptr] down to the cell of index
       [target_index], most recently visited first. The powers are computed
       once, from the starting cell, and reused for every visited cell. *)
    let rev_back_path ~deref ~cell_ptr ~target_index =
      let open Monad_syntax.Option in
      let* cell = deref cell_ptr in
      let powers = list_powers cell in
      let rec aux path ptr =
        let path = ptr :: path in
        let* cell = deref ptr in
        let index = cell.index in
        if Compare.Z.(target_index = index) then return path
        else if Compare.Z.(target_index > index) then M.return None
        else
          let*? best_idx = best_skip cell target_index powers in
          let*? ptr = back_pointer cell best_idx in
          aux path ptr
      in
      aux [] cell_ptr

    let find ~deref ~cell_ptr ~target_index =
      let open Monad_syntax.Option in
      let* rev_back_path = rev_back_path ~deref ~cell_ptr ~target_index in
      (* The head of the reversed path is the pointer to the target cell. *)
      let*? cell_ptr = List.hd rev_back_path in
      deref cell_ptr

    let back_path ~deref ~cell_ptr ~target_index =
      let open Monad_syntax.Option in
      let* rev_back_path = rev_back_path ~deref ~cell_ptr ~target_index in
      return (List.rev rev_back_path)

    (* [mem equal x l] holds when some [Some y] in the array [l] satisfies
       [equal x y]. *)
    let mem equal x l =
      let open FallbackArray in
      let n = length l in
      let rec aux idx =
        if Compare.Int.(idx >= n) then false
        else
          match get l idx with
          | None -> aux (idx + 1)
          | Some y -> if equal x y then true else aux (idx + 1)
      in
      aux 0

    (* [assume_some o f] runs [f] on the value carried by [o], or returns
       [false] when [o] resolves to [None]. *)
    let assume_some o f =
      let open Monad_syntax in
      let* o = o in
      match o with None -> return false | Some x -> f x

    let valid_back_path ~equal_ptr ~deref ~cell_ptr ~target_ptr path =
      let open Monad_syntax in
      assume_some (deref target_ptr) @@ fun target ->
      assume_some (deref cell_ptr) @@ fun cell ->
      let target_index = index target
      and cell_index = index cell
      and powers = list_powers cell in
      (* [index] is the index of the cell pointed to by [cell_ptr]. *)
      let rec valid_path index cell_ptr path =
        match (cell_ptr, path) with
        | final_cell, [] ->
            return
              (equal_ptr target_ptr final_cell
              && Compare.Z.(index = target_index))
        | cell_ptr, cell_ptr' :: path ->
            assume_some (deref cell_ptr) @@ fun cell ->
            assume_some (deref cell_ptr') @@ fun cell' ->
            if mem equal_ptr cell_ptr' cell.back_pointers then
              assume_some (return @@ best_skip cell target_index powers)
              @@ fun best_idx ->
              assume_some (return @@ back_pointer cell best_idx)
              @@ fun best_ptr ->
              (* The step is accepted only if it is the one [best_skip]
                 selects. *)
              let minimal = equal_ptr best_ptr cell_ptr' in
              let index' = cell'.index in
              if minimal then valid_path index' cell_ptr' path
              else return false
            else return false
      in
      match path with
      | [] -> return false
      | first_cell_ptr :: path ->
          if equal_ptr first_cell_ptr cell_ptr then
            valid_path cell_index cell_ptr path
          else return false

    let search (type ptr) ~(deref : ptr -> ('content, ptr) cell option M.t)
        ~compare ~cell =
      let open Monad_syntax in
      let ( = ), ( < ), ( > ) = Compare.Int.(( = ), ( < ), ( > )) in
      (* Given a cell, to compute the minimal path, we need to find the
         good back-pointer. This is done linearly with respect to the
         number of back-pointers. This number of back-pointers is
         logarithmic with respect to the number of non-empty
         inboxes. The complexity is consequently in O(log_2^2(n)). Since
         in practice, [n < 2^32], we have at most [1000] calls. Besides,
         the recursive function is tail recursive.

         The linear search could be turned into a dichotomy search if
         necessary. But since this piece of code won't be used in a
         carbonated function, we prefer to keep a simple implementation
         for the moment. *)
      let rec aux rev_path cell ix =
        (* Below, we call the [target] the cell for which [compare target = 0]. *)

        (* Invariant:

           - compare cell > target
           - ix >= 0
           - if cell <> genesis => ix < List.length (back_pointers cell)
           - \exists path' rev_path = cell:path'
        *)
        let back_pointers_length = FallbackArray.length cell.back_pointers in
        if back_pointers_length = 0 then
          (* [cell] is the genesis cell. *)
          return {rev_path; last_cell = No_exact_or_lower_ptr}
        else
          let candidate_ptr =
            match back_pointer cell ix with
            | None ->
                (* At this point we have [cell <> genesis]. Consequently,
                   thanks to the invariant of this function, we have [ix
                   < List.length (back_pointers cell)]. Consequently, the
                   call to [back_pointer] cannot fail. *)
                assert false
            | Some candidate_ptr -> candidate_ptr
          in
          let* derefed = deref candidate_ptr in
          match derefed with
          | None ->
              (* If we cannot dereference a pointer, we stop the search
                 and return the current path. *)
              return {rev_path; last_cell = Deref_returned_none}
          | Some next_cell -> (
              let comparison = compare next_cell.content in
              if comparison = 0 then
                (* We have found the target. *)
                let rev_path = next_cell :: rev_path in
                return {rev_path; last_cell = Found next_cell}
              else if comparison > 0 then
                if ix < back_pointers_length - 1 then
                  (* There might be a shorter path by dereferencing the
                     next pointer. *)
                  aux rev_path cell (ix + 1)
                else
                  (* The last pointer is still above the target. We are on
                     the good track. *)
                  let rev_path = next_cell :: rev_path in
                  aux rev_path next_cell 0
              else if ix = 0 then
                (* We found a cell lower than the target. *)
                (* The first back pointer gives a cell below the target. *)
                let rev_path = next_cell :: rev_path in
                return
                  {
                    rev_path;
                    last_cell = Nearest {lower = next_cell; upper = Some cell};
                  }
              else
                (* We found a cell lower than the target. *)
                (* The previous pointer was actually the good one. *)
                let good_candidate_ptr =
                  match back_pointer cell (ix - 1) with
                  | None -> assert false
                  | Some candidate_ptr -> candidate_ptr
                in
                let* derefed = deref good_candidate_ptr in
                match derefed with
                | None ->
                    (* We already dereferenced this pointer before. *)
                    assert false
                | Some good_next_cell ->
                    let rev_path = good_next_cell :: rev_path in
                    aux rev_path good_next_cell 0)
      in
      let comparison = compare cell.content in
      if Compare.Int.(comparison = 0) then
        (* Particular case where the target is the start cell. *)
        return {rev_path = [cell]; last_cell = Found cell}
      else if Compare.Int.(comparison < 0) then
        return
          {rev_path = [cell]; last_cell = Nearest {lower = cell; upper = None}}
      else aux [cell] cell 0
  end

  (* Direct-style instance: the identity monad. *)
  include Make_monadic (struct
    type 'a t = 'a

    let (bind [@ocaml.inline "always"]) = ( |> )

    let[@ocaml.inline always] return x = x
  end)

  module Lwt = Make_monadic (Lwt)
end