123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232(*****************************************************************************)(* *)(* Open Source License *)(* Copyright (c) 2021 Nomadic Labs <contact@nomadic-labs.com> *)(* *)(* Permission is hereby granted, free of charge, to any person obtaining a *)(* copy of this software and associated documentation files (the "Software"),*)(* to deal in the Software without restriction, including without limitation *)(* the rights to use, copy, modify, merge, publish, distribute, sublicense, *)(* and/or sell copies of the Software, and to permit persons to whom the *)(* Software is furnished to do so, subject to the following conditions: *)(* *)(* The above copyright notice and this permission notice shall be included *)(* in all copies or substantial portions of the Software. *)(* *)(* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR*)(* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *)(* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *)(* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER*)(* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING *)(* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER *)(* DEALINGS IN THE SOFTWARE. *)(* *)(*****************************************************************************)(* In order to keep this algorithm reasonably efficient even for large
expressions, it is written in a tail-recursive manner. This means that
no node can descend into a recursive call directly to diff its children.
Instead a queue of nodes to be processed is maintained and any children
a node might have are pushed into this queue to be processed later.
[dequeue] and [diff_step] functions are responsible for this.
This means that the algorithm also needs to maintain a state in order to
know how to build the resulting expression. This state has the form of
a stack (see [state] and [state_stack] types). Each item on the stack
represents a [Seq] or a [Prim] node currently being built. Multiple levels
that might exist on the stack represent nesting of those nodes.
Whenever a [Seq] or a [Prim] node is discovered in both versions of the
expression, a corresponding item is added to the stack and all the children
of both nodes are paired in order and pushed into the queue (see [diff_item]
type and [zip_nodes] function). The queue is organised in levels (represented
as a list of lists). When a level is finished, one level of the stack is
folded into a complete expression, which is then added to the accumulator a
level up, from where it'll be included in the encompassing expression when
that is finished (see [fold_stack_level] function).
Eventually there will be no more nodes to dequeue, at which point all the
remaining stack levels are folded and the final result is produced. *)openMichelineletrepr_node=function|Int(_,i)->Format.asprintf"%a"Z.pp_printi|String(_,s)->Format.sprintf"\"%s\""s|Bytes(_,b)->Format.sprintf"0x%s"(Bytes.to_stringb)|Prim(_,prim,_,_)->prim|Seq(_,_)->"{...}"letno_comment:Micheline_printer.location={comment=None}letadded=Micheline_printer.{comment=Some"+"}letremoved=Micheline_printer.{comment=Some"-"}letreplacedrepl=Micheline_printer.{comment=Some("-> "^repr_noderepl)}letrecreplace_location:'a.Micheline_printer.location->('a,string)node->Micheline_printer.node=funloc->function|Int(_,i)->Int(loc,i)|String(_,s)->String(loc,s)|Bytes(_,b)->Bytes(loc,b)|Prim(_,p,args,annots)->Prim(loc,p,List.map(replace_locationno_comment)args,annots)|Seq(_,es)->Seq(loc,List.map(replace_locationno_comment)es)letseqdiffnodes=Seq(diff,nodes)letprim~name?(annots=[])diffnodes=Prim(diff,name,nodes,annots)typenode=Micheline_printer.nodetype('a,'b)diff_item=Bothof'a*'b|Left_onlyof'a|Right_onlyof'btypenode_state={is_different:bool;accum:nodelist;constr:nodelist->node;}type('a,'b)state_level={queue:('a,'b)diff_itemlist;node_state:node_state;}(* Essentially a non-empty list. *)type('a,'b)state_stack=|Bottomof('a,'b)state_level|Levelof('a,'b)state_level*('a,'b)state_stackletinitial={queue=[];node_state={is_different=false;accum=[];constr=List.hd};}letreczip_nodes=function|[],[]->[]|p::prevs,[]->Left_onlyp::zip_nodes(prevs,[])|[],c::curs->Right_onlyc::zip_nodes([],curs)|p::prevs,c::curs->Both(p,c)::zip_nodes(prevs,curs)letadd_stack_level~constr~children~diffstate_stack=letMicheline_printer.{comment}=diffinletlevel={queue=zip_nodeschildren;node_state={is_different=Option.is_somecomment;accum=[];constr=constrdiff;};}inLevel(level,state_stack)letupdate_node_stateis_differentnodestate={statewithis_different=is_different||state.is_different;accum=node::state.accum;}letfold_state{is_different;accum;constr}=(is_different,constr@@List.revaccum)letaccumulate_child(is_different,node)=function|Bottom{queue;node_state}->Bottom{queue;node_state=update_node_stateis_differentnodenode_state}|Level({queue;node_state},stack)->Level({queue;node_state=update_node_stateis_differentnodenode_state},stack)letdiff_simpleprevcurstate=match(prev,cur)with|Int(_,p),Int(_,c)whenZ.equalpc->accumulate_child(false,Int(no_comment,p))state|String(_,p),String(_,c)whenString.equalpc->accumulate_child(false,String(no_comment,p))state|Bytes(_,p),Bytes(_,c)whenBytes.equalpc->accumulate_child(false,Bytes(no_comment,p))state(* This function won't be called with pairs (Seq, Seq) or (Prim, Prim),
so we don't care about looking inside those. This is taken care of
elsewhere. *)|prev,cur->accumulate_child(true,replace_location(replacedcur)prev)stateletrecdequeue=function|Bottom{queue=[];node_state}->fold_statenode_state|Bottom{queue=item::items;node_state}->(diff_step[@ocaml.tailcall])(Bottom{queue=items;node_state})item|Level({queue=[];node_state},stack)->(dequeue[@ocaml.tailcall])(accumulate_child(fold_statenode_state)stack)|Level({queue=item::items;node_state},stack)->(diff_step[@ocaml.tailcall])(Level({queue=items;node_state},stack))itemanddiff_stepstatenodes=dequeue@@matchnodeswith|Right_only(Seq(_,curs))->add_stack_level~constr:seq~children:([],curs)~diff:addedstate|Right_only(Prim(_,name,args,annots))->add_stack_level~constr:(prim~name~annots)~children:([],args)~diff:addedstate|Right_only((Int_|String_|Bytes_)asexpr)->accumulate_child(true,replace_locationaddedexpr)state|Left_only(Seq(_,prevs))->add_stack_level~constr:seq~children:(prevs,[])~diff:removedstate|Left_only(Prim(_,name,args,annots))->add_stack_level~constr:(prim~name~annots)~children:(args,[])~diff:removedstate|Left_only((Int_|String_|Bytes_)asexpr)->accumulate_child(true,replace_locationremovedexpr)state|Both(Seq(_,prevs),Seq(_,curs))->add_stack_level~constr:seq~children:(prevs,curs)~diff:no_commentstate|Both(Prim(_,prev_name,args_prev,annots),(Prim(_,cur_name,args_cur,_)ascur))->add_stack_level~constr:(prim~name:prev_name~annots)~children:(args_prev,args_cur)~diff:(ifprev_name=cur_namethenno_commentelsereplacedcur)state|Both(Seq(_,prevs),(Prim(_,_,args_cur,_)ascur))->add_stack_level~constr:seq~children:(prevs,args_cur)~diff:(replacedcur)state|Both(Prim(_,name,args_prev,annots),(Seq(_,curs)ascur))->add_stack_level~constr:(prim~name~annots)~children:(args_prev,curs)~diff:(replacedcur)state|Both((Seq(_,_)asprevs),((Int_|String_|Bytes_)ascur))->accumulate_child(true,replace_location(replacedcur)prevs)state|Both((Prim(_,_,_,_)asprev),((Int_|String_|Bytes_)ascur))->accumulate_child(true,replace_location(replacedcur)prev)state|Both(((Int_|String_|Bytes_)asprev),cur)->diff_simpleprevcurstateletdiff~prev~current()=letis_different,diff=diff_step(Bottominitial)(Both(prev,current))inifis_differentthenSomediffelseNone