123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463(**************************************************************************)(* *)(* OCaml *)(* *)(* Gabriel Radanne, projet Cambium, Inria Paris *)(* *)(* Copyright 2020 Institut National de Recherche en Informatique et *)(* en Automatique. *)(* *)(* All rights reserved. This file is distributed under the terms of *)(* the GNU Lesser General Public License version 2.1, with the *)(* special exception on linking described in the file LICENSE. *)(* *)(**************************************************************************)[@@@warning"-16"](* This module implements a modified version of Wagner-Fischer
See <https://en.wikipedia.org/wiki/Wagner%E2%80%93Fischer_algorithm>
for preliminary reading.
The main extensions is that:
- State is computed based on the optimal patch so far.
- The lists can be extended at each state computation.
We add the constraint that extensions can only be in one side
(either the left or right list). This is enforced by the external API.
*)(** Shared types *)typechange_kind=|Deletion|Insertion|Modification|Preservationletstyle=function|Preservation->Misc.Style.[FGGreen]|Deletion->Misc.Style.[FGRed;Bold]|Insertion->Misc.Style.[FGRed;Bold]|Modification->Misc.Style.[FGMagenta;Bold]letprefixppf(pos,p)=letopenFormat_docinletsty=stylepinpp_open_stagppf(Misc.Style.Stylesty);fprintfppf"%i. "pos;pp_close_stagppf()let(let*)=Option.bindlet(let+)xf=Option.mapfxlet(let*!)xf=Option.iterfxmoduletypeDefs=sigtypelefttyperighttypeeqtypedifftypestateendtype('left,'right,'eq,'diff)change=|Deleteof'left|Insertof'right|Keepof'left*'right*'eq|Changeof'left*'right*'diffletclassify=function|Delete_->Deletion|Insert_->Insertion|Change_->Modification|Keep_->PreservationmoduleDefine(D:Defs)=structopenDtypenonrecchange=(left,right,eq,diff)changetypepatch=changelistmoduletypeS=sigvaldiff:state->leftarray->rightarray->patchendtypefull_state={line:leftarray;column:rightarray;state:state}(* The matrix supporting our dynamic programming implementation.
Each cell contains:
- The diff and its weight
- The state computed so far
- The lists, potentially extended locally.
The matrix can also be reshaped.
*)moduleMatrix:sigtypeshape={l:int;c:int}typetvalmake:shape->tvalreshape:shape->t->t(** accessor functions *)valdiff:t->int->int->changeoptionvalstate:t->int->int->full_stateoptionvalweight:t->int->int->intvalline:t->int->int->leftoptionvalcolumn:t->int->int->rightoptionvalset:t->int->int->diff:changeoption->weight:int->state:full_state->unit(** the shape when starting filling the matrix *)valshape:t->shape(** [shape m i j] is the shape as seen from the state at position (i,j)
after some possible extensions
*)valshape_at:t->int->int->shapeoption(** the maximal shape on the whole matrix *)valreal_shape:t->shape(** debugging printer *)val[@warning"-32"]pp:Format.formatter->t->unitend=structtypeshape={l:int;c:int}typet={states:full_stateoptionarrayarray;weight:intarrayarray;diff:changeoptionarrayarray;columns:int;lines:int;}letopt_getan=ifn<Array.lengthathenSome(Array.unsafe_getan)elseNoneletlinemij=let*st=m.states.(i).(j)inopt_getst.lineiletcolumnmij=let*st=m.states.(i).(j)inopt_getst.columnjletdiffmij=m.diff.(i).(j)letweightmij=m.weight.(i).(j)letstatemij=m.states.(i).(j)letshapem={l=m.lines;c=m.columns}letsetmij~diff~weight~state=m.weight.(i).(j)<-weight;m.states.(i).(j)<-Somestate;m.diff.(i).(j)<-diff;()letshape_attblij=let+st=tbl.states.(i).(j)inletl=Array.lengthst.lineinletc=Array.lengthst.columnin{l;c}letreal_shapetbl=letlines=reftbl.linesinletcolumns=reftbl.columnsinfori=0totbl.linesdoforj=0totbl.columnsdolet*!{l;c}=shape_attblijinifl>!linesthenlines:=l;ifc>!columnsthencolumns:=cdone;done;{l=!lines;c=!columns}letmake{l=lines;c=columns}={states=Array.make_matrix(lines+1)(columns+1)None;weight=Array.make_matrix(lines+1)(columns+1)max_int;diff=Array.make_matrix(lines+1)(columns+1)None;lines;columns;}letreshape{l=lines;c=columns}m=letcopydefaulta=Array.init(1+lines)(funi->Array.init(1+columns)(funj->ifi<=m.lines&&j<=m.columnsthena.(i).(j)elsedefault))in{states=copyNonem.states;weight=copymax_intm.weight;diff=copyNonem.diff;lines;columns}letppppfm=let{l;c}=shapeminFormat.eprintf"Shape : %i, %i@."lc;fori=0toldoforj=0tocdoletd=diffmijinmatchdwith|None->Format.fprintfppf" "|Somediff->letsdiff=matchdiffwith|Insert_->"\u{2190}"|Delete_->"\u{2191}"|Keep_->"\u{2196}"|Change_->"\u{21F1}"inletw=weightmijinFormat.fprintfppf"%s%i "sdiffwdone;Format.pp_print_newlineppf()doneend(* Building the patch.
We first select the best final cell. A potential final cell
is a cell where the local shape (i.e., the size of the strings) correspond
to its position in the matrix. In other words: it's at the end of both its
strings. We select the final cell with the smallest weight.
We then build the patch by walking backward from the final cell to the
origin.
*)letselect_final_statem0=letmaybe_finalij=matchMatrix.shape_atm0ijwith|Someshape_here->shape_here.l=i&&shape_here.c=j|None->falseinletbest_state(i0,j0,weigth0)(i,j)=letweight=Matrix.weightm0ijinifweight<weigth0then(i,j,weight)else(i0,j0,weigth0)inletres=ref(0,0,max_int)inletshape=Matrix.shapem0infori=0toshape.ldoforj=0toshape.cdoifmaybe_finalijthenres:=best_state!res(i,j)donedone;leti_final,j_final,_=!resinassert(i_final<>0||j_final<>0);(i_final,j_final)letconstruct_patchm0=letrecauxacc(i,j)=ifi=0&&j=0thenaccelsematchMatrix.diffm0ijwith|None->assertfalse|Somed->letnext=matchdwith|Keep_|Change_->(i-1,j-1)|Delete_->(i-1,j)|Insert_->(i,j-1)inaux(d::acc)nextinaux[](select_final_statem0)(* Computation of new cells *)letselect_best_propositionl=letcompare_propositioncurrprop=matchcurr,propwith|None,o|o,None->o|Some(curr_m,curr_res),Some(m,res)->Some(ifcurr_m<=mthencurr_m,curr_reselsem,res)inList.fold_leftcompare_propositionNonelmoduletypeFull_core=sigtypeupdate_resulttypeupdate_statevalweight:change->intvaltest:state->left->right->(eq,diff)resultvalupdate:change->update_state->update_resultendmoduleGeneric(X:Full_corewithtypeupdate_result:=full_stateandtypeupdate_state:=full_state)=structopenX(* Boundary cell update *)letcompute_column0tbli=let*!st=Matrix.statetbl(i-1)0inlet*!line=Matrix.linetbl(i-1)0inletdiff=DeletelineinMatrix.settbli0~weight:(weightdiff+Matrix.weighttbl(i-1)0)~state:(updatediffst)~diff:(Somediff)letcompute_line0tblj=let*!st=Matrix.statetbl0(j-1)inlet*!column=Matrix.columntbl0(j-1)inletdiff=InsertcolumninMatrix.settbl0j~weight:(weightdiff+Matrix.weighttbl0(j-1))~state:(updatediffst)~diff:(Somediff)letcompute_inner_celltblij=letcompute_propositionijdiff=let*diff=diffinlet+localstate=Matrix.statetblijinweightdiff+Matrix.weighttblij,(diff,localstate)inletdel=letdiff=let+x=Matrix.linetbl(i-1)jinDeletexincompute_proposition(i-1)jdiffinletinsert=letdiff=let+x=Matrix.columntbli(j-1)inInsertxincompute_propositioni(j-1)diffinletdiag=letdiff=let*state=Matrix.statetbl(i-1)(j-1)inlet*line=Matrix.linetbl(i-1)(j-1)inlet*column=Matrix.columntbl(i-1)(j-1)inmatchteststate.statelinecolumnwith|Okok->Some(Keep(line,column,ok))|Errorerr->Some(Change(line,column,err))incompute_proposition(i-1)(j-1)diffinlet*!newweight,(diff,localstate)=(* The order of propositions is important here:
the call [select_best_proposition [P_0, ...; P_n]] keeps the first
proposition with minimal weight as the representative path for this
weight class at the current matrix position.
By induction, the representative path for the minimal weight class will
be the smallest path according to the reverse lexical order induced by
the element order [[P_0;...; P_n]].
This is why we choose to start with the [Del] case since path ending with
[Del+] suffix are likely to correspond to parital application in the
functor application case.
Similarly, large block of deletions or insertions at the end of the
definitions might point toward incomplete definitions.
Thus this seems a good overall setting. *)select_best_proposition[del;insert;diag]inletstate=updatedifflocalstateinMatrix.settblij~weight:newweight~state~diff:(Somediff)letcompute_cellmij=matchi,jwith|_whenMatrix.diffmij<>None->()|0,0->()|0,j->compute_line0mj|i,0->compute_column0mi;|_->compute_inner_cellmij(* Filling the matrix
We fill the whole matrix, as in vanilla Wagner-Fischer.
At this point, the lists in some states might have been extended.
If any list have been extended, we need to reshape the matrix
and repeat the process
*)letcompute_matrixstate0=letm0=Matrix.make{l=0;c=0}inMatrix.setm000~weight:0~state:state0~diff:None;letrecloopm=letshape=Matrix.shapeminletnew_shape=Matrix.real_shapeminifnew_shape.l>shape.l||new_shape.c>shape.cthenletm=Matrix.reshapenew_shapeminfori=0tonew_shape.ldoforj=0tonew_shape.cdocompute_cellmijdonedone;loopmelseminloopm0endmoduletypeParameters=Full_corewithtypeupdate_state:=statemoduleSimple(X:Parameterswithtypeupdate_result:=state)=structmoduleInternal=Generic(structlettest=X.testletweight=X.weightletupdatedfs={fswithstate=X.updatedfs.state}end)letdiffstatelinecolumn=letfullstate={line;column;state}inInternal.compute_matrixfullstate|>construct_patchendletmay_appendx=function|[||]->x|y->Array.appendxymoduleLeft_variadic(X:Parameterswithtypeupdate_result:=state*leftarray)=structopenXmoduleInternal=Generic(structlettest=X.testletweight=X.weightletupdatedfs=letstate,a=updatedfs.statein{fswithstate;line=may_appendfs.linea}end)letdiffstatelinecolumn=letfullstate={line;column;state}inInternal.compute_matrixfullstate|>construct_patchendmoduleRight_variadic(X:Parameterswithtypeupdate_result:=state*rightarray)=structopenXmoduleInternal=Generic(structlettest=X.testletweight=X.weightletupdatedfs=letstate,a=updatedfs.statein{fswithstate;column=may_appendfs.columna}end)letdiffstatelinecolumn=letfullstate={line;column;state}inInternal.compute_matrixfullstate|>construct_patchendend