123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236(******************************************************************************)(* *)(* Reachability *)(* *)(* Copyright (c) 2025 Frédéric Bour *)(* *)(* Permission is hereby granted, free of charge, to any person obtaining a *)(* copy of this software and associated documentation files (the "Software"), *)(* to deal in the Software without restriction, including without limitation *)(* the rights to use, copy, modify, merge, publish, distribute, sublicense, *)(* and/or sell copies of the Software, and to permit persons to whom the *)(* Software is furnished to do so, subject to the following conditions: *)(* *)(* The above copyright notice and this permission notice shall be included in *)(* all copies or substantial portions of the Software. *)(* *)(* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *)(* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *)(* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *)(* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER *)(* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING *)(* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER *)(* DEALINGS IN THE SOFTWARE. *)(* *)(******************************************************************************)(** This module computes the reachability of states in a parser automaton. It is
used to reason about the behavior of an LR(1) automaton after conflict
resolution (with some transitions removed).
The module implements algorithms for partitioning lookahead symbols with
identical behaviors, and uses these partitions to determine the cost of
reaching each state with a given lookahead. *)openFix.IndexingopenUtilsopenMiscopenInfomoduletypeS=sigtypegtypereduction={(* The production that is being reduced *)production:gproductionindex;(* The set of lookahead terminals that allow this reduction to happen *)lookahead:gterminalindexset;(* The shape of the stack, all the transitions that are replaced by the
goto transition when the reduction is performed *)steps:gtransitionindexlist;(* The lr1 state at the top of the stack before reducing.
That is [state] can reduce [production] when the lookahead terminal
is in [lookahead]. *)state:glr1index;}(* [unreduce tr] lists all the reductions that ends up following [tr]. *)valunreduce:ggoto_transitionindex->reductionlistmoduleClasses:sig(* Returns the classes of terminals for a given goto transition *)valfor_edge:ggoto_transitionindex->gterminalindexsetarray(* Returns the classes of terminals for a given LR(1) state *)valfor_lr1:glr1index->gterminalindexsetarray(* Returns the classes of terminals before taking a transition *)valpre_transition:gtransitionindex->gterminalindexsetarray(* Returns the classes of terminals after taking a transition *)valpost_transition:gtransitionindex->gterminalindexsetarrayendmoduleCoercion:sigtypepre=Pre_identity|Pre_singletonofint(* Compute the pre coercion from a partition of the form
P = first(cost(s, A))
to a partition of the form
Q = first(ccost(s, A → ϵ•α)))
*)valpre:'aindexsetarray->'aindexsetarray->preoptiontypeforward=intarrayarraytypebackward=intarraytypeinfix={forward:forward;backward:backward;}(* Compute the infix coercion from two partitions P Q such that Q <= P *)valinfix:?lookahead:'aindexset->'aindexsetarray->'aindexsetarray->infixendmoduleTree:sigincludeCARDINAL(* Returns the leaf node corresponding to a given transition *)valleaf:gtransitionindex->nindex(* Splits a node into its left and right children if it is an inner node *)valsplit:nindex->(gtransitionindex,nindex*nindex)either(* Returns the nullable terminals and non-nullable equations for a given goto transition *)typeequations={nullable_lookaheads:gterminalindexset;nullable:reductionlist;non_nullable:(reduction*nindex)list;}valgoto_equations:ggoto_transitionindex->equations(* Returns the pre-classes for a given node *)valpre_classes:nindex->gterminalindexsetarray(* Returns the post-classes for a given node *)valpost_classes:nindex->gterminalindexsetarrayend(* Identify each cell of compact cost matrices.
A [Cell.n index] can be thought of as a triple made of a tree node and two indices
(row, col) of the compact cost matrix associated to the node. *)moduleCell:sigincludeCARDINAL(* A value of type row represents the index of a row of a matrix.
A row of node [n] belongs to the interval
0 .. Array.length (Tree.pre_classes n) - 1
*)typerow=int(* A value of type column represents the index of a column of a matrix.
A column of node [n] belongs to the interval
0 .. Array.length (Tree.post_classes n) - 1
*)typecolumn=int(* Get the cell corresponding to a node, a row, and a column *)valencode:Tree.nindex->pre:row->post:column->nindex(* Get the node, row, and column corresponding to a cell *)valdecode:nindex->Tree.nindex*row*columntypegotovalgoto:gotocardinalvalis_goto:nindex->gotoindexoptionvalof_goto:gotoindex->nindexvalgoto_encode:ggoto_transitionindex->pre:row->post:column->gotoindexvalgoto_decode:gotoindex->ggoto_transitionindex*row*columnvaliter_goto:ggoto_transitionindex->(gotoindex->unit)->unitendmoduleAnalysis:sigvalcost:Cell.nindex->intvalfinite:Cell.nindex->boolendendtype'gt=(moduleSwithtypeg='g)type('g,'cell)t_cell=(moduleSwithtypeg='gandtypeCell.n='cell)letmake(typeg)(g:ggrammar):gt=(modulestructtypenonrecg=g(* ---------------------------------------------------------------------- *)(* Useful definitions *)(* Testing class inclusion *)letquick_subset=IndexSet.quick_subset(* ---------------------------------------------------------------------- *)(* Compute the inverse of the reduction relation.
It lists the different reductions that lead to following a goto
transition, reversing the effect of a single reduction.
It serves the same purpose as the [reduce(s, A → α)] function from the
paper but is more convenient for the rest of the implementation.
*)typereduction={(* The production that is being reduced *)production:gproductionindex;(* The set of lookahead terminals that allow this reduction to happen *)lookahead:gterminalindexset;(* The shape of the stack, all the transitions that are replaced by the
goto transition when the reduction is performed *)steps:gtransitionindexlist;(* The lr1 state at the top of the stack before reducing.
That is [state] can reduce [production] when the lookahead terminal
is in [lookahead]. *)state:glr1index;}(* [unreduce tr] lists all the reductions that ends up following [tr]. *)letunreduce:ggoto_transitionindex->reductionlist=letpredecessors=Vector.init(Lr1.cardinalg)@@funlr1->iterate[lr1,[]]@@funstates->letexpandacc(state,steps)=IndexSet.fold(funtracc->(Transition.sourcegtr,tr::steps)::acc)(Transition.predecessorsgstate)accinList.fold_leftexpand[]statesinlettable=Vector.make(Transition.gotog)[]in(* [add_reduction lr1 (production, lookahead)] populates [table] by
simulating the reduction [production], starting from [lr1] when
lookahead is in [lookahead] *)letadd_reductionlr1(production,lookahead)=ifProduction.kindgproduction=`REGULARthenbeginletlhs=Production.lhsgproductioninletrhs=Production.rhsgproductioninletstates=Array.fold_right(fun_pred->Lazy.forcepred.lnext)rhspredecessors.:(lr1)inList.iter(fun(source,steps)->table.@(Transition.find_gotogsourcelhs)<-List.cons{production;lookahead;steps;state=lr1})states.lvalueendin(* [get_reductions lr1] returns the list of productions and the lookahead
sets that allow reducing them from state [lr1] *)letget_reductionslr1=matchLr1.default_reductionglr1with|Someprod->(* State has a default reduction, the lookahead can be any terminal *)[prod,Terminal.allg]|None->IndexSet.fold(funredacc->(Reduction.productiongred,Reduction.lookaheadsgred)::acc)(Reduction.from_lr1glr1)[]in(* Populate [table] with the reductions of all state *)Index.iter(Lr1.cardinalg)(funlr1->List.iter(add_reductionlr1)(get_reductionslr1));Vector.gettable(* ---------------------------------------------------------------------- *)(* Compute classes refinement.
This implements section 6.2, Approximating first and follow Partitions.
This algorithm computes a partition of tokens for each transition.
The partition is a bit finer than necessary, but the approximation is
still sound: merging the rows or columns of the matrices based on token
classes gives a correct result.
*)moduleClasses=struct(* A node of the graph is either an lr1 state or a goto transition *)moduleNode=(valSum.make(Lr1.cardinalg)(Transition.gotog))(* Represents the dependency graph of Equation 7-9, to compute the SCCs *)moduleGr=structtypenode=Node.nindexletn=cardinalNode.nletindex=Index.to_intletvisit_lr1flr1=matchLr1.incomingglr1with|SomesymwhenSymbol.is_nonterminalgsym->IndexSet.iter(funtr->matchTransition.splitgtrwith|Lnt->f(Node.inj_rnt)|R_->assertfalse)(Transition.predecessorsglr1)|_->()letsuccessorsfi=matchNode.prjiwith|Llr1->visit_lr1flr1|Re->List.iter(fun{state;_}->f(Node.inj_lstate))(unreducee)letiterf=Index.iterNode.nfendmoduleScc=Tarjan.Run(Gr)(* Associate a class to each node *)letclasses=Vector.makeNode.nIndexSet.Set.empty(* Evaluate classes for a node, directly computing equation 4-6.
(compute follow for a goto node, first for an lr1 node)
[classes] vector is used to approximate recursive occurrences.
*)letclasses_ofaccnode=letacc=refaccinbeginmatchNode.prjnodewith|Llr1->Gr.visit_lr1(funn->acc:=IndexSet.Set.unionclasses.:(n)!acc)lr1|Redge->List.iter(fun{lookahead;state;_}->letbase=classes.:(Node.inj_lstate)in(* Comment the code below to have a partial order on partitions
(remove the ↑Z in equation (6) *)letbase=iflookahead!=Terminal.allgthenIndexSet.Set.map(IndexSet.interlookahead)baseelsebasein(* Stop commenting here *)acc:=IndexSet.Set.union(IndexSet.Set.addlookaheadbase)!acc)(unreduceedge)end;!accletpartition_setssets=sets|>IndexSet.Set.elements|>IndexRefine.partition|>IndexSet.Set.of_listletvisit_scc_nodes=(* Compute approximation for an SCC, as described in section 6.2 *)letcoarse_classes=partition_sets(List.fold_leftclasses_ofIndexSet.Set.emptynodes)inmatchnodeswith|[node]->classes.:(node)<-coarse_classes|nodes->List.iterbeginfunnode->matchNode.prjnodewith|L_->()|Re->letcoarse=refIndexSet.emptyinList.iter(fun{lookahead;_}->coarse:=IndexSet.unionlookahead!coarse)(unreducee);classes.:(node)<-partition_sets(IndexSet.Set.map(IndexSet.inter!coarse)coarse_classes)endnodes;List.iterbeginfunnode->matchNode.prjnodewith|R_->()|Llr1->letacc=refIndexSet.Set.emptyinGr.visit_lr1(funn->acc:=IndexSet.Set.unionclasses.:(n)!acc)lr1;classes.:(node)<-partition_sets!accendnodeslet()=Scc.rev_topological_itervisit_scc(* Initialize classes of initial states and of states whose incoming
symbol is a terminal *)let()=Index.iter(Lr1.cardinalg)(funlr1->matchLr1.incomingglr1with|SomesymwhenSymbol.is_nonterminalgsym->()|None|Some_->classes.:(Node.inj_llr1)<-IndexSet.Set.singleton(Terminal.allg))(* We now have the final approximation.
Classes will be identified and accessed by their index,
random access is important.
*)letclasses=letpreparel=leta=Array.of_seq(IndexSet.Set.to_seql)inArray.sortIndexSet.compare_minimuma;ainVector.mapprepareclassesletfor_edgente=classes.:(Node.inj_rnte)letfor_lr1st=classes.:(Node.inj_lst)(* Precompute the singleton partitions, e.g. { {t}, T/{t} } for each t *)lett_singletons=Vector.init(Terminal.cardinalg)(funt->[|IndexSet.singletont|])letall_terminals=[|Terminal.allg|](* Just before taking a transition [tr], the lookahead has to belong to
one of the classes in [pre_transition tr].
[pre_transition tr] indexes the rows of cost matrix for [tr].
*)letpre_transitiontr=matchTransition.splitgtrwith|L_goto->for_lr1(Transition.sourcegtr)|Rshift->t_singletons.:(Transition.shift_symbolgshift)(* Just after taking a transition [tr], the lookahead has to belong to
one of the classes in [post_transition tr].
[post_transition tr] indexes the columns of cost matrix for [tr].
*)letpost_transitiontr=matchTransition.splitgtrwith|Ledge->for_edgeedge|R_->all_terminalsendlet()=stopwatch2"reachability: computed classes"(* ---------------------------------------------------------------------- *)(* We now construct the DAG (as a tree with hash-consing) of all matrix
products.
Each occurrence of [ccost(s,x)] is mapped to a leaf.
Occurrences of [(ccost(s, A → α•xβ)] are mapped to inner nodes, except
that the chain of multiplication are re-associated.
*)moduleConsedTree():sig(* The finite set of nodes of the tree.
The set is not frozen yet: as long as its cardinal has not been
observed, new nodes can be added. *)includeCARDINAL(* The set of inner nodes *)moduleInner:CARDINAL(* [leaf tr] returns the node that corresponds [cost(s,x)]
where [s = source tr] and [x = symbol tr]. *)valleaf:gtransitionindex->nindex(* [node l r] returns the inner-node that corresponds to
the matrix product [l * r] *)valnode:nindex->nindex->nindex(* Get the tree node that corresponds to an inner node *)valinject:Inner.nindex->nindex(* Determines whether a node is a leaf or an inner node *)valsplit:nindex->(gtransitionindex,Inner.nindex)either(* Once all nodes have been added, the DAG needs to be frozen *)moduleFreezeTree():sigvaldefine:Inner.nindex->nindex*nindexendend=struct(* The fresh finite set of all inner nodes *)moduleInner=Gensym()(* The nodes of the trees is the disjoint sum of all transitions
(the leaves) and the inner nodes. *)include(valSum.make(Transition.anyg)Inner.n)letleaf=inj_lletinject=inj_rletsplit=prj(* An inner node is made of the index of its left and right children *)typepack=nindex*nindexletpacktu=(t,u)letunpackx=x(* The node table is used to give a unique index to each inner node *)letnode_table:(pack,Inner.nindex)Hashtbl.t=Hashtbl.create7(* Returns the index of an inner node, or allocate one for a new node *)letnodelr=letp=packlrinletnode_index=tryHashtbl.findnode_tablepwithNot_found->leti=Inner.fresh()inHashtbl.addnode_tablepi;iininj_rnode_index(* When all nodes have been created, the set of nodes can be frozen.
A reverse index is created to get the children of an inner node. *)moduleFreezeTree()=structletrev_index=Vector.make'Inner.n(fun()->letdummy=Index.of_intn0in(dummy,dummy))letdefineix=rev_index.:(ix)let()=Hashtbl.iter(funpairindex->rev_index.:(index)<-unpackpair)node_tableendend(* ---------------------------------------------------------------------- *)(* This module implements efficient representations of the coerce matrices,
as mentioned in section 6.5.
However, our implementation has one more optimization.
In general, we omit the last block of a partition (it can still be deduced
by removing the other blocks from the universe T, see section 6.1). The
block that is omitted is one that is guaranteed to have infinite cost in
the compact cost matrix.
Therefore, we never need to represent the rows and columns that correspond
to the missing class; by construction we know they have infinite cost.
For instance for shift transitions, it means we only have a 1x1 matrix:
the two classes are the terminal being shifted, with a cost of one, and
its complement, with an infinite cost, that is omitted.
Our coercion functions are augmented to handle this special case.
*)moduleCoercion=struct(* Pre coercions are used to handle the minimum in equation (7):
ccost(s, A → ϵ•α) · creduce(s, A → α)
If α begins with a terminal, it will have only one class.
This is handled by the [Pre_singleton x] constructor that indicates that
this only class should be coerced to class [x].
If α begins with a non-terminal, [Pre_identity] is used: ccost(s, A) and
ccost(s, A → ϵ•α) are guaranteed to have the same "first" classes.
*)typepre=|Pre_identity|Pre_singletonofint(* Compute the pre coercion from a partition of the form
P = first(cost(s, A))
to a partition of the form
Q = first(ccost(s, A → ϵ•α)))
If α starts with a terminal, we look only for the
*)letpreouterinner=ifouter==innerthenSomePre_identityelse(assert(Array.lengthinner=1);assert(IndexSet.is_singletoninner.(0));lett=IndexSet.chooseinner.(0)inmatchUtils.Misc.array_findi(fun_ts->IndexSet.memtts)0outerwith|i->Some(Pre_singletoni)|exceptionNot_found->(* If the production that starts with the 'inner' partition cannot be
reduced (because of conflict resolution), the transition becomes
unreachable and the terminal `t` might belong to no classes.
*)None)(* The type infix is the general representation for the coercion matrices
coerce(P, Q) appearing in M1 · coerce(P, Q) · M2
Since Q is finer than P, a class of P maps to multiple classes of Q.
This is represented by the forward array: a class p in P maps to all
classes q in array [forward.(p)].
The other direction is an injection: a class q in Q maps to class
[backward.(q)] in P.
The special class [-1] represents a class that is not mapped in the
partition (this occurs for instance when using creduce to filter a
partition).
*)typeforward=intarrayarraytypebackward=intarraytypeinfix={forward:forward;backward:backward}(* Compute the infix coercion from two partitions P Q such that Q <= P.
The optional [lookahead] argument is used to filter classes outside of a
certain set of terminals, exactly like the ↓ operator on partitions.
This is used to implement creduce operator.
*)letinfix?lookaheadpre_classespost_classes=letforward_size=Array.make(Array.lengthpre_classes)0inletbackward=Array.map(funca->letkeep=matchlookaheadwith|None->true|Somela->quick_subsetcalainifkeepthen(matchUtils.Misc.array_findi(fun_cb->quick_subsetcacb)0pre_classeswith|exceptionNot_found->-1|i->forward_size.(i)<-1+forward_size.(i);i)else(-1))post_classesinletforward=Array.map(funsz->Array.makesz0)forward_sizeinArray.iteri(funi_prei_f->ifi_f<>-1then(letpos=forward_size.(i_f)-1inforward_size.(i_f)<-pos;forward.(i_f).(pos)<-i_pre))backward;{forward;backward}end(* ---------------------------------------------------------------------- *)(* The hash-consed tree of all matrix equations (products and minimums). *)moduleTree=structincludeConsedTree()typeequations={nullable_lookaheads:gterminalindexset;nullable:reductionlist;non_nullable:(reduction*nindex)list;}letgoto_equations=(* Explicit representation of the rhs of equation (7).
This equation defines ccost(s, A) as the minimum of a set of
sub-matrices.
Matrices of the form [creduce(s, A → α)] are represented by a
[TerminalSet.t], following section 6.5.
[goto_equations] are represented as pair [(nullable, non_nullable)]
such that, for each sub-equation [ccost(s, A→ϵ•α) · creduce(s, A→α)]:
- if [α = ϵ] (an empty production can reduce A),
[nullable] contains the terminals [creduce(s, A → α)]
- otherwise,
[non_nullable] contains the pair [ccost(s, A→ϵ•α)], [creduce(s, A→α)}
*)tabulate_finset(Transition.gotog)@@funtr->(* Number of rows in the compact cost matrix for tr *)letfirst_dim=Array.length(Classes.pre_transition(Transition.of_gotogtr))in(* Number of columns in the compact cost matrix for a transition tr' *)lettransition_sizetr'=Array.length(Classes.post_transitiontr')in(* Import the solution to a matrix-chain ordering problem as a sub-tree *)letrecimport_mcop=function|Mcop.Matrixl->leafl|Mcop.Product(l,r)->node(import_mcopl)(import_mcopr)in(* Compute the nullable terminal set and non_nullable list for a single
reduction, optimizing the matrix-product chain. *)letsolve_ccost_pathred=letdimensions=first_dim::List.maptransition_sizered.stepsinmatchMcop.dynamic_solution(Array.of_listdimensions)with|exceptionMcop.Empty->Either.Leftred|solution->letsteps=Array.of_listred.stepsinletsolution=Mcop.map_solution(funi->steps.(i))solutioninEither.Right(red,import_mcopsolution)inletnullable,non_nullable=List.partition_mapsolve_ccost_path(unreducetr)in{nullable_lookaheads=List.fold_left(funsetred->IndexSet.unionred.lookaheadset)IndexSet.emptynullable;nullable;non_nullable;}includeFreezeTree()(* Pre-compute classes before (pre) and after (post) a node *)lettable_pre=Vector.makeInner.n[||]lettable_post=Vector.makeInner.n[||]letpre_classest=matchsplittwith|Ltr->Classes.pre_transitiontr|Rix->table_pre.:(ix)letpost_classest=matchsplittwith|Ltr->Classes.post_transitiontr|Rix->table_post.:(ix)letpre_countt=Array.length(pre_classest)letpost_countt=Array.length(post_classest)let()=(* Nodes are allocated in topological order.
When iterating over all nodes, children are visited before parents. *)Index.iterInner.n@@funnode->letl,r=definenodeintable_pre.:(node)<-pre_classesl;table_post.:(node)<-post_classesrletspliti=matchsplitiwith|L_asresult->result|Rn->R(definen)endlet()=stopwatch2"reachability: constructed tree"(* ---------------------------------------------------------------------- *)(* Representation of matrix cells, the variables of the data flow problem.
There will be a lot of them. Actually, on large grammars, most of the
memory is consumed by cost matrices.
Therefore we want a rather compact encoding.
We use a two-level encoding:
- first the [table] vector maps a node index to a "compact cost matrix"
- each "compact cost matrix" is represented as a 1-dimensional array of
integers, of dimension |pre_classes n| * |post_classes n|
This module defines conversion functions between three different
representations of cells:
[Cell.t] identify a cell as a single integer
<=>
[Tree.n index * Cells.offset] identify a cell as a pair of a node and an
offset in the array of costs
<=>
[Tree.n index * Cells.row * Cells.column] identify a cell as a triple of
a node, a row index and a column index of the compact cost matrix
*)moduleCell:sigincludeCARDINAL(* A value of type row represents the index of a row of a matrix.
A row of node [n] belongs to the interval
0 .. Array.length (Tree.pre_classes n) - 1
*)typerow=int(* A value of type column represents the index of a column of a matrix.
A column of node [n] belongs to the interval
0 .. Array.length (Tree.post_classes n) - 1
*)typecolumn=int(* Get the cell corresponding to a node, a row, and a column *)valencode:Tree.nindex->pre:row->post:column->nindex(* Get the node, row, and column corresponding to a cell *)valdecode:nindex->Tree.nindex*row*column(* Index of the first cell of matrix associated to a node *)valfirst_cell:Tree.nindex->nindextypegotovalgoto:gotocardinalvalis_goto:nindex->gotoindexoptionvalof_goto:gotoindex->nindexvalgoto_encode:ggoto_transitionindex->pre:row->post:column->gotoindexvalgoto_decode:gotoindex->ggoto_transitionindex*row*columnvaliter_goto:ggoto_transitionindex->(gotoindex->unit)->unitend=structtyperow=inttypecolumn=intletn,pre_bits,post_bits=letmax_pre=ref0inletmax_post=ref0inletn=ref0inletbits_neededn=leti=ref0inwhile1lsl!i<=ndoincri;done;!iinIndex.iterTree.nbeginfunnode->letpre=Tree.pre_countnodeinletpost=Tree.post_countnodeinn:=!n+pre*post;max_pre:=Int.maxpre!max_pre;max_post:=Int.maxpost!max_post;end;(!n,bits_needed!max_pre,bits_needed!max_post)includeConst(structletcardinal=nend)letmapping=Vector.maken0letfirst_cell=letindex=ref0inVector.initTree.n@@funnode->letfirst_index=!indexinletbase=(node:>int)lsl(pre_bits+post_bits)inletpre_count=Tree.pre_countnodeinletpost_count=Tree.post_countnodeinforpre=0topre_count-1doletbase=baselor(prelslpost_bits)inforpost=0topost_count-1domapping.:(Index.of_intn!index)<-baselorpost;incrindexdonedone;first_indexletdecodeix=leti=mapping.:(ix)inletpost=iland(1lslpost_bits-1)inleti=ilsrpost_bitsinletpre=iland(1lslpre_bits-1)in(Index.of_intTree.n(ilsrpre_bits),pre,post)letencodei=letfirst=first_cell.:(i)inletpost_count=Tree.post_countiinfun~pre~post->Index.of_intn(first+pre*post_count+post)letfirst_goto_node,first_goto_cell,last_goto_cell=matchcardinal(Transition.gotog)with|0->(0,0,-1)|n->lettri=Transition.of_gotog(Index.of_int(Transition.gotog)i)inletfirst_goto_node=Tree.leaf(tr0)inletfirst=first_cell.:(first_goto_node)inletlast=Tree.leaf(tr(n-1))inletnext=Index.of_intTree.n((last:>int)+1)in((first_goto_node:>int),first,first_cell.:(next)-1)moduleGoto=Const(structletcardinal=last_goto_cell-first_goto_cell+1end)typegoto=Goto.nletgoto=Goto.nletis_goto(i:nindex)=leti=(i:>int)iniffirst_goto_cell<=i&&i<=last_goto_cellthenSome(Index.of_intgoto(i-first_goto_cell))elseNoneletof_goto(g:gotoindex)=Index.of_intn(first_goto_cell+(g:>int))letgoto_decode(gt:gotoindex)=letn,pre,post=decode(of_gotogt)inletgt=Index.of_int(Transition.gotog)((n:>int)-first_goto_cell)in(gt,pre,post)letgoto_encodei=letnode=Tree.leaf(Transition.of_gotogi)inletfirst=first_cell.:(node)-first_goto_cellinletpost_count=Tree.post_countnodeinfun~pre~post->Index.of_intgoto(first+pre*post_count+post)letiter_goto(gt:ggoto_transitionindex)f=leti=(gt:>int)inletindex_ofi=first_cell.:(Index.of_intTree.n(first_goto_node+i))inforj=index_ofitoindex_of(i+1)-1dof(Index.of_intgoto(j-first_goto_cell))doneletfirst_celli=Index.of_intnfirst_cell.:(i)endlet()=stopwatch2"reachability: indexed matrix cells"moduleReverse_dependencies=struct(* Reverse dependencies record in which equations a node appears *)typet=(* Equation (7): this node appears in the RHS of the definition of a
goto transition.
The dependency is accompanied with pre-coercion (see [Coercion.pre])
and the forward coercion that represents the creduce(...). *)|Leafofggoto_transitionindex*Coercion.pre*Coercion.forward(* Equation (8): this node appears in some inner product.
The dependency stores the index of the parent node as well as the
coercion matrix. *)|InnerofTree.Inner.nindex*Coercion.infixletoccurrences:(Tree.n,tlist)vector=(* Store enough information with each node of the tree to compute
which cells are affected if a cell of this node changes.
Because of sharing, a node can have multiple parents. *)Vector.makeTree.n[]let()=Index.iter(Transition.gotog)beginfuntr->(* Record dependencies of a goto transition. *)letnode=Tree.leaf(Transition.of_gotogtr)inletpre=Tree.pre_classesnodeinletpost=Tree.post_classesnodein(* Register dependencies to other reductions *)List.iterbeginfun({lookahead;_},node')->matchCoercion.prepre(Tree.pre_classesnode')with|None->(* The goto transition is unreachable because of conflict
resolution. Don't register any dependency. *)()|Somecoerce_pre->letpost'=Tree.post_classesnode'inletcoerce_post=Coercion.infixpost'post~lookaheadinoccurrences.@(node')<-List.cons(Leaf(tr,coerce_pre,coerce_post.Coercion.forward))end(Tree.goto_equationstr).non_nullableend;(* Record dependencies on a inner node. *)Index.iterTree.Inner.nbeginfunnode->let(l,r)=Tree.definenodein(*(*sanity*)assert (Tree.pre_classes l == Tree.pre_classes node);*)(*(*sanity*)assert (Tree.post_classes r == Tree.post_classes node);*)letc1=Tree.post_classeslinletc2=Tree.pre_classesrinletcoercion=Coercion.infixc1c2inletdep=Inner(node,coercion)inassert(Array.lengthc2=Array.lengthcoercion.Coercion.backward);occurrences.@(l)<-List.consdep;occurrences.@(r)<-List.consdependletvisit_occurrencesindex~visit_goto~from_left~acc~acc_right~from_right=letnode,i_pre,i_post=Cell.decodeindexinletupdate_dep=function|Leaf(parent,pre,post)->(* If the production begins with a terminal,
we have to map the class *)leti_pre'=matchprewith|Coercion.Pre_singletoni->i|Coercion.Pre_identity->i_preinletencode=Cell.goto_encodeparentinArray.iter(funi_post'->visit_goto(encode~pre:i_pre'~post:i_post'))post.(i_post)|Inner(parent,inner)->(* This change updates the cost of an occurrence of equation 8,
of the form l . coercion . r
We have to find whether the change comes from the [l] or the [r]
node to update the right-hand cells of the parent *)letl,r=Tree.defineparentinletencode_p=Cell.encode(Tree.injectparent)inifl=nodethen((* The left term has been updated *)letencode_r=Cell.encoderinfori_post'=0toArray.length(Tree.post_classesr)-1doletacc=Array.fold_left(funacci_pre'->acc_rightacc(encode_r~pre:i_pre'~post:i_post'))accinner.Coercion.forward.(i_post)infrom_left~right:acc~parent:(encode_p~pre:i_pre~post:i_post')done)else((* The right term has been updated *)(*sanity*)assert(r=node);matchinner.Coercion.backward.(i_pre)with|-1->()|l_post->letencode_l=Cell.encodelinfori_pre=0toArray.length(Tree.pre_classesl)-1dofrom_right~left:(encode_l~pre:i_pre~post:l_post)~parent:(encode_p~pre:i_pre~post:i_post)done)inList.iterupdate_depoccurrences.:(node)endlet()=stopwatch2"reachability: reversed matrix dependencies"(* ---------------------------------------------------------------------- *)(* Represent the data flow problem to solve *)moduleSolver=structletmin_costab:int=ifa<bthenaelseb(* Initialize shift transitions to cost 1. *)letinitialize_shift~visit_roottr=letnode=Tree.leaf(Transition.of_shiftgtr)in(*sanity*)assert(Array.length(Tree.pre_classesnode)=1);(*sanity*)assert(Array.length(Tree.post_classesnode)=1);visit_root(Cell.first_cellnode)1(* Record dependencies on a goto transition. *)letinitialize_goto~visit_roottr=letnode=Tree.leaf(Transition.of_gotogtr)inleteqn=Tree.goto_equationstrin(* Set matrix cells corresponding to nullable reductions to 0 *)ifIndexSet.is_not_emptyeqn.nullable_lookaheadsthen(letpre=Tree.pre_classesnodeinletpost=Tree.post_classesnodein(* We use:
- [c_pre] and [i_pre] for a class in the pre partition and its index
- [c_post] and [i_post] for a class in the post partition and its
index
*)letencode=Cell.encodenodeinletupdate_celli_postc_posti_prec_pre=ifnot(IndexSet.disjointc_prec_post)thenvisit_root(encode~pre:i_pre~post:i_post)0inletupdate_coli_postc_post=ifquick_subsetc_posteqn.nullable_lookaheadsthenArray.iteri(update_celli_postc_post)preinArray.iteriupdate_colpost)letcosts=Vector.makeCell.nmax_int(* A graph representation suitable for the DataFlow solver *)moduleGraph=structtypevariable=Cell.nindex(* We cheat a bit. Normally a root is either the cell corresponding to a
shift transition (initialized to 1) or the cells corresponding to the
nullable reductions of a goto transitions (initialized to 0).
Rather than duplicating the code for exactly computing those cells, we
visit all transitions and consider every non-infinite cell a root.
*)letforeach_rootvisit_root=(* Populate roots:
- shift transitions have cost 1 by definition
- nullable goto transitions have cost 0
*)Index.iter(Transition.shiftg)(initialize_shift~visit_root);Index.iter(Transition.gotog)(initialize_goto~visit_root)(* Visit all the successors of a cell.
This amounts to:
- finding the node the cell belongs to
- looking at the reverse dependencies of this node
- visiting all cells that are affected in the dependencies
*)letforeach_successorindexcostf=(* The cost has to be less than the maximum otherwise there is no point
in relaxing the node.
This guarantees that the additions below do not overflow. *)assert(cost<max_int);Reverse_dependencies.visit_occurrencesindex~visit_goto:(funcell->f(Cell.of_gotocell)cost)~acc:max_int~acc_right:(funcostright->min_costcostcosts.:(right))~from_left:(fun~right~parent->ifright<max_intthenfparent(cost+right))~from_right:(fun~left~parent->letleft=costs.:(left)inifleft<max_intthenfparent(left+cost))endmoduleProperty=structtypeproperty=intletleq_join=min_costend(* Implement the interfaces required by DataFlow.ForCustomMaps *)moduleBoolMap()=structlettable=Boolvector.makeCell.nfalseletgett=Boolvector.testtabletletsettx=ifxthenBoolvector.settabletelseBoolvector.cleartabletend(* Run the solver for shortest paths *)includeFix.DataFlow.ForCustomMaps(Property)(Graph)(structletgeti=Vector.getcostsiletsetix=Vector.setcostsixend)(BoolMap())(* Run the solver for finite languages *)moduleBool_or=structtypeproperty=boolletleq_join=(||)endmoduleFinite=BoolMap()moduleFiniteGraph=structtypevariable=Cell.nindexletcount=Vector.makeCell.goto0let()=Index.iterCell.n(funcell->Reverse_dependencies.visit_occurrencescell~visit_goto:(fungoto->count.@(goto)<-succ)~acc:()~acc_right:(fun()_->())~from_left:(fun~right:()~parent:_->())~from_right:(fun~left:_~parent:_->()))letforeach_rootvisit_root=Index.iter(Transition.shiftg)(funsh->letnode=Tree.leaf(Transition.of_shiftgsh)invisit_root(Cell.first_cellnode)true);Index.iter(Transition.gotog)(fungt->Cell.iter_gotogt(fungt'->letindex=Cell.of_gotogt'inifcosts.:(index)<max_int&&count.:(gt')=0thenvisit_rootindextrue))(* Visit all the successors of a cell.
This amounts to:
- finding the node the cell belongs to
- looking at the reverse dependencies of this node
- visiting all cells that are affected in the dependencies
*)letforeach_successorindexfinitef=iffinitethenReverse_dependencies.visit_occurrencesindex~visit_goto:(fungt->letcount'=count.:(gt)-1incount.:(gt)<-count';assert(count'>=0);ifcount'=0thenfindextrue)~acc:true~acc_right:(funaccright->acc&&Finite.getright)~from_left:(fun~right~parent->ifrightthenfparenttrue)~from_right:(fun~left~parent->ifFinite.getleftthenfparenttrue)endincludeFix.DataFlow.ForCustomMaps(Bool_or)(FiniteGraph)(Finite)(BoolMap())let()=stopwatch2"solved minimal costs"endmoduleAnalysis=structletcost=Vector.getSolver.costsletfinite=Solver.Finite.getend(*let () =
let string_of_cost i = if i = max_int then "∞" else string_of_int i in
Index.iter (Transition.goto g)
(fun gt ->
let min = ref max_int in
let count = ref 0 in
Cell.iter_goto gt
(fun gtc -> incr count; min := Int.min (Analysis.cost (Cell.of_goto gtc)) !min);
if !min = max_int then (
let tr = Transition.of_goto g gt in
match
List.filter_map begin fun (red, n) ->
let pre_classes = Array.length (Tree.pre_classes n) in
let post_classes = Array.length (Tree.post_classes n) in
let encode = Cell.encode n in
let candidates = ref [] in
for pre = pre_classes - 1 downto 0 do
for post = post_classes - 1 downto 0 do
if Analysis.cost (encode ~pre ~post) <> max_int then
push candidates (pre, post)
done
done;
match !candidates with
| candidates when
List.exists begin function
| Reverse_dependencies.Inner _ -> false
| Reverse_dependencies.Leaf (gt', _pre, post) ->
gt = gt' &&
List.exists (fun (_, post_index) -> Array.length post.(post_index) > 0) candidates
end Reverse_dependencies.occurrences.:(n) ->
Some (red, n, candidates)
| _ -> None
end (Tree.goto_equations gt).non_nullable
with
| [] -> ()
| paths ->
Printf.eprintf "unreachable goto transition (id:%d): %s -> %s (%dx%d=%d classes)\n"
(gt :> int)
(Lr1.to_string g (Transition.source g tr))
(Lr1.to_string g (Transition.target g tr))
(Array.length (Classes.for_lr1 (Transition.source g tr)))
(Array.length (Classes.for_edge gt))
!count;
(*let terminals set =
string_concat_map ~wrap:("{","}") ", "
(Terminal.to_string g)
(List.rev (IndexSet.elements set))
in*)
let production_to_string g p =
Nonterminal.to_string g (Production.lhs g p) ^ ": " ^
string_concat_map " " (Symbol.name g)
(Array.to_list (Production.rhs g p))
in
List.iter begin fun (red, n, candidates) ->
(*let pre_classes = Array.length (Tree.pre_classes n) in
let post_classes = Array.length (Tree.post_classes n) in*)
Printf.eprintf "- reduction: %s" (production_to_string g red.production);
let encode = Cell.encode n in
Printf.eprintf " with candidates";
List.iter begin fun (pre, post) ->
Printf.eprintf " (%d,%d, cell:%d)" pre post (encode ~pre ~post :> int)
end candidates;
Printf.eprintf "\n";
List.iter begin function
| Reverse_dependencies.Leaf (gt', pre, post) when gt = gt' ->
let pre = match pre with
| Pre_identity -> Fun.id
| Pre_singleton i -> Fun.const i
in
Printf.eprintf " found a reverse dependency with classes %s\n"
(string_concat_map ", "
(fun (pre_index, post_index) ->
Printf.sprintf "(%d,%s)"
(pre pre_index)
(string_concat_map ~wrap:("[","]") "," string_of_cost
(Array.to_list post.(post_index)))
) candidates)
| _ -> ()
end Reverse_dependencies.occurrences.:(n);
end paths
)
)*)end)