123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907(* MIT License
*
* Copyright (c) 2025 Frédéric Bour
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*)(** This module defines data structures and operations for handling grammar
information in a structured way. It includes representations for terminals,
non-terminals, productions, and LR states, along with their transitions and
reductions. The module is designed to work with Menhir's grammar
representation and extends it with additional functionality for
convenience. *)openUtilsopenMiscopenFix.IndexingmoduletypeGRAMMAR=MenhirSdk.Cmly_api.GRAMMARmoduleUC_terminal=Unsafe_cardinal()moduleUC_nonterminal=Unsafe_cardinal()moduleUC_production=Unsafe_cardinal()moduleUC_lr0=Unsafe_cardinal()moduleUC_lr1=Unsafe_cardinal()moduleUC_item=Unsafe_cardinal()moduleUC_goto_transition=Unsafe_cardinal()moduleUC_shift_transition=Unsafe_cardinal()moduleUC_reduction=Unsafe_cardinal()type'gterminal='gUC_terminal.ttype'gnonterminal='gUC_nonterminal.ttype'gsymbol=('gterminal,'gnonterminal)Sum.ntype'gproduction='gUC_production.ttype'gitem='gUC_item.ttype'glr0='gUC_lr0.ttype'glr1='gUC_lr1.ttype'ggoto_transition='gUC_goto_transition.ttype'gshift_transition='gUC_shift_transition.ttype'gtransition=('ggoto_transition,'gshift_transition)Sum.ntype'greduction='gUC_reduction.ttype'ggrammar={raw:(moduleMenhirSdk.Cmly_api.GRAMMAR);terminal_n:'gterminalcardinal;terminal_all:'gterminalindexset;terminal_regular:'gterminalindexset;terminal_table:(string,'gterminalindex)Hashtbl.t;nonterminal_n:'gnonterminalcardinal;nonterminal_all:'gnonterminalindexset;nonterminal_table:(string,'gnonterminalindex)Hashtbl.t;symbol_all:'gsymbolindexset;production_lhs:('gproduction,'gnonterminalindex)vector;production_rhs:('gproduction,'gsymbolindexarray)vector;production_all:'gproductionindexset;item_productions:('gitem,'gproductionindex)vector;item_offsets:('gproduction,int)vector;lr0_items:('glr0,'gitemindexset)vector;lr0_incoming:('glr0,'gsymbolindexoption)vector;lr0_is_entrypoint:('glr0,'gproductionindexoption)vector;transition_source:('gtransition,'glr1index)vector;transition_target:('gtransition,'glr1index)vector;transition_shift_sym:('gshift_transition,'gterminalindex)vector;(*transition_shift_table: ('g lr1, ('g terminal, 'g shift_transition index) indexmap) vector;*)transition_goto_sym:('ggoto_transition,'gnonterminalindex)vector;transition_goto_table:('glr1,('gnonterminal,'ggoto_transitionindex)indexmap)vector;transition_predecessors:('glr1,'gtransitionindexset)vector;transition_successors:('glr1,'gtransitionindexset)vector;transition_accepting:'ggoto_transitionindexset;lr1_all:'glr1indexset;lr1_lr0:('glr1,'glr0index)vector;lr1_wait:'glr1indexset;lr1_accepting:'glr1indexset;lr1_reduce_on:('glr1,'gterminalindexset)vector;lr1_shift_on:('glr1,'gterminalindexset)vector;lr1_reject:('glr1,'gterminalindexset)vector;lr1_entrypoints:'glr1indexset;lr1_entrypoint_table:(string,'glr1index)Hashtbl.t;lr1_predecessors:('glr1,'glr1indexsetlazy_stream)vector;reduction_state:('greduction,'glr1index)vector;reduction_production:('greduction,'gproductionindex)vector;reduction_lookaheads:('greduction,'gterminalindexset)vector;reduction_from_lr1:('glr1,'greductionindexset)vector;}letrawg=g.rawmoduleLift()=structtypegletloaded=reffalsemoduleLoad_grammar(G:MenhirSdk.Cmly_api.GRAMMAR)=structlet()=if!loadedtheninvalid_arg"Info.Lift.Load: grammar can be loaded only once"elseloaded:=truemoduleImport(UC:UNSAFE_CARDINAL)(M:sigtypetvalcount:intvalof_int:int->tvalto_int:t->intend)=structincludeUC.Const(structtypet=gletcardinal=M.countend)letof_gi=Index.of_intn(M.to_inti)letto_gi=M.of_int(Index.to_inti)letall=IndexSet.allnendmoduleTerminal=structincludeImport(UC_terminal)(G.Terminal)letregular=IndexSet.init_from_setn(funt->matchG.Terminal.kind(G.Terminal.of_int(t:_index:>int))with|`EOF|`REGULAR->true|`PSEUDO|`ERROR->false)endmoduleNonterminal=Import(UC_nonterminal)(G.Nonterminal)moduleSymbol=structletn=Sum.cardinalTerminal.nNonterminal.nletall=IndexSet.allnletof_g=function|G.Tt->Sum.inj_l(Terminal.of_gt)|G.Nn->Sum.inj_rTerminal.n(Nonterminal.of_gn)(*let to_g t = match Sum.prj Terminal.n t with
| L t -> G.T (Terminal.to_g t)
| R n -> G.N (Nonterminal.to_g n)*)endmoduleProduction=structincludeImport(UC_production)(G.Production)letlhs=Vector.initn(funp->Nonterminal.of_g(G.Production.lhs(to_gp)))letrhs=Vector.initn@@funp->Array.map(fun(sym,_,_)->Symbol.of_gsym)(G.Production.rhs(to_gp))endmoduleItem=structletcount=ref0letoffsets=Vector.initProduction.n(funprod->letposition=!countincount:=!count+Array.lengthProduction.rhs.:(prod)+1;position)includeUC_item.Const(structtypet=gletcardinal=!countend)letproductions=Vector.make'n(fun()->Index.of_intProduction.n0)let()=letenum=Index.enumerateninIndex.iterProduction.n@@funprod->for_=0toArray.lengthProduction.rhs.:(prod)doproductions.:(enum())<-proddoneendmoduleLr0=structincludeImport(UC_lr0)(G.Lr0)letitems=Vector.initn@@funlr0->to_glr0|>G.Lr0.items|>List.map(fun(p,pos)->Index.of_intItem.n(Item.offsets.:(Production.of_gp)+pos))|>IndexSet.of_listletincoming=Vector.initn@@funlr0->to_glr0|>G.Lr0.incoming|>Option.mapSymbol.of_gletis_entrypoint=Vector.map(funitems->ifnot(IndexSet.is_singletonitems)thenNoneelseletitem=IndexSet.chooseitemsinletprod=Item.productions.:(item)inifIndex.to_intitem=Item.offsets.:(prod)thenSomeprodelseNone)itemsendmoduleLr1=structincludeImport(UC_lr1)(G.Lr1)letlr0=Vector.initn@@funlr1->Lr0.of_g(G.Lr1.lr0(to_glr1))endmoduleTransition=structletshift_count,goto_count=letshift_count=ref0inletgoto_count=ref0in(* Count goto and shift transitions by iterating on all states and
transitions *)G.Lr1.iterbeginfunlr1->List.iterbeginfun(sym,_)->matchsymwith|G.T_->incrshift_count|G.N_->incrgoto_countend(G.Lr1.transitionslr1)end;(!shift_count,!goto_count)moduleGoto=UC_goto_transition.Const(structtypet=gletcardinal=goto_countend)moduleShift=UC_shift_transition.Const(structtypet=gletcardinal=shift_countend)letany=Sum.cardinalGoto.nShift.nletof_goto=Sum.inj_lletof_shift=Sum.inj_rGoto.n(* Vectors to store information on states and transitions.
We allocate a bunch of data structures (sources, targets, t_symbols,
nt_symbols and predecessors vectors, t_table and nt_table hash tables),
and then populate them by iterating over all transitions.
*)letsources=Vector.make'any(fun()->Index.of_intLr1.n0)lettargets=Vector.make'any(fun()->Index.of_intLr1.n0)letshift_sym=Vector.make'Shift.n(fun()->Index.of_intTerminal.n0)letgoto_sym=Vector.make'Goto.n(fun()->Index.of_intNonterminal.n0)(* Tables to associate a pair of a state and a symbol to a transition. *)letgoto_table=Vector.makeLr1.nIndexMap.empty(*let shift_table = Vector.make Lr1.n IndexMap.empty*)(* A vector to store the predecessors of an lr1 state.
We cannot compute them directly, we discover them by exploring the
successor relation below. *)letpredecessors=Vector.makeLr1.nIndexSet.emptyletsuccessors=(* We populate all the data structures allocated above, i.e.
the vectors t_sources, t_symbols, t_targets, nt_sources, nt_symbols,
nt_targets and predecessors, as well as the tables t_table and
nt_table, by iterating over all successors. *)letnext_goto=Index.enumerateGoto.ninletnext_shift=Index.enumerateShift.ninVector.initLr1.nbeginfunsource->List.fold_rightbeginfun(sym,target)acc->lettarget=Lr1.of_gtargetinletindex=matchsymwith|G.Tt->lett=Terminal.of_gtinletindex=next_shift()inshift_sym.:(index)<-t;(*shift_table.@(source) <- IndexMap.add t index;*)of_shiftindex|G.Nnt->letnt=Nonterminal.of_gntinletindex=next_goto()ingoto_sym.:(index)<-nt;goto_table.@(source)<-IndexMap.addntindex;of_gotoindexinsources.:(index)<-source;targets.:(index)<-target;predecessors.@(target)<-IndexSet.addindex;IndexSet.addindexaccend(G.Lr1.transitions(Lr1.to_gsource))IndexSet.emptyendletaccepting=letacc=refIndexSet.emptyinIndex.rev_iterLr1.nbeginfunlr1->matchLr0.is_entrypoint.:(Lr1.lr0.:(lr1))with|None->()|Someprod->letsym=matchSum.prjTerminal.nProduction.rhs.:(prod).(0)with|L_->assertfalse|Rnt->ntinacc:=IndexSet.fold_right(funacctr->matchSum.prjGoto.ntrwith|Lgtwhengoto_sym.:(gt)=sym->IndexSet.addgtacc|_->acc)!accsuccessors.:(lr1)end;!accendmoduleLr1_extra=structopenLr1letaccepting=refIndexSet.empty(** The set of terminals that will trigger a reduction *)letreduce_on=Vector.initn@@funlr1->List.fold_left(funacc(t,_)->ifG.Terminal.kindt=`PSEUDOthenaccepting:=IndexSet.addlr1!accepting;IndexSet.add(Terminal.of_gt)acc)IndexSet.empty(G.Lr1.get_reductions(to_glr1))letaccepting=!accepting(** The set of terminals that will trigger a shift transition *)letshift_on=Vector.initn@@funlr1->List.fold_left(funacc(sym,_raw)->matchsymwith|G.Tt->IndexSet.add(Terminal.of_gt)acc|G.N_->acc)IndexSet.empty(G.Lr1.transitions(to_glr1))(** The set of terminals the state has no transition for *)letreject=Vector.initn@@funlr1->letresult=Terminal.allinletresult=IndexSet.diffresultreduce_on.:(lr1)inletresult=IndexSet.diffresultshift_on.:(lr1)inresultletwait=IndexSet.init_from_setn(funlr1->matchG.Lr0.incoming(Lr0.to_glr0.:(lr1))with|Some(G.N_)->false|Some(G.Tt)->G.Terminal.kindt=`REGULAR&¬(IndexSet.memlr1accepting)|None->true)letpredecessors=Vector.initn@@funlr1->IndexSet.map(funtr->Transition.sources.:(tr))Transition.predecessors.:(lr1)letentrypoints,entrypoint_table=letset=refIndexSet.emptyinlettable=Hashtbl.create7inIndex.rev_itern(funlr1->matchLr0.is_entrypoint.:(lr0.:(lr1))with|None->()|Someprod->set:=IndexSet.addlr1!set;letsym,_,_=(G.Production.rhs(Production.to_gprod)).(0)inHashtbl.addtable(G.Symbol.namesym)lr1);(!set,table)endmoduleReduction=structletn=ref0letraw=letimport_redreds=reds|>List.filter_map(fun(t,p)->matchG.Production.kindpwith|`START->None|`REGULAR->Some(Production.of_gp,Terminal.of_gt))|>Misc.group_by~compare:(fun(p1,_)(p2,_)->compare_indexp1p2)~group:(fun(p,t)ps->p,IndexSet.of_list(t::List.mapsndps))|>List.sort(fun(p1,_)(p2,_)->letl1=Array.lengthProduction.rhs.:(p1)inletl2=Array.lengthProduction.rhs.:(p2)inletc=Int.comparel1l2inifc<>0thencelsecompare_indexProduction.lhs.:(p1)Production.lhs.:(p2))inletimport_lr1lr1=letreds=import_red(G.Lr1.get_reductions(Lr1.to_glr1))inn:=!n+List.lengthreds;redsinVector.initLr1.nimport_lr1includeUC_reduction.Const(structtypet=gletcardinal=!nend)letstate=Vector.make'n(fun()->Index.of_intLr1.n0)letproduction=Vector.make'n(fun()->Index.of_intProduction.n0)letlookaheads=Vector.makenIndexSet.emptyletfrom_lr1=letenum=Index.enumerateninVector.mapi(funlr1reds->List.fold_left(funset(prod,la)->leti=enum()instate.:(i)<-lr1;production.:(i)<-prod;lookaheads.:(i)<-la;IndexSet.addiset)IndexSet.emptyreds)rawendletgrammar={raw=(moduleG);terminal_n=Terminal.n;terminal_all=Terminal.all;terminal_regular=Terminal.regular;terminal_table=Hashtbl.create7;nonterminal_n=Nonterminal.n;nonterminal_all=Nonterminal.all;nonterminal_table=Hashtbl.create7;symbol_all=Symbol.all;production_lhs=Production.lhs;production_rhs=Production.rhs;production_all=Production.all;item_productions=Item.productions;item_offsets=Item.offsets;lr0_items=Lr0.items;lr0_incoming=Lr0.incoming;lr0_is_entrypoint=Lr0.is_entrypoint;transition_source=Transition.sources;transition_target=Transition.targets;transition_shift_sym=Transition.shift_sym;(*transition_shift_table = Transition.shift_table;*)transition_goto_sym=Transition.goto_sym;transition_goto_table=Transition.goto_table;transition_predecessors=Transition.predecessors;transition_successors=Transition.successors;transition_accepting=Transition.accepting;lr1_all=Lr1.all;lr1_lr0=Lr1.lr0;lr1_wait=Lr1_extra.wait;lr1_accepting=Lr1_extra.accepting;lr1_reduce_on=Lr1_extra.reduce_on;lr1_shift_on=Lr1_extra.shift_on;lr1_reject=Lr1_extra.reject;lr1_entrypoints=Lr1_extra.entrypoints;lr1_entrypoint_table=Lr1_extra.entrypoint_table;lr1_predecessors=iterate_vectorLr1_extra.predecessors;reduction_state=Reduction.state;reduction_production=Reduction.production;reduction_lookaheads=Reduction.lookaheads;reduction_from_lr1=Reduction.from_lr1;}endendmoduletypeINDEXED=sigtype'gnvalcardinal:'ggrammar->'gncardinalvalof_int:'ggrammar->int->'gnindexendmoduleTerminal=structtype'gn='gterminalletcardinalg=g.terminal_nletof_intgi=Index.of_int(cardinalg)iletto_stringgi=letopen(valg.raw)inTerminal.name(Terminal.of_int(Index.to_inti))letallg=g.terminal_allletregularg=g.terminal_regularletsemantic_valuegi=letopen(valg.raw)inTerminal.typ(Terminal.of_int(Index.to_inti))letintersectgab=ifa==g.terminal_allthenbelseifb==g.terminal_allthenaelseIndexSet.interabletis_errorgi=letopen(valg.raw)inmatchTerminal.kind(Terminal.of_int(i:_index:>int))with|`ERROR->true|_->falseletlookaheads_to_stringgla=matchIndexSet.cardinallawith|nwhenn>10->Printf.sprintf"<%d lookaheads>"n|_->string_concat_map~wrap:("<",">")","(to_stringg)(IndexSet.elementsla)letterminal_tableg=ifHashtbl.lengthg.terminal_table=0thenIndex.iter(cardinalg)(funt->Hashtbl.addg.terminal_table(to_stringgt)t);g.terminal_tableletfindg?(approx=3)name=lettable=terminal_tableginmatchHashtbl.find_opttablename,approxwith|Somet,_->Result.Okt|None,0->Result.Error[]|None,dist->Result.Error(Damerau_levenshtein.filter_approx~distname(Hashtbl.to_seqtable))endmoduleNonterminal=structtype'gn='gnonterminalletcardinalg=g.nonterminal_nletof_intgi=Index.of_int(cardinalg)iletallg=g.nonterminal_allletto_stringgi=letopen(valg.raw)inNonterminal.name(Nonterminal.of_int(Index.to_inti))letto_mangled_stringgi=letopen(valg.raw)inNonterminal.mangled_name(Nonterminal.of_int(Index.to_inti))letfind_mangledgstr=letenum=Index.enumerate(cardinalg)inletrecloop()=leti=enum()inifto_mangled_stringgi=strthenielseloop()inmatchloop()with|i->Somei|exceptionIndex.End_of_set->Noneletkindgi=letopen(valg.raw)inNonterminal.kind(Nonterminal.of_int(Index.to_inti))letsemantic_valuegi=letopen(valg.raw)inNonterminal.typ(Nonterminal.of_int(Index.to_inti))letnullablegi=letopen(valg.raw)inNonterminal.nullable(Nonterminal.of_int(Index.to_inti))letfirstgi=letopen(valg.raw)inNonterminal.of_int(Index.to_inti)|>Nonterminal.first|>List.map(funt->Index.of_intg.terminal_n(Terminal.to_intt))|>IndexSet.of_listletnonterminal_tableg=ifHashtbl.lengthg.nonterminal_table=0thenIndex.iter(cardinalg)(funt->Hashtbl.addg.nonterminal_table(to_stringgt)t);g.nonterminal_tableletfindg?(approx=3)name=lettable=nonterminal_tableginmatchHashtbl.find_opttablename,approxwith|Somet,_->Result.Okt|None,0->Result.Error(`Dym[])|None,dist->matchfind_mangledgnamewith|Somei->Result.Error(`Mangledi)|None->letcandidates=Damerau_levenshtein.filter_approx~distname(Hashtbl.to_seqtable)inResult.Error(`Dymcandidates)endmoduleSymbol=structtype'gn='gsymbolletcardinalg=Sum.cardinalg.terminal_ng.nonterminal_nletof_intgi=Index.of_int(cardinalg)itype'gdesc=|Tof'gterminalindex|Nof'gnonterminalindexletprjgi=Sum.prjg.terminal_niletdescgi=matchprjgiwith|Lt->Tt|Rn->Nnletis_terminalgt=matchprjgtwith|L_->true|R_->falseletis_nonterminalgt=matchprjgtwith|L_->false|R_->trueletto_stringg?mangledt=letopen(valg.raw)inmatchprjgtwith|Lt->symbol_name?mangled(T(Terminal.of_int(Index.to_intt)))|Rn->symbol_name?mangled(N(Nonterminal.of_int(Index.to_intn)))letsemantic_valuegt=matchprjgtwith|Lt->Some(Option.value(Terminal.semantic_valuegt)~default:"unit")|Rn->Nonterminal.semantic_valuegnletallg=g.symbol_allletinj_t_t=Sum.inj_ltletinj_ngn=Sum.inj_rg.terminal_nnletfindg?(approx=3)name=letttable=Terminal.terminal_tableginmatchHashtbl.find_optttablenamewith|Somet->Result.Ok(inj_tgt)|None->letntable=Nonterminal.nonterminal_tableginmatchHashtbl.find_optntablename,approxwith|Somen,_->Result.Ok(inj_ngn)|None,0->Result.Error(`Dym[])|None,dist->matchNonterminal.find_mangledgnamewith|Somei->Result.Error(`Mangledi)|None->letcandidates=Damerau_levenshtein.filter_approx~distname(Seq.append(Seq.map(fun(s,t)->(s,inj_tgt))(Hashtbl.to_seqttable))(Seq.map(fun(s,n)->(s,inj_ngn))(Hashtbl.to_seqntable)))inResult.Error(`Dymcandidates)endmoduleProduction=structtype'gn='gproductionletcardinalg=Vector.lengthg.production_lhsletof_intgi=Index.of_int(cardinalg)iletlhsgi=g.production_lhs.:(i)letrhsgi=g.production_rhs.:(i)letlengthgi=Array.length(rhsgi)letkindgi=letopen(valg.raw)inProduction.kind(Production.of_int(Index.to_inti))letallg=g.production_allend(* Explicit representation of LR(0) items *)moduleItem=structtype'gn='gitemletcardinalg=Vector.lengthg.item_productionsletof_intgi=Index.of_int(cardinalg)iletmakegprodpos=ifpos<0||pos>Production.lengthgprodtheninvalid_arg"Info.Item.make: pos out of bounds";Index.of_int(cardinalg)(g.item_offsets.:(prod)+pos)letlastgprod=makegprod(Production.lengthgprod)letproductiongi=g.item_productions.:(i)letpositiongi=((i:_index:>int)-g.item_offsets.:(productiongi))letdescgi=letprod=productiongiin(prod,(i:_index:>int)-g.item_offsets.:(prod))letprevg(i:'gnindex)=matchIndex.prediwith|Somejwhennot(Index.equal(productiongi)(productiongj))->None|result->resultletis_reduciblegi=letprod=productiongiin((i:_index:>int)-g.item_offsets.:(prod))=Production.lengthgprodletto_stringgi=letprod,pos=descgiinletb=Buffer.create63inBuffer.add_stringb(Nonterminal.to_stringg(Production.lhsgprod));Buffer.add_charb':';letrhs=Production.rhsgprodinletadd_symsym=Buffer.add_charb' ';Buffer.add_stringb(Symbol.to_stringgsym);infori=0topos-1doadd_symrhs.(i)done;Buffer.add_stringb" .";fori=postoArray.lengthrhs-1doadd_symrhs.(i)done;Buffer.contentsbendmoduleLr0=structtype'gn='glr0letcardinalg=Vector.lengthg.lr0_itemsletof_intgi=Index.of_int(cardinalg)i(* See [Lr1.incoming]. *)letincominggi=g.lr0_incoming.:(i)(* See [Lr1.items]. *)letitemsgi=g.lr0_items.:(i)(* If the state is an initial state, returns the pseudo (start)
production that recognizes this entrypoint. *)letis_entrypointgi=g.lr0_is_entrypoint.:(i)endmoduleLr1=structtype'gn='glr1letcardinalg=Vector.lengthg.lr1_reduce_onletof_intgi=Index.of_int(cardinalg)iletallg=g.lr1_allletacceptingg=g.lr1_accepting(* A ``wait'' state is an LR(1) state in which the parser needs to look at
more input before knowing how to proceed.
Wait states are the initial states and the targets of SHIFT transitions
(states with a terminal as incoming symbol), except the accepting ones
(after reading EOF, the only valid action is to reduce). *)letwaitg=g.lr1_wait(* Get the LR(0) "core" state *)letto_lr0gi=g.lr1_lr0.:(i)(* The symbol annotating the incoming transitions of a state.
There is none for initial states, and at most one for others. *)letincominggi=Lr0.incomingg(to_lr0gi)(* Get the items in the kernel of a state (before closure). *)letitemsgi=Lr0.itemsg(to_lr0gi)letis_entrypointgi=Lr0.is_entrypointg(to_lr0gi)letentrypoint_tableg=g.lr1_entrypoint_tableletentrypointsg=g.lr1_entrypoints(* Printing functions, for debug purposes.
Not nice for the end-user (FIXME). *)letsymbol_to_stringglr1=matchincomingglr1with|Somesym->Symbol.to_stringgsym|None->letentrypoint=Option.get(is_entrypointglr1)in(Symbol.to_stringg(Production.rhsgentrypoint).(0)^":")letto_stringglr1=string_of_indexlr1^":"^symbol_to_stringglr1letlist_to_stringglr1s=string_concat_map~wrap:("[","]")"; "(to_stringg)lr1sletset_to_stringglr1s=string_concat_map~wrap:("{","}")", "(to_stringg)(IndexSet.elementslr1s)(** [shift_on t] is the set of lookaheads that state [t] can shift *)letshift_ongi=g.lr1_shift_on.:(i)(** [reduce_on t] is the set of lookaheads that trigger a reduction in state
[t] *)letreduce_ongi=g.lr1_reduce_on.:(i)(** [reject t] is set of lookaheads that cause the automaton to fail when in
state [t] *)letrejectgi=g.lr1_reject.:(i)(** [predecessors t] is the set of LR(1) states that have transition going
to [t]. *)letpredecessorsgi=g.lr1_predecessors.:(i)(** Wrapper around [IndexSet.inter] speeding-up intersection with [all] *)letintersectgab=ifa==g.lr1_allthenbelseifb==g.lr1_allthenaelseIndexSet.interabletdefault_reductiongi=letopen(valg.raw)inmatchLr1.default_reduction(Lr1.of_int(i:_index:>int))with|None->None|Somep->Some(Index.of_int(Vector.lengthg.production_rhs)(Production.to_intp))endmoduleReduction=structtype'gn='greductionletcardinalg=Vector.lengthg.reduction_productionletof_intgi=Index.of_int(cardinalg)i(* A reduction is a triple [(lr1, prod, lookaheads)], meaning that:
in state [lr1], when looking ahead at a terminal in [lookaheads], the
action is to reduce [prod]. *)letstategi=g.reduction_state.:(i)letproductiongi=g.reduction_production.:(i)letlookaheadsgi=g.reduction_lookaheads.:(i)(* All reductions applicable to an lr1 state. *)letfrom_lr1glr1=g.reduction_from_lr1.:(lr1)endmoduleTransition=struct(* The set of goto transitions *)letgotog=Vector.lengthg.transition_goto_sym(* The set of all transitions = goto U shift *)letanyg=Vector.lengthg.transition_source(* The set of shift transitions *)letshiftg=Vector.lengthg.transition_shift_sym(* Inject goto into any *)letof_goto_gi=Sum.inj_li(* Inject shift into any *)letof_shiftgi=Sum.inj_r(gotog)i(* Project a transition into a goto or a shift transition *)letsplitgi=Sum.prj(gotog)i(* [find_goto s nt] finds the goto transition originating from [s] and
labelled by [nt], or raise [Not_found]. *)letfind_gotoglr1nt=matchIndexMap.find_optntg.transition_goto_table.:(lr1)with|Somegt->gt|None->Printf.ksprintfinvalid_arg"find_goto(%s, %s)"(Lr1.to_stringglr1)(Nonterminal.to_stringglr1)letfind_goto_targetglr1nt=g.transition_target.:(of_gotog(find_gotoglr1nt))(* Get the source state of a transition *)letsourcegi=g.transition_source.:(i)(* Get the target state of a transition *)lettargetgi=g.transition_target.:(i)(* Symbol that labels a transition *)letsymbolgi=matchsplitgiwith|Li->Sum.inj_rg.terminal_ng.transition_goto_sym.:(i)|Ri->Sum.inj_lg.transition_shift_sym.:(i)(* Symbol that labels a goto transition *)letgoto_symbolgi=g.transition_goto_sym.:(i)(* Symbol that labels a shift transition *)letshift_symbolgi=g.transition_shift_sym.:(i)(* [successors s] returns all the transitions [tr] such that
[source tr = s] *)letsuccessorsgi=g.transition_successors.:(i)(* [predecessors s] returns all the transitions [tr] such that
[target tr = s] *)letpredecessorsgi=g.transition_predecessors.:(i)(* Accepting transitions are goto transitions from an initial state to an
accepting state, recognizing one of the grammar entrypoint. *)letacceptingg=g.transition_acceptingletto_stringgtr=Printf.sprintf"%s -> %s"(Lr1.to_stringg(sourcegtr))(Lr1.to_stringg(targetgtr))letfindgsrctgt=letinter=IndexSet.inter(successorsgsrc)(predecessorsgtgt)inassert(IndexSet.is_emptyinter||IndexSet.is_singletoninter);IndexSet.minimuminterend