123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548(* MIT License
Copyright (c) 2025 Frédéric Bour
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*)(** This module is responsible for computing viable reductions in a LR(1) parser
generator. It generates a graph of states, where each state represents a
configuration of the parser, including the top of the stack, the rest of the
stack, and the current lookahead set. The module also computes transitions
between these states based on possible reductions and goto actions.
*)openFix.IndexingopenUtilsopenMiscopenInfo(*let printf_debug = false*)(* Step 1: pre-compute closure of ϵ-reductions *)(* Group items being reduced by their depth (reductions with zero, one, two producers, ...). *)letgroup_reductionsg=function|[]->[]|items->letrecgroupdepthacc=function|[]->[acc]|(it,la)::restwhendepth=Item.positiongit->letlhs=Production.lhsg(Item.productiongit)ingroupdepth(IndexMap.updatelhs(union_updatela)acc)rest|otherwise->acc::group(depth+1)IndexMap.emptyotherwiseinletcompare_items(it1,_)(it2,_)=Int.compare(Item.positiongit1)(Item.positiongit2)ingroup0IndexMap.empty(List.sortcompare_itemsitems)type'gstack_tree={subs:('glr1indexlist*'gterminalindexset*'gstack_tree)list;}[@@ocaml.unboxed]type'greduction_closure={accepting:'gterminalindexset;failing:'gterminalindexset;reductions:('gnonterminal,'gterminalindexset)indexmaplist;stacks:'gstack_tree;}type('g,'n)reduction_closures=('n,'greduction_closure)vectorletadd_subsetgrsetla=r:=IndexSet.union(Terminal.intersectgsetla)!r(* Close ϵ-reductions of each LR(1) states *)letclose_lr1_reductions(typeg)(g:ggrammar):(glr1,greduction_closure)vector=Vector.init(Lr1.cardinalg)@@funlr1->letaccepting=refIndexSet.emptyinletfailing=refIndexSet.emptyinletitems=ref[]inletrecpoplookaheadacc(item:gitemindex)=function|[]->pushitems(item,lookahead);acc|hd::tlasstack->matchItem.prevgitemwith|Someitem'->poplookaheadaccitem'tl|None->letlhs=Production.lhsg(Item.productiongitem)inletstack=Transition.find_goto_targetghdlhs::stackinletsubs=reducelookahead[]stackin(stack,lookahead,{subs})::accandreducelookaheadaccstack=letlr1=List.hdstackinadd_subsetgfailing(Lr1.rejectglr1)lookahead;add_subsetgaccepting(Lr1.shift_onglr1)lookahead;IndexSet.foldbeginfunredacc->matchTerminal.intersectg(Reduction.lookaheadsgred)lookaheadwith|lawhenIndexSet.is_emptyla->acc|la->poplaacc(Item.lastg(Reduction.productiongred))stackend(Reduction.from_lr1glr1)accinletsubs=reduce(Terminal.allg)[][lr1]inletreductions=group_reductionsg!itemsinletfailing=!failinginletaccepting=!acceptingin{accepting;failing;reductions;stacks={subs}}(*let rec filter_reductions g la = function
| [] -> []
| r :: rs as rrs ->
let filtered = ref false in
let r' =
IndexMap.filter_map (fun _ la' ->
let la'' = Terminal.intersect g la la' in
if la' != la'' then filtered := true;
if IndexSet.is_empty la'' then None else Some la''
) r
in
let rs' = filter_reductions g la rs in
if rs == rs' && not !filtered
then rrs
else r' :: rs'
let rec filter_stacks g la acc = function
| [] -> acc
| (x, la') :: xs ->
let la' = Terminal.intersect g la la' in
let acc =
if IndexSet.is_empty la'
then acc
else (x, la') :: acc
in
filter_stacks g la' acc xs
let rec merge_reduction_step map acc = function
| [] -> (map, acc)
| [] :: _ -> assert false
| (r :: rs) :: rrs ->
let acc = if list_is_empty rs then acc else rs :: acc in
let augment _ a b = Some (IndexSet.union a b) in
let map = IndexMap.union augment r map in
merge_reduction_step map acc rrs
let rec merge_reductions = function
| [] -> []
| rrs ->
let r, rrs' = merge_reduction_step IndexMap.empty [] rrs in
r :: merge_reductions rrs'*)(* Close reductions of goto transitions *)(*let close_goto_reductions (type g) (g : g grammar) rcs
: (g goto_transition, g reduction_closure) vector
=
let sentinel = {accepting = IndexSet.empty; failing = IndexSet.empty;
reductions = []; stacks = {sub=[]}} in
let table = Vector.make (Transition.goto g) sentinel in
Index.rev_iter (Transition.goto g) begin fun gt ->
if printf_debug then
Printf.printf "## Closing %s\n"
(Transition.to_string g (Transition.of_goto g gt));
let tr = Transition.of_goto g gt in
let src = Transition.source g tr in
let tgt = Transition.target g tr in
let stacks = ref [] in
let reductions = ref [] in
let push_reductions = function
| [] -> ()
| rs -> push reductions rs
in
let failing = ref IndexSet.empty in
let accepting = ref IndexSet.empty in
let rec visit_target tgt la =
let rc = rcs.:(tgt) in
if printf_debug then
Printf.printf "- reaching target %s @ %s\n"
(Lr1.to_string g tgt)
(Terminal.lookaheads_to_string g la);
add_subset g failing rc.failing la;
add_subset g accepting rc.accepting la;
if printf_debug then
Printf.printf "importing %d stacks\n" (List.length rc.stacks);
stacks := ([tgt], la) :: filter_stacks g la !stacks rc.stacks;
match filter_reductions g la rc.reductions with
| [] -> ()
| r :: rs ->
push_reductions rs;
if printf_debug then
Printf.printf "importing %d reductions\n" (List.length rs);
IndexMap.iter visit_nt r
and visit_nt nt la =
let gt' = Transition.find_goto g src nt in
if true || Index.compare gt' gt <= 0 then
visit_target (Transition.target g (Transition.of_goto g gt')) la
else
let rc = table.:(gt') in
add_subset g failing rc.failing la;
add_subset g accepting rc.accepting la;
stacks := filter_stacks g la !stacks rc.stacks;
push_reductions (filter_reductions g la rc.reductions)
in
visit_target tgt (Terminal.all g);
let failing = !failing in
let accepting = !accepting in
let stacks = !stacks in
let reductions = merge_reductions !reductions in
table.:(gt) <- {accepting; failing; reductions; stacks}
end;
flush stdout;
table
*)letdump_closure?(failing=false)gprint_labelvector=Vector.iteribeginfunstdef->lethas_failing=failing&&IndexSet.is_not_emptydef.failinginlethas_reductions=not(list_is_emptydef.reductions)inlethas_stacks=not(list_is_emptydef.stacks.subs)inifhas_failing||has_reductions||has_stacksthenPrintf.fprintfstdout"%s:\n"(print_labelst);ifhas_failingthenPrintf.fprintfstdout"- failing: %s\n"(string_of_indexset~index:(Terminal.to_stringg)def.failing);ifhas_reductionsthen(Printf.fprintfstdout"- reductions:\n";List.iter(funmap->letfirst=reftrueinIndexMap.iter(funntla->if!firstthen(Printf.fprintfstdout" - ";first:=false)elsePrintf.fprintfstdout" ";Printf.fprintfstdout"%s @ %s\n"(Nonterminal.to_stringgnt)(Terminal.lookaheads_to_stringgla);)map)def.reductions);letrecprint_stacksindent=function|{subs=[]}->()|{subs}->letindent=" "^indentinList.iterbeginfun(stack,la,sub')->Printf.fprintfstdout"%s- %s @ %s\n"indent(Lr1.list_to_stringgstack)(Terminal.lookaheads_to_stringgla);print_stacksindentsub'endsubsinifhas_stacksthenPrintf.fprintfstdout"- stacks:\n";print_stacks""def.stacks;endvector(* Reduction targets indexation *)moduleTarget=Unsafe_cardinal()type'gtarget='gTarget.ttype'gtargets=('gtarget,'gterminalindexset)indexmaptype'gtarget_trie={mutablesub:('glr1,'gtarget_trie)indexmap;mutableimmediates:'glr1indexset;mutabletargets:('glr1,'gtargetindex)indexmap;}letindex_targets(typeg)(g:ggrammar)rc:gtarget_trie*(ggoto_transition,gtargets)vector=(* Index sources of goto transitions *)letgoto_sources=Vector.make(Lr1.cardinalg)IndexSet.emptyinIndex.rev_iter(Transition.gotog)beginfungt->lettr=(Transition.of_gotoggt)ingoto_sources.@(Transition.targetgtr)<-IndexSet.addgtend;(* Allocate target identifiers *)letmoduleGen=Gensym()inletopenTarget.Eq(structtypet=gincludeGenend)inletRefl=eqin(* Targets by goto transition *)letby_goto=Vector.make(Transition.gotog)IndexMap.emptyin(* Manage trie nodes *)letfresh_node()={sub=IndexMap.empty;immediates=IndexSet.empty;targets=IndexMap.empty;}inletget_child(node,lr1)=matchIndexMap.find_optlr1node.subwith|Somenode'->node'|None->letnode'=fresh_node()innode.sub<-IndexMap.addlr1node'node.sub;node'inletroot=fresh_node()inroot.immediates<-Lr1.allg;letrecfollow_path=function|[]->assertfalse|[lr1]->(root,lr1)|lr1::path->(get_child(follow_pathpath),lr1)in(* Construct target trie *)Index.rev_iter(Lr1.cardinalg)beginfuntgt->(* For each LR(1), there are three sources of reduction targets:
- stacks directly reachable from this state,
these are marked as "immediate" in the trie
- goto transitions reaching this target (found using the goto_sources)
- composition of both
*)letrecvisit_stacksacc{subs}=List.fold_leftbeginfunacc(stack,la,sub')->letacc=(follow_pathstack,la)::accinvisit_stacksaccsub'endaccsubsinletroots=visit_stacks[]rc.:(tgt).stacksin(* 1. Register immediates *)List.iter(fun((node,lr1),_)->node.immediates<-IndexSet.addlr1node.immediates)roots;(* Goto sources *)letsources=goto_sources.:(tgt)inifIndexSet.is_not_emptysourcesthen(* Prepend all goto transitions (by construction, rc stacks already end with tgt) *)letroots=(get_child(root,tgt),Terminal.allg)::List.map(fun(root,la)->(get_childroot,la))rootsinList.iterbeginfun(root,la)->IndexSet.iterbeginfungt->letsrc=Transition.sourceg(Transition.of_gotoggt)inletindex=matchIndexMap.find_optsrcroot.targetswith|Someindex->index|None->letindex=Gen.fresh()inroot.targets<-IndexMap.addsrcindexroot.targets;indexinby_goto.@(gt)<-IndexMap.addindexlaendsources;endrootsend;stopwatch2"indexed %d targets"(cardinalGen.n);(* Done *)(root,by_goto)(* Graph construction *)moduleStep=Unsafe_cardinal()type'gstep='gStep.tletget_stream?(initial=0)stream=lets=refstreaminletd=refinitialinfuni->assert(i>=!d);whilei>!ddos:=Lazy.force(!s).lnext;incrd;done;(!s).lvaluetype'gtransition={reached:'gtargetindexset;reachable:'gtargetindexset;step:'gstepindex;}type'ggraph=('gstep,('glr1,'gtransitionlist)indexmap)vectorletmake(typeg)(g:ggrammar)rctargets:ggraph=letopenIndexBufferinletmoduleCells=Gensym()inletmoduleLinks=Gen.Make()inletcells:(Cells.n,glr1indexset)Dyn.t=Dyn.makeIndexSet.emptyinletopenstructtypelabel=glr1index*gtargetindexset*int*Cells.nindex*Cells.nindex*glr1indexsetendinletlinks:(Links.n,label)Gen.t=Links.get_generator()inlettable=Vector.make(Nonterminal.cardinalg)IndexSet.Map.emptyinletget_cellntla=letmap0=table.:(nt)inmatchIndexSet.Map.find_optlamap0with|Someindex->index|None->letindex=Cells.fresh()intable.:(nt)<-IndexSet.Map.addlaindexmap0;indexinletinitial=Cells.fresh()inletsink=Cells.fresh()inletrecexplore_cellcellntlasrc=letgt=Transition.find_gotogsrcntinletreached=IndexMap.deflatetargets.:(gt)(fun_la'->not(IndexSet.disjointlala'));inletpredecessors=get_stream(Lr1.predecessorsgsrc)inlettgt=Transition.targetg(Transition.of_gotoggt)inexplore_transitionscellsrcreachedlapredecessorsrc.:(tgt).reductionsandexplore_transitionscell0srcreachedla0predecessorsreductions=letresult=ref[]inList.iteribeginfundepthgoto->IndexMap.iterbeginfunntla->letla=IndexSet.interla0lainifIndexSet.is_not_emptylathen(letcell=get_cellntlainletstates=predecessorsdepthinletdone_=Dyn.getcellscellinlettodo=IndexSet.diffstatesdone_inpushresult(src,reached,depth,cell0,cell,states);ifIndexSet.is_not_emptytodothen(Dyn.setcellscell(IndexSet.uniontododone_);IndexSet.rev_iter(explore_cellcellntla)todo;));endgotoendreductions;match!resultwith|[]->ignore(Gen.addlinks(src,reached,0,cell0,sink,IndexSet.empty));|result->List.iter(funtr->ignore(Gen.addlinkstr))resultinIndex.iter(Lr1.cardinalg)beginfunlr1->letpredecessors=get_stream~initial:(-1)(Lr1.predecessorsglr1)inexplore_transitionsinitiallr1IndexSet.empty(Terminal.regularg)predecessorsrc.:(lr1).reductionsend;stopwatch2"raw redgraph: %d cells, %d links"(cardinalCells.n)(cardinalLinks.n);letmoduleMin=Valmari.Minimize(structtypet=labelletcompare(lr1,targets1,depth1,_src1,_dst1,states1)(lr2,targets2,depth2,_src2,_dst2,states2)=letc=Index.comparelr1lr2inifc<>0thencelseletc=Int.comparedepth1depth2inifc<>0thencelseletc=IndexSet.comparetargets1targets2inifc<>0thencelseletc=IndexSet.comparestates1states2incend)(structtypestates=Cells.nletstates=Cells.ntypetransitions=Links.nlettransitions=Links.nletsourcetr=let(_,_,_,x,_,_)=Gen.getlinkstrinxlettargettr=let(_,_,_,_,x,_)=Gen.getlinkstrinxletlabeltr=Gen.getlinkstrletinitialsf=finitialletfinalsf=Index.iterCells.nfletrefinementsf=f(fun~add->addinitial);f(fun~add->addsink)end)inletinitial=Option.get(Min.transport_stateinitial)inletsink=Option.get(Min.transport_statesink)instopwatch2"minimized redgraph: %d cells, %d links"(cardinalMin.states)(cardinalMin.transitions);letcells_outgoing=Vector.makeMin.statesIndexMap.emptyinletcells_depth=Vector.makeMin.states0inIndex.rev_iterMin.transitionsbeginfuntr->letsource=Min.sourcetrinlettarget=Min.targettrinletlr,_,depth,_,_,_=Min.labeltrincells_outgoing.@(source)<-IndexMap.updatelr(add_updatetr);cells_depth.@(target)<-Int.maxdepthend;stopwatch2"redgraph: indexed transitions";letsuccftr=let(_,_,_,_,_,states)=Min.labeltrinletoutgoing=cells_outgoing.:(Min.targettr)inIndexSet.rev_iter(funsrc->IndexSet.iterf(IndexMap.findsrcoutgoing))statesinletreachable=Vector.initMin.transitions(funtr->letacc=refIndexSet.emptyinsucc(funtr'->let(_,targets,_,_,_,_)=Min.labeltr'inacc:=IndexSet.uniontargets!acc)tr;!acc)inTarjan.close_relationsuccreachable;stopwatch2"redgraph: reachability closure";letmoduleSteps=Step.Const(structtypet=gletcardinal=Vector.fold_left(+)(Vector.length_as_intcells_depth-1)cells_depthlet()=stopwatch2"redgraph: %d steps"cardinalend)inletenum=Index.enumerateSteps.ninletstep_zero=enum()inletcells_steps=Vector.mapi(funcelldepth->ifcell=initial||cell=sinkthenstep_zeroelse(for_=0todepth-1doignore(enum())done;enum()))cells_depthinletsteps=Vector.makeSteps.nIndexMap.emptyinVector.rev_iteribeginfuncellstep->steps.:(step)<-IndexMap.mapbeginfuntrs->List.map(funtr->let(_,reached,depth,_,_,_)=Min.labeltrinletreachable=reachable.:(tr)inlettarget=cells_steps.:(Min.targettr)inletstep=Index.of_intSteps.n(Index.to_inttarget-depth)in{reached;reachable;step})(IndexSet.elementstrs)endcells_outgoing.:(cell)endcells_steps;stepstype'gaction=|Advanceof'gstepindex|Switchof('glr1,'gtransitionlist)indexmapletinitial(typeg)(gr:ggraph)(lr1:glr1index)=matchIndexMap.find_optlr1(Vector.as_arraygr).(0)with|None->[]|Somel->lletfollowgrstep=match(step:_index:>int)with|0->SwitchIndexMap.empty|step'->letmap=gr.:(step)inifIndexMap.is_emptymapthenAdvance(Index.of_int(Vector.lengthgr)(step'+1))elseSwitchmap