123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375(* The MIT License (MIT)
Copyright (c) 2025 Frédéric Bour
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*)(** This module implements the regular expressions used by LRGrep.
It provides functions for creating, comparing, and deriving regular
expressions and continuations, which appear during the derivation process.
It is parameterized by the `Info` and `Redgraph` modules, which are used to
provide information about the LR automaton and its (viable) reductions,
respectively. *)openFix.IndexingopenUtilsopenMiscopenInfo(** The Capture module defines types and functions for representing variables
captured in regular expressions.
It uses an index type to uniquely identify a capture in an expression. *)moduleCapture:sigtypentypet=nindextypeset=nindexsettype'amap=(n,'a)indexmap(* The gensym is instantiated separately for each expression *)valgensym:unit->unit->nindexend=structincludePositivetypet=nindextypeset=nindexsettype'amap=(n,'a)indexmapletgensym()=letr=ref(-1)infun()->incrr;Index.of_intn!rend(** The RE module type defines the signature for regular expressions, including
types for reductions, unique IDs to identify sub-terms, and the regular
expression terms themselves.
It also includes functions for creating, comparing, and converting regular
expressions to a Cmon document. *)moduleReductions=structtype'gt={pattern:'gRedgraph.targetindexset;capture:Capture.set;policy:Syntax.quantifier_kind;}letcomparer1r2=ifr1==r2then0elseletc=IndexSet.comparer1.patternr2.patterninifc<>0thencelseletc=IndexSet.comparer1.capturer2.captureincletcmon{capture=_;pattern;policy}=Cmon.record[(*"capture", cmon_indexset capture;*)"pattern_domain",cmon_set_cardinal(*cmon_indexset*)pattern;"policy",Syntax.cmon_quantifier_kindpolicy;]endmoduleExpr=struct(** Integers that serves has unique id to identify sub-terms.
Thanks to properties of Antimirov's derivatives, no new term is
introduced during derivation. All terms are produced during initial
parsing. *)typeuid=intletuid=letk=ref0infun()->incrk;!ktype'gt={uid:uid;desc:'gdesc;position:Syntax.position;}(** The different constructors of regular expressions*)and'gdesc=|Setof'glr1indexset*Capture.set(** Recognise a set of states, and optionally bind the matching state to
a variable. *)|Altof'gtlist(** [Alt ts] is the disjunction of sub-terms [ts] (length >= 2).
[Alt []] represents the empty language. *)|Seqof'gtlist(** [Seq ts] is the concatenation of sub-terms [ts] (length >= 2).
[Seq []] represents the {ε}. *)|Starof'gt*Syntax.quantifier_kind(** [Star t] is represents the Kleene star of [t] *)|Filterof'glr1indexset|ReduceofCapture.set*'gReductions.t(** The reduction operator *)|UsageofUsage.set(** A regular expression term with its unique ID, its description and its
position. *)letempty={uid=0;desc=Alt[];position=Lexing.dummy_pos}(** Introduce a new term, allocating a unique ID *)letmakepositiondesc={uid=uid();desc;position}(** Compare two terms *)letcomparet1t2=Int.comparet1.uidt2.uidletcmon?(lr1=cmon_index)t=letrecauxt=matcht.descwith|Set(lr1s,_var)->Cmon.construct"Set"[cmon_indexset~index:lr1lr1s]|Altts->Cmon.constructor"Alt"(Cmon.list_mapauxts)|Seqts->Cmon.constructor"Seq"(Cmon.list_mapauxts)|Star(t,qk)->Cmon.construct"Star"[auxt;Syntax.cmon_quantifier_kindqk]|Filterlr1s->Cmon.constructor"Filter"(cmon_indexset~index:lr1lr1s)|Reduce(_var,r)->Cmon.construct"Reduce"[(*cmon_indexset var;*)Reductions.cmonr]|Usage_->Cmon.constant"Usage"inauxtendmoduleLabel=structtype'gt={filter:'glr1indexset;captures:Capture.set;usage:Usage.set;}letcomparel1l2=ifl1==l2then0elseletc=IndexSet.comparel1.filterl2.filterinifc<>0thencelseIndexSet.comparel1.capturesl2.capturesletfilterlabelfilter=letfilter=IndexSet.interlabel.filterfilterinifIndexSet.is_emptyfilterthenNoneelseSome{labelwithfilter}letunionl1l2={filter=IndexSet.unionl1.filterl2.filter;captures=IndexSet.unionl1.capturesl2.captures;usage=Usage.joinl1.usagel2.usage;}letcapturelabelvarsusage=ifIndexSet.is_emptyvars&&Usage.is_emptyusagethenlabelelse{labelwithcaptures=IndexSet.unionlabel.capturesvars;usage=Usage.joinlabel.usageusage}endmoduleK=structtype'gt=|Accept|Done|Moreof'gExpr.t*'gt|Reducingof{reduction:'gReductions.t;steps:'gRedgraph.stepindexset;next:'gt;}letcmon?lr1?stepk=letrecaux=function|Accept->Cmon.constant"Accept"|Done->Cmon.constant"Done"|More(e,t)->Cmon.construct"More"[Expr.cmon?lr1e;auxt]|Reducing{reduction=_;steps;next}->Cmon.crecord"Reducing"["reduction",Cmon.constant"...";"steps",cmon_indexset?index:stepsteps;"next",auxnext;]inauxkletreccomparet1t2=ift1==t2then0elsematcht1,t2with|Accept,Accept->0|Done,Done->0|More(e1,t1'),More(e2,t2')->letc=Expr.comparee1e2inifc<>0thencelsecomparet1't2'|Reducingr1,Reducingr2->letc=Reductions.comparer1.reductionr2.reductioninifc<>0thencelseletc=IndexSet.comparer1.stepsr2.stepsinifc<>0thencelsecomparer1.nextr2.next|Accept,(More_|Reducing_|Done)->-1|Done,(More_|Reducing_)->-1|(More_|Reducing_|Done),Accept->+1|(More_|Reducing_),Done->+1|More_,Reducing_->-1|Reducing_,More_->+1letintersectings1s2=not(IndexSet.disjoints1s2)letderive(typeg)(_g:ggrammar)(rg:gRedgraph.graph)filterk=letcontinuerlabelnext=match!rwith|(label',next')::r'whennext'==next->r:=(Label.unionlabel'label,next)::r'|r'->r:=(label,next)::r'inletks=ref[]inletrecprocess_reduction_stepmatchingnext_stepsfilter(reduction:_Reductions.t)step=matchRedgraph.followrgstepwith|Advancestep'->next_steps:=IndexMap.updatestep'(union_updatefilter)!next_steps|Switchmap->letmatching'=refIndexSet.emptyinIndexMap.rev_iterbeginfun(lr1,trs)->ifIndexSet.memlr1filterthen(lethas_match=reffalseinList.iterbeginfun(tr:_Redgraph.transition)->ifnot!has_matchthenhas_match:=intersectingtr.reachedreduction.pattern;ifintersectingtr.reachablereduction.patternthenbegin(*let reach = IndexSet.inter reachable reduction.pattern in
Printf.eprintf "continuing to step %d on %s because targets %s are reachable\n"
(step : _ index :> int) (Lr1.to_string g lr1)
(string_of_indexset reach)
;*)process_reduction_stepmatchingnext_steps(IndexSet.singletonlr1)reductiontr.stependendtrs;if!has_matchthenmatching':=IndexSet.addlr1!matching';)endmap;matching:=IndexSet.union!matching'!matchinginletrecprocess_klabel=function|Accept->()|Done->continuekslabelAccept|More(re,next)asself->process_relabelselfnextre.desc|Reducing{reduction;steps;next}->letfilter0=label.filterinletmatching=refIndexSet.emptyinletnext_steps=refIndexMap.emptyinletf=process_reduction_stepmatchingnext_stepslabel.filterreductioninIndexSet.iterfsteps;letpush_matching()=ifIndexSet.is_not_empty!matchingthen(letlabel={labelwithfilter=!matching}inprocess_klabelnext)inletpush_steps()=letlabel=Label.capturelabelreduction.captureUsage.emptyinletnext_steps=!next_steps|>IndexMap.bindings|>List.map(fun(a,b)->(b,a))|>IndexRefine.annotated_partitioninList.iter(fun(filter,steps)->assert(IndexSet.subsetfilterfilter0);letsteps=IndexSet.of_liststepsincontinueks{labelwithfilter}(Reducing{reduction;steps;next});)next_steps;inbeginmatchreduction.policywith|Shortest->push_matching();push_steps()|Longest->push_steps();push_matching()endandprocess_relabelselfnext=function|Set(s,var)->beginmatchLabel.filterlabelswith|None->()|Somelabel->continueks(Label.capturelabelvarUsage.empty)nextend|Altes->List.iter(fune->process_klabel(More(e,next)))es|Star(r,Shortest)->process_klabelnext;process_klabel(More(r,self))|Star(r,Longest)->process_klabel(More(r,self));process_klabelnext|Seqes->process_klabel(List.fold_right(funek->More(e,k))esnext)|Filterfilter->beginmatchLabel.filterlabelfilterwith|None->()|Somelabel'->process_klabel'nextend|Reduce(cap,reduction)->letlabel=Label.capturelabel(IndexSet.unioncapreduction.capture)Usage.emptyinletnext_steps=ref[]inIndexSet.iterbeginfunlr1->letsteps=List.fold_rightbeginfun(tr:_Redgraph.transition)steps->ifintersectingtr.reachablereduction.patternthenIndexSet.addtr.stepstepselsestepsend(Redgraph.initialrglr1)IndexSet.emptyinifIndexSet.is_not_emptystepsthenpushnext_steps(steps,lr1);endlabel.filter;letnext_steps=IndexRefine.annotated_partition!next_stepsinList.iter(fun(steps,filter)->letfilter=IndexSet.of_listfilterincontinueks{labelwithfilter}(Reducing{reduction;steps;next}))next_steps;|Usageset->letlabel=Label.capturelabelIndexSet.emptysetinprocess_klabelnextinletlabel={Label.filter;captures=IndexSet.empty;usage=Usage.empty}inprocess_klabelk;List.rev!ksend