123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196(**************************************************************************)(* This file is part of the Codex semantics library. *)(* *)(* Copyright (C) 2013-2025 *)(* CEA (Commissariat à l'énergie atomique et aux énergies *)(* alternatives) *)(* *)(* you can redistribute it and/or modify it under the terms of the GNU *)(* Lesser General Public License as published by the Free Software *)(* Foundation, version 2.1. *)(* *)(* It is distributed in the hope that it will be useful, *)(* but WITHOUT ANY WARRANTY; without even the implied warranty of *)(* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *)(* GNU Lesser General Public License for more details. *)(* *)(* See the GNU Lesser General Public License version 2.1 *)(* for more details (enclosed in the file LICENSE). *)(* *)(**************************************************************************)(** Interface for directed graphs with labeled vertices and edges. The
* reduction algorithm reduces graphs into a regex over the alphabet of edge
* labels. The input graph must be without self-loops, i.e. edges from one node
* to itself.
* NOTE: This algorithm's complexity and correctness has only been established
* if there is at most one edge between two nodes (as should be the case in a
* CFG)! *)moduletypeGRAPHI=sigtypetmoduleV:sigtypet(** Vertex labels *)valequal:t->t->boolvalhash:t->intvalpretty:Format.formatter->t->unitendmoduleE:sigtypetvalsrc:t->V.tvaldst:t->V.tend(** Fold over the incoming edges of a node *)valfold_pred_e:(E.t->'a->'a)->t->V.t->'a->'aendmoduleMake(G:GRAPHI)(R:Regex.Swithtypeletter=G.E.t)=structtypestate=(G.V.t,R.t)Hashtbl.tmoduleWto_utils=Wto_utils.Make(G.V)openWtomoduleNodeMap=Hashtbl.Make(G.V)(* To the w.t.o., we add a map from every node to the set of its heads, to
* look up efficiently whether a node is in a given component. The set of
* heads is represented as a list, because in practice we expect to be very
* small so lists will probably be more efficient. *)typedecorated_wto={partition:G.V.tpartition;heads:(G.V.tlist)NodeMap.t}letdecoratewto=lettable=NodeMap.create17inletrecloopheads=function|[]->()|Noden::rest->NodeMap.addtablenheads;loopheadsrest|Component(h,body)::rest->letheads'=h::headsinNodeMap.addtablehheads';loopheads'body;loopheadsrestinloop[]wto;{partition=wto;heads=table}letin_component{heads;_}headnode=tryList.memhead@@NodeMap.findheadsnodewithNot_found->raise@@Invalid_argument"in_component"(** [udpate state loops is_head add_epsilon graph predicate node] computes
the new path expression leading to [node] in [graph] and stores it in
[state] (replacing whatever value was there before). If [add_epsilon] is
true, then [R.epsilon] is joined to the result. To that end, it needs the
loop map [loops], and [is_head] that tells whether [node] is a head. The
path expression is computed by considering the edges to all predecessors
of [node] matching [predicate]. If [node] has no such predecessors, the
expression will be [R.empty], unless [add_epsilon] is true (useful if
[node] is the entry node).
*)letupdatestateloopsis_headadd_espilongraphpredicatenode=(* Codex_log.feedback "update %a" G.V.pretty node; *)letinitial=ifadd_espilonthenR.epsilonelseR.emptyinletnew_state=G.fold_pred_e(funeacc->(* Note: faults on nodes that are not reachable. *)(* Codex_log.feedback "pred is %a" G.V.pretty (G.E.src e); *)ifpredicate(G.E.srce)thenR.joinacc@@R.append(Hashtbl.findstate(G.E.srce))eelseacc)graphnodeinitialinHashtbl.replacestatenodenew_state;ifis_headthentryletloop=Hashtbl.findloopsnodeinifnot(R.equalloopR.epsilon)thenHashtbl.replacestatenode@@R.append_star(Hashtbl.findstatenode)loopwithNot_found->assertfalse(** [stabilize state loops graph add_epsilon head body] propagates inside the
component ([head] [body]) the paths that enter the component, but not
through its head. Once [stabilize] terminates, the state of every node in
the component is guaranteed to express all possible paths from the entry
node to that node, provided that the arguments verify the following
pre-conditions:
- all predecessors of the component have their states matching the
possible paths to them;
- [loops] contains a correct self-loop for [head].
*)letrecstabilizestateloopsgraphpredicateadd_espilonheadbody=Hashtbl.replacestateheadR.empty;letrecaux=function|[]->()|Noden::rest->updatestateloopsfalsefalsegraphpredicaten;auxrest|Component(h,l)::rest->stabilizestateloopsgraphpredicateadd_espilonhl;auxrestinauxbody;(*Format.printf "value in %a before update: %a\n" G.V.pretty head R.pretty (Hashtbl.find state head);*)updatestateloopstrueadd_espilongraphpredicatehead;(*Format.printf "value in %a after update: %a\n" G.V.pretty head R.pretty (Hashtbl.find state head);*)Wto_utils.iter(funnis_head->updatestateloopsis_headfalsegraphpredicaten)body(** [propagate state loops graph predicate wto] takes the first node in the
w.t.o. as the entry point and propagates and stabilizes path expressions.
[w] should be a w.t.o. of [g]. [state] and [loops] should be empty.
[propagate] guarantees, if [predicate] is [fun _ -> true] that the state
of each node expresses all paths from the first node of [wto] to that
node. [predicate] is passed to [update]. *)letrecpropagateloopsgraphpredicatewto=letstate=Hashtbl.create17in(* Loop function *)letrecaux=function|[]->()|Noden::rest->updatestateloopsfalsefalsegraphpredicaten;auxrest|Component(h,l)::rest->compute_self_looploopsgraphwtohl;stabilizestateloopsgraphpredicatefalsehl;auxrestin(matchwto.partitionwith|[]->raise(Invalid_argument"propagate")|Noden::rest->(* [n] is the entry point, set its state to "empty regex" *)Hashtbl.replacestatenR.epsilon;auxrest|Component(h,l)::rest->compute_self_looploopsgraphwtohl;stabilizestateloopsgraphpredicatetruehl;auxrest);state(** Computes the self-loop of a component and stores it. *)andcompute_self_looploopsgraphwtoheadbody=(* Propagate on the nodes in the component *)letsub_state=propagateloopsgraph(in_componentwtohead){wtowithpartition=Nodehead::body}in(* Updating the head inside the subgraph gives us all paths from [h] to
* [h] in that subgraph. Tell [update] that [h] is not a head so that it
* does not try to access the self-loop (since it is not computed yet). *)updatesub_stateloopsfalsefalsegraph(in_componentwtohead)head;letloop=Hashtbl.findsub_stateheadinifnot(R.equalloopR.empty)thenHashtbl.replaceloopsheadloopletcompute_exprsgraphwto=letloops=Hashtbl.create17inletwto=decoratewtoinpropagateloopsgraph(fun_->true)wtoend