123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243(**************************************************************************)(* This file is part of the Codex semantics library. *)(* *)(* Copyright (C) 2013-2025 *)(* CEA (Commissariat à l'énergie atomique et aux énergies *)(* alternatives) *)(* *)(* you can redistribute it and/or modify it under the terms of the GNU *)(* Lesser General Public License as published by the Free Software *)(* Foundation, version 2.1. *)(* *)(* It is distributed in the hope that it will be useful, *)(* but WITHOUT ANY WARRANTY; without even the implied warranty of *)(* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *)(* GNU Lesser General Public License for more details. *)(* *)(* See the GNU Lesser General Public License version 2.1 *)(* for more details (enclosed in the file LICENSE). *)(* *)(**************************************************************************)(* This algorithm is similar to the region-based analysis of the
dragon book ("Compilers: Principles, Techniques, and Tools (2nd
Edition)", by Aho, Lam, Sethi and Ullman). But there are some
important differences:
- We never build regions; the nesting of natural loops suffice.
- We do not compose transfer functions. Instead, we rely on the
fact that Ocaml has first-class functions, and associate to "loop
edges" functions describing the behaviour of loops.
The composition of region of the Dragon Book does not fit well the
translation to terms, for which composition of transfer function
would mean creation of closures or a costly substitution; this
algorithm avoids function composition.
The algorithm could be extended to handle non-natural loops. Intead
of using the notion of back edges, we could use that of retreating
edge, starting from a spanning tree of the graph. It should be
possible to compile strongly connected components with multiple
entry point several times, introducing the variable at different
entry points, or inlining part of the loop to "reuse" another entry
point when feasible. Computation of nested scc can be obtained
using "Earnest, Balke and Anderson: Analysis of Graphs by Ordering
of Nodes". It is maybe also possible to use the wto ordering of
bourdoncle for this purpose. *)includeRegion_analysis_sig;;moduleMake(N:NODE):sig(* Function computing from an entry abstracat value the "after"
state, which is a map from each outgoing edge to its respective
value. *)valafter:N.abstract_value->N.abstract_valuelistN.Edge_Dict.tend=struct(****************************************************************)(* Back edges. *)(* Return a dict from head nodes to the set of origins of back edges. *)letback_edges:N.Set.tN.Dict.t=letback_edges=N.Dict.createN.Graph.sizeN.Set.emptyinN.Graph.iter_nodes(funn->N.Graph.iter_succsn(funhead->ifN.DomTree.dominatesheadnthenN.Dict.setback_edgeshead(N.Set.addn(N.Dict.getback_edgeshead))));back_edges;;letis_back_edgefromto_=N.Set.memfrom(N.Dict.getback_edgesto_);;(****************************************************************)(* Natural loops. *)typenatural_loop=N.Set.t;;(* For each header node, the list of the nested natural_loops with
that header, from outermost to innermost. *)letnatural_loops:(natural_looplist)N.Dict.t=(* Perform a DFS using the "preds" relation, and returns the
set of visited nodes (we use the set to "mark" visits). We
first mark the head so as not to go beyond it.
Origin is the origin node of the back edge, corresponding to
the end of the loop body. We special case single-node loops. *)letnatural_loop_forheadorigin=(* Kernel.feedback "natural loop for %a %a %b" N.pretty head N.pretty origin (head == origin); *)letvisited=refN.Set.emptyinvisited:=N.Set.addhead!visited;ifN.equalheadoriginthen!visited(* Useful for while(1); loops *)elseletrecloopn=visited:=N.Set.addn!visited;(* Kernel.feedback "add %a" N.pretty n; *)N.Graph.iter_predsn(funk->ifN.Set.memk!visitedthen()elseloopk);inlooporigin;!visitedinletnatural_loops=N.Dict.createN.Graph.size[]in(* Attach natural loops to their headers, they can be nested; if
they are not, merge them. *)letadd_natural_loopheadernl=letnls=N.Dict.getnatural_loopsheaderinletrecloop=function|[]->[nl]|(a::b)asnls->ifN.Set.subsetanlthennl::nlselseifN.Set.subsetnlathena::(loopb)(* Neither is a subset of the other: we merge the loops. *)else(N.Set.unionanl)::binN.Dict.setnatural_loopsheader(loopnls)inN.Dict.iterback_edges(funheaderorigins->N.Set.iter(funorigin->letnatural_loop=natural_loop_forheaderorigininadd_natural_loopheadernatural_loop)origins);natural_loops;;(* Let us consider the tree of inclusion of natural loops (i.e. each
node in the tree is a natural loop, and children of that tree are
the natural loops that it contains). This function performs a
postfix iteration on that tree (i.e. with outermost natural loops
lasts).
Note that the children of the tree are disjoint (i.e. have an
empty intersection). *)letnatural_loops_postfix_iterf=N.DomTree.domtree_postfix_iter(funheader->letnls=N.Dict.getnatural_loopsheaderinList.iter(funnl->fheadernl)(List.revnls));;(****************************************************************)(* Transfer functions of regions.
A region is a set of nodes with a _header_ that dominates the
other nodes. *)(* For each header node, the transfer function summarizing the
natural loop starting at this header node (if any). Note that
there may be several loops starting at a header node; if so
selects the outermost that has been analyzed so far.
Also note that loop_transfer_functions contain closures that use
what is contained in [loop_transfer_functions], which causes a
recursion avoiding by giving to the closures _copies_ of
[loop_transfer_functions]. *)letloop_transfer_functions:(N.abstract_value->N.abstract_value)optionN.Dict.t=N.Dict.createN.Graph.sizeNone;;(* Given an entire region, returns a function computing the "after"
state given an abstract value input of the header node. *)letcompile_region_afterloop_transfer_functionsheadertset=funinput_term->letedge_term:N.abstract_valuelistN.Edge_Dict.t=N.Edge_Dict.create()in(* The idea is to iterate on each node in topological order, so
that we always find the data on input edges. To avoid a
topological sort, we allow do_node to call itself on a previous
node when the result is not ready, and rely on the fact that
edge_term memoizes the results to ensure that do_node is
eventually called only once per node. *)letrecdo_noden=letinput=ifn==headertheninput_termelseletinputs=ref[]inN.Graph.iter_predsn(funpred->if(N.Set.memntset)&&(not(is_back_edgepredn))thenletinput=get_edgesprednininputs:=input@!inputs);N.join!inputsinletinput=matchN.Dict.getloop_transfer_functionsnwith|None->input|Somef->N.mufinputinletoutputs=N.compile_nodeninputinList.iter(fun(edge,output)->letoutputs=tryN.Edge_Dict.getedge_termedgewithNot_found->[]inN.Edge_Dict.setedge_termedge(output::outputs))outputs(* Compute for previous node if result not yet available. *)andget_edgespredn=letedge=Edge(pred,n)intryN.Edge_Dict.getedge_termedgewithNot_found->do_nodepred;N.Edge_Dict.getedge_termedgein(* We can now iterate on any order. *)N.Set.iterdo_nodetset;edge_term;;(* Given a region that is a natural loop, compute the transfer
function for the body of the loop. *)letcompile_loop_transfer_functionloop_transfer_functionsheadertset=funinput_term->letedge_term=compile_region_afterloop_transfer_functionsheadertsetinput_termin(* Collect the abstract values on the back edges. *)letbody_exit_term=letinputs=ref[]inN.Graph.iter_predsheader(funpred->if(N.Set.mempredtset)&&(is_back_edgepredheader)thenletedge=Edge(pred,header)inletinput=N.Edge_Dict.getedge_termedgeininputs:=input@!inputs);N.join!inputsinbody_exit_term;;(* Compute the final [loop_transfer_functions]. *)natural_loops_postfix_iter(funheadertset->(* Copy [loop_transfer_functions] for the closure, so that it is
not affected by further modifications. *)letcopy_ltf=N.Dict.copyloop_transfer_functionsinletf=compile_loop_transfer_functioncopy_ltfheadertsetinN.Dict.setloop_transfer_functionsheader(Somef));;letafterinput=compile_region_afterloop_transfer_functionsN.Graph.entry_nodeN.Graph.all_nodesinput;;end