123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398(****************************************************************************)(* *)(* This file is part of MOPSA, a Modular Open Platform for Static Analysis. *)(* *)(* Copyright (C) 2018-2019 The MOPSA Project. *)(* *)(* This program is free software: you can redistribute it and/or modify *)(* it under the terms of the GNU Lesser General Public License as published *)(* by the Free Software Foundation, either version 3 of the License, or *)(* (at your option) any later version. *)(* *)(* This program is distributed in the hope that it will be useful, *)(* but WITHOUT ANY WARRANTY; without even the implied warranty of *)(* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *)(* GNU Lesser General Public License for more details. *)(* *)(* You should have received a copy of the GNU Lesser General Public License *)(* along with this program. If not, see <http://www.gnu.org/licenses/>. *)(* *)(****************************************************************************)(** Converts a Universal program AST into a CFG. *)openOptionopenMopsaopenUniversal.AstopenUniversal.Iterators.Interproc.CommonopenAstopenFlowletname="cfg.frontend"letdebugfmt=Debug.debug~channel:namefmtletdump_dot=false(** dump CFG in dot file (for debugging) *)(*==========================================================================*)(** {2 Graph conversion} *)(*==========================================================================*)(*
AST are transformed into graphs containing only "simple" statements.
Each edge is either:
- a single statement without any sub-statement (no if, while, etc.)
- a block, where each statement has no sub-statement
Expressions in statements are also simplified, either:
- an expression with no function call, or
- e0 = f(e1, ..., en), where e0, ..., en have no calls
NOTE: for e1 && e2 and e1 || e2, all calls in e1 and e2 are extracted
and evaluated before the simplified e1 and e2 are evaluated.
This makes it impossible to implement shortcut boolean operations
(where e2 is not evaluated for e1 && e2 if e1 is false).
*)(* Extract calls from expression.
Returns:
- a list of temporary variables
- a sequence of assignments tmp = call(e1, ..., en)
- the expression with no longer any call
*)letextract_calls_expr(e:expr):(var*range)list*stmtlist*expr=(* we need to match on AST root after recursively handling its sub-trees,
hence we cannot use Visitor.fold_map_expr directly
*)letrecdoit(tmps,assigns)e=(* recursive call on argument expressions *)letp,c=Visitor.structure_of_expreinlet(tmps,assigns),el=doit_list(tmps,assigns)[]p.exprsin(* recombine sub-expressions into expression *)lete=c{pwithexprs=el}in(* extract top-level call *)matchekindewith|E_call(f,args)->lettmp=mktmp~typ:(etype)()inletv=mk_vartmp(erangee)inletassign=mk_assignve(erangee)in((tmp,erangee)::tmps,assign::assigns),v|_->(tmps,assigns),eanddoit_listxacc=function|[]->x,List.revacc|a::b->letx,a=doitxaindoit_listx(a::acc)binlet(tmps,assigns),e'=doit([],[])einmatchekinde'with|E_var_whenList.lengthassigns=1->(* special case: the original expression was call(e1, ..., en) with no
calls within e1, ..., en
*)[],[],e|_->List.revtmps,List.revassigns,e'(* Extract calls from all the expressions used in a statement.
Retuns the temporary variables, their assignments, and the argument
statement without calls in its expressions.
Does not change the sub-statements of the statement.
*)letextract_calls_stmt(s:stmt):(var*range)list*stmtlist*stmt=letp,c=Visitor.structure_of_stmtsinletrecextracttmpsassignsexprs=function|[]->tmps,assigns,List.revexprs|e::ee->lettmp,assign,e'=extract_calls_expreinextract(tmp@tmps)(assign@assigns)(e'::exprs)eeinlettmps,assigns,exprs=extract[][][]p.exprsinletp={pwithexprs=exprs;}intmps,assigns,cp(* Generate add_var from temporary list. *)letmk_add_tmps(l:(var*range)list):stmtlist=List.map(fun(v,r)->mk_add_varvr)l(* Generate remove_var from temporary list. *)letmk_remove_tmps(l:(var*range)list):stmtlist=List.map(fun(v,r)->mk_remove_varvr)l(* Context used during conversion of a function. *)typectx={ctx_cfg:graph;ctx_return:node;ctx_break:nodelist;ctx_continue:nodelist;ctx_return_var:varoption;}(* Add a node, with given id. *)letadd_node(c:ctx)(id:node_id):node=CFG.add_nodec.ctx_cfgid()(* Add an edge between src and dst nodes, with blk statements. *)letadd_edge(c:ctx)range(src:(port*node)list)(dst:(port*node)list)(blk:stmtlist):unit=lets=matchblkwith|[]->mk_skiprange|[a]->a|_->mk_blockblkrangeinlet_=CFG.add_edgec.ctx_cfg(mk_fresh_edge_idrange)~src~dstsin()(* Insert a block after the src node.
If the block is not empty, add a fresh node to represent the end of
the block; the end node is returned.
If the block is empty, no fresh node is created and src is returned.
*)letadd_block_after(c:ctx)range(src:node)(blk:stmtlist):node=ifblk=[]thensrcelseletsrc'=add_nodec(copy_node_id(CFG.node_idsrc))inadd_edgecrange[T_cur,src][T_cur,src']blk;src'(* Insert a block before the dst node. *)letadd_block_before(c:ctx)range(dst:node)(blk:stmtlist):node=ifblk=[]thendstelseletdst'=add_nodec(copy_node_id(CFG.node_iddst))inadd_edgecrange[T_cur,dst'][T_cur,dst]blk;dst'(* Add a statement to the graph in the context.
Handle compound statements by induction.
Takes care of extracting calls from expressions.
*)letrecadd_stmt(c:ctx)(pre:node)(post:node)(s:stmt):unit=matchskindswith(* compound statements *)|S_if(e,s1,s2)->lettmps,assigns,e=extract_calls_expreinletadds,rems=mk_add_tmpstmps,mk_remove_tmpstmpsin(* add nodes begining the true and false branches *)lettloc=mk_fresh_node_id(get_range_start(sranges1))andfloc=mk_fresh_node_id(get_range_end(sranges2))inlettnode=add_nodectlocandfnode=add_nodecflocin(* add test edge *)letblk=adds@assigns@[mk_teste(erangee)]inadd_edgec(erangee)[T_cur,pre][T_true,tnode;T_false,fnode]blk;lettnode=add_block_afterc(erangee)tnoderemsandfnode=add_block_afterc(erangee)fnoderemsin(* add branches *)add_stmtctnodeposts1;add_stmtcfnodeposts2|S_block(l,locals)->letrecaddprepost=function|[]->(* empty block: connect pre and post nodes with skip *)add_edgec(sranges)[T_cur,pre][T_cur,post][]|[s]->(* connect pre to post with statement *)add_stmtcpreposts|a::b->(* add a new node after the first statement *)letloc=mk_fresh_node_id(get_range_start(srangea))inletnode=add_nodeclocin(* add the first statement between pre and node *)add_stmtcprenodea;(* add the rest between node and post *)addnodepostbinletrems=List.map(funv->mk_remove_varv(sranges))localsinaddprepost(l@rems)|S_while(e,s)->lettmps,assigns,e=extract_calls_expreinletadds,rems=mk_add_tmpstmps,mk_remove_tmpstmpsin(* add node at the begining of the loop body *)letloc=mk_fresh_node_id(get_range_start(sranges))inletentry=add_nodeclocin(* add test edge *)letblk=adds@assigns@[mk_teste(erangee)]inletpost'=add_block_beforec(erangee)postremsinadd_edgec(erangee)[T_cur,pre][T_true,entry;T_false,post']blk;(* add body *)letc={cwithctx_break=post::c.ctx_break;ctx_continue=pre::c.ctx_continue;}inletentry=add_block_afterc(erangee)entryremsinadd_stmtcentrypres(* jump statements *)|S_returnNone->(* translate into a simple jump to the exit node *)ifc.ctx_return_var<>NonethenExceptions.panic"return without an expression for non-void function at %a"pp_range(sranges);(* jump to return node *)add_edgec(sranges)[T_cur,pre][T_cur,c.ctx_return][]|S_return(Somee)->(* translate to a simple "return tmp_var" and jump to the exit node *)lettmps,assigns,e=extract_calls_expreinletadds,rems=mk_add_tmpstmps,mk_remove_tmpstmpsin(* assigns expression to return variable *)letret,retvar=matchc.ctx_return_varwith|Somevar->var,mk_varvar(erangee)|None->Exceptions.panic"return with an expression for void function at %a"pp_range(sranges);inletassign_stmt=mk_assignretvare(erangee)inletreturn_stmt=mk_stmt(S_return(Someretvar))(erangee)inletaddret=mk_add_varret(erangee)inletremret=mk_remove_varret(erangee)in(* intermediate node *)letloc=mk_fresh_node_id(get_range_start(sranges))inletmed=add_nodeclocin(* from cur to return flow *)add_edgec(sranges)[T_cur,pre][T_return(sranges),med](adds@assigns@[addret;assign_stmt;return_stmt]);(* from intermediate to exit node *)add_edgec(sranges)[T_cur,med][T_cur,c.ctx_return](remret::rems)|S_break->ifc.ctx_break=[]thenExceptions.panic"break without a loop at %a"pp_range(sranges);(* goto edge (skip statement) *)add_edgec(sranges)[T_cur,pre][T_cur,List.hdc.ctx_break][]|S_continue->ifc.ctx_continue=[]thenExceptions.panic"continue without a loop at %a"pp_range(sranges);(* goto edge (skip statement) *)add_edgec(sranges)[T_cur,pre][T_cur,List.hdc.ctx_continue][](* atomic statements: add an edge with the statement *)(* framework *)|S_assign_|S_assume_|S_add_|S_remove_|S_invalidate_|S_rename_|S_forget_|S_expand_|S_fold_(* universal *)|S_expression_|S_assert_|S_satisfy_|S_print_state|S_free_->lettmps,assigns,s=extract_calls_stmtsinletadds,rems=mk_add_tmpstmps,mk_remove_tmpstmpsinletblk=adds@assigns@[s]@remsinadd_edgec(sranges)[T_cur,pre][T_cur,post]blk(* unknown *)|_->Exceptions.panic"cannot convert statement %a to CFG"pp_stmts(** Creates a new graph and fill-in with the given statement. *)letconvert_stmt?(name="cfg")?(ret:varoption)(s:stmt):stmt=(* create empty graph *)letcfg=CFG.create()in(* entry and exit nodes *)letentry=CFG.add_nodecfg(mk_fresh_node_id(get_range_start(sranges)))()andexit=CFG.add_nodecfg(mk_fresh_node_id(get_range_end(sranges)))()inCFG.node_set_entrycfgentry(SomeT_cur);CFG.node_set_exitcfgexit(SomeT_cur);(* fill-in graph *)letc={ctx_cfg=cfg;ctx_return=exit;ctx_break=[];ctx_continue=[];ctx_return_var=ret;}inadd_stmtcentryexits;letc={cfg_graph=cfg;cfg_order=CFG.weak_topological_ordercfg;}inifdump_dotthenPp.output_dotname("tmp/"^name^".dot")c;mk_cfgc(sranges)(** Converts a function AST to a CFG. *)letconvert_fundec(f:fundec)=(* get the variable to denote the return value *)letret=matchf.fun_return_typewith|None->None|Somet->iff.fun_return_var=NonethenSome(mktmp~typ:t())elsef.fun_return_varin(* convert the body in-place *)f.fun_body<-convert_stmt~name:f.fun_orig_name?retf.fun_body;debug"CFG for function %s:@.%a@."f.fun_orig_namepp_stmtf.fun_body(** Converts a full universal program. *)letconvert_program(p:program)=matchp.prog_kindwith|P_universalu->List.iterconvert_fundecu.universal_fundecs;{pwithprog_kind=P_universal{uwithuniversal_main=convert_stmt~name:"__main__"u.universal_main;}}|_->Exceptions.panic"cannot convert program to CFG"(** From source to CFG. *)letparse_program(files:stringlist):program=convert_program(Universal.Frontend.parse_programfiles)(** Front-end registration *)let()=register_frontend{lang="cfg";parse=parse_program;on_panic=fun___->();}