123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083(* Yoann Padioleau
*
* Copyright (C) 2012, 2014 Facebook
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public License
* version 2.1 as published by the Free Software Foundation, with the
* special exception on linking described in file license.txt.
*
* This library is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the file
* license.txt for more details.
*)openCommonmoduleFlag=Flag_parsingmoduleE=Entity_codemoduleG=Graph_codemoduleP=Graph_code_prologopenAst_cmoduleAst=Ast_c(*****************************************************************************)(* Prelude *)(*****************************************************************************)(*
* Graph of dependencies for C (and partially cpp). See graph_code.ml and
* main_codegraph.ml for more information.
*
* See also lang_clang/analyze/graph_code_clang.ml if you can afford yourself
* to use clang. Lots of code of graph_code_clang.ml have been ported
* to this file now. Being cpp-aware has many advantages:
* - we can track dependencies of cpp constants which is useful in codemap!
* With bddbddb we can also track the flow of specific constants to
* fields! (but people could use enum in clang to solve this problem)
* - we can find dead macros, duplicated macros
* - we can find wrong code in ifdef not compiled
* - we can detect ugly macros that use locals insteaf of globals or
* parameters; again graphcode is a perfect clowncode detector!
* - ...
*
* schema:
* Root -> Dir -> File (.c|.h) -> Function | Prototype
* -> Global | GlobalExtern
* -> Type (for Typedef)
* -> Type (struct|union|enum)
* -> Field TODO track use! but need type
* -> Constructor (enum)
* -> Constant | Macro
* -> Dir -> SubDir -> ...
*
* Note that here as opposed to graph_code_clang.ml constants and macros
* are present.
* What about nested structures? they are lifted up in ast_c_build!
*
* todo:
* - fields!!!!!!! but need type information for expressions
* - the management of dupes has become a bit complicated, easy to miss cases,
* maybe should actually work at namespace level and consider dupes
* constant and functions as they are kind of in the same namespace
* - there is different "namespaces" in C?
* - functions/locals
* - tags (struct name, enum name)
* - cpp?
* - ???
* => maybe we don't need those add_prefix, S__, E__ hacks.
* - Type is a bit overloaded maybe (used for struct/union/enum/typedefs)
* - and of course improve lang_cpp/ parser, lang_c/ ast builder to cover
* more cases, better infer typedef, better handle cpp, etc.
*)(*****************************************************************************)(* Types *)(*****************************************************************************)(* for the extract_uses visitor *)typeenv={g:Graph_code.graph;(* now in Graph_code.gensym: cnt: int ref; *)phase:phase;current:Graph_code.node;c_file_readable:Common.filename;ctx:Graph_code_prolog.context;(* for prolog use/4, todo: merge in_assign with context? *)in_assign:bool;(* mostly to remove some warnings on lookup failures *)in_define:bool;(* for datalog *)in_return:bool;(* covers also the parameters; the type_ is really only for datalog_c *)locals:(string*type_option)listref;(* for static functions, globals, 'main', and local enums/constants/macros *)local_rename:(string,string)Hashtbl.t;conf:config;(* to accept duplicated typedefs if they are the same, and of course to
* expand typedefs for better dependencies
* less: we could also have a local_typedefs field
*)typedefs:(string,Ast.type_)Hashtbl.t;(* to accept duplicated structs if they are the same, and at some point
* maybe also for ArrayInit which should be transformed in a RecordInit.
*)structs:(string,Ast.struct_def)Hashtbl.t;(* error reporting *)dupes:(Graph_code.node,bool)Hashtbl.t;(* for ArrayInit when actually used for structs *)fields:(string,stringlist)Hashtbl.t;log:string->unit;pr2_and_log:string->unit;}andphase=Defs|Usesandconfig={types_dependencies:bool;fields_dependencies:bool;macro_dependencies:bool;(* We normally expand references to typedefs, to normalize and simplify
* things. Set this variable to true if instead you want to know who is
* using a typedef.
*)typedefs_dependencies:bool;propagate_deps_def_to_decl:bool;(* todo: readable_filenames_in_graph_nodeinfo *)}typekind_file=Source|Header(* for prolog and other code indexer (e.g. syncweb's indexer) *)lethook_use_edge=ref(fun_ctx_in_assign(_src,_dst)_g_loc->())lethook_def_node=ref(fun_node_g->())(* for datalog *)letfacts=refNone(*****************************************************************************)(* Parsing *)(*****************************************************************************)(* less: could maybe call Parse_c.parse to get the parsing statistics *)letparse~show_parse_errorfile=try(* less: make this parameters of parse_program? *)Common.save_excursionFlag.error_recoverytrue(fun()->Common.save_excursionFlag.show_parsing_errorshow_parse_error(fun()->Common.save_excursionFlag.verbose_parsingshow_parse_error(fun()->Parse_c.parse_programfile)))with|Timeout->raiseTimeout|exn->pr2_once(spf"PARSE ERROR with %s, exn = %s"file(Common.exn_to_sexn));raiseexn(*****************************************************************************)(* Helpers *)(*****************************************************************************)leterrorstok=failwith(spf"%s: %s"(Parse_info.string_of_infotok)s)(* we can have different .c files using the same function name, so to avoid
* dupes we locally rename those entities, e.g. main -> main__234.
*)letnew_name_if_defsenv(s,tok)=ifenv.phase=DefsthenbeginifHashtbl.memenv.local_renamesthenerror(spf"Duped new name: %s"s)tok;lets2=Graph_code.gensymsinHashtbl.addenv.local_renamess2;s2,tokendelseHashtbl.findenv.local_renames,tok(* anywhere you get a string from the AST you must use this function to
* get the final "value" *)letstrenv(s,tok)=ifHashtbl.memenv.local_renamesthenHashtbl.findenv.local_renames,tokelses,tokletadd_prefixprefix(s,tok)=prefix^s,tokletkind_fileenv=matchenv.c_file_readablewith|swhens=~".*\\.[h]"->Header|swhens=~".*\\.[c]"->Source|_s->(* failwith ("unknown kind of file: " ^ s) *)Sourceletrecexpand_typedefsenvt=matchtwith|TBase_|TStructName_|TEnumName_->t|TTypeNamename->lets=Ast.str_of_namenameinifHashtbl.memenv.typedefss&¬(Hashtbl.memenv.dupes("T__"^s,E.Type))thenlett'=(Hashtbl.findenv.typedefss)in(* right now 'typedef enum { ... } X' results in X being
* typedefed to ... itself
*)ift'=*=tthentelseexpand_typedefsenvt'elset|TPointerx->TPointer(expand_typedefsenvx)(* less: eopt could contain some sizeof(typedefs) that we should expand
* but does not matter probably
*)|TArray(eopt,x)->TArray(eopt,expand_typedefsenvx)|TFunction(ret,params)->TFunction(expand_typedefsenvret,params|>List.map(funp->{pwithp_type=expand_typedefsenvp.p_type}))letfinal_typeenvt=ifenv.conf.typedefs_dependenciesthentelse(* Can we do that anytime? like in Defs phase?
* No we need to wait for the first pass to have all the typedefs
* before we can expand them!
*)expand_typedefsenvtletfind_existing_node_optenvnamecandidateslast_resort=lets=Ast.str_of_namenameinletkind_with_a_dupe=candidates|>Common.find_opt(funkind->Hashtbl.memenv.dupes(s,kind)&&kind<>E.Prototype&&kind<>E.GlobalExtern)inletexisting_nodes=candidates|>List.filter(funkind->G.has_node(s,kind)env.g)inletnon_proto_existing_nodes=existing_nodes|>Common.exclude(funkind->kind=*=E.Prototype||kind=*=E.GlobalExtern)in(matchkind_with_a_dupewith(* If there is a dupe, then we don't want to create an edge to an
* unrelated entity that happens to be unique and listed first
* in the list of candidates. This is especially useful
* when processing large codebase globally such as plan9.
* So in that case we return the kind of the dupe; add_edge() will
* then do its job and skip the edge.
*)|Somekind_dupe->Somekind_dupe|None->(matchexisting_nodeswith(* could not find anything, use last_resort to get a "lookup failure" *)|[]->Somelast_resort|[x]->Somex(* ambiguity, maybe be proto vs def, give priority to kind listed first *)|x::_::_->(matchnon_proto_existing_nodeswith|[]->Somex|[y]->Somey(* real ambiguity, real dupe across different kind of entity *)|x::y::_->env.pr2_and_log(spf"skipping edge, multiple def kinds for entity %s (%s<>%s)"s(E.string_of_entity_kindx)(E.string_of_entity_kindy));letxfile=G.file_of_node(s,x)env.ginletyfile=G.file_of_node(s,y)env.ginenv.log(spf" orig = %s"xfile);env.log(spf" dupe = %s"yfile);None)))letis_localenvs=(Common.find_opt(fun(x,_)->x=$=s)!(env.locals))<>None(*****************************************************************************)(* For datalog *)(*****************************************************************************)(* less: could mv this conrete hooks in datalog_c at some point *)letwith_datalog_envenvf=!facts|>Common.do_option(funaref->letenv2={Datalog_c.scope=fstenv.current;c_file_readable=env.c_file_readable;long_format=!Datalog_c.long_format;globals=env.g;(* need to pass the ref because instrs_of_expr will add new
* local variables
*)locals=env.locals;facts=aref;globals_renames=(funn->strenvn)}infenv2)lethook_expr_toplevelenv_origx=(* actually always called from a Uses phase, but does not hurt to x2 check*)ifenv_orig.phase=Usesthenwith_datalog_envenv_orig(funenv->letinstrs=Datalog_c.instrs_of_exprenvxininstrs|>List.iter(funinstr->letfacts=Datalog_c.facts_of_instrenvinstrinfacts|>List.iter(funfact->Common.pushfactenv.Datalog_c.facts););ifenv_orig.in_returnthenletfact=Datalog_c.return_factenv(Common2.list_lastinstrs)inCommon.pushfactenv.Datalog_c.facts)(* To be called normally after each add_node_and_edge_if_defs_mode.
* subtle: but take care of code that use new_name_if_defs, you must
* call hook_def after those local renames have been added!
*)lethook_defenvdef=ifenv.phase=Defsthenwith_datalog_envenv(funenv->letfacts=Datalog_c.facts_of_defenvdefinfacts|>List.iter(funfact->Common.pushfactenv.Datalog_c.facts);)(*****************************************************************************)(* Add Node *)(*****************************************************************************)letadd_node_and_edge_if_defs_modeenv(name,kind)typopt=letstr=Ast.str_of_namenameinletstr'=matchkind,env.currentwith|E.Field,(s,E.Type)->s^"."^str|_->strinletnode=(str',kind)inifenv.phase=Defsthenbeginmatch()with(* if parent is a dupe, then don't want to attach yourself to the
* original parent, mark this child as a dupe too.
*)|_whenHashtbl.memenv.dupesenv.current->(* todo: hmm but for a struct, might not detect at first that this
* will become a dupe later because of another struct with the same
* name, which means the first set of fields will be added to this
* soon-to-be dupe struct. Is this why I get some SRC fail later?
*)Hashtbl.replaceenv.dupesnodetrue(* already there? a dupe? *)|_whenG.has_nodenodeenv.g->(matchkindwith|E.Function|E.Global|E.Constructor|E.Type|E.Field|E.Constant|E.Macro->(matchkind,strwith(* dupe typedefs/structs are ok as long as they are equivalent,
* and this check is done below in toplevel().
*)|E.Type,swhens=~"[ST]__"->()|_whenenv.c_file_readable=~".*EXTERNAL"->()|_->env.pr2_and_log(spf"DUPE entity: %s"(G.string_of_nodenode));letorig_file=G.file_of_nodenodeenv.ginenv.log(spf" orig = %s"orig_file);env.log(spf" dupe = %s"env.c_file_readable);Hashtbl.replaceenv.dupesnodetrue;)(* todo: have no Use for now for those so skip errors *)|E.Prototype|E.GlobalExtern->(* It's common to have multiple times the same prototype declared.
* It can also happen that the same prototype have
* different types (e.g. in plan9 newword() had an argument with type
* 'Word' and another 'word'). We don't want to add to the same
* entity dependencies to this different types so we need to mark
* the prototype as a dupe too!
* Anyway normally we should add the dependency to the Function or
* Global first so we should hit this code only for really external
* entities (and when we don't find the Function or Global we will
* get some "skipping edge because of dupe" errors).
*)Hashtbl.replaceenv.dupesnodetrue;|_->failwith(spf"Unhandled category: %s"(G.string_of_nodenode)))(* ok not a dupe, let's add it then *)|_->lettyp=matchtypoptwith|None->None|Somet->(* hmmm can't call final_type here, no typedef PASS yet
let t = final_type env t in
*)letv=Meta_ast_c.vof_any(Typet)inlet_s=Ocaml.string_of_vvin(* hmmm this is fed to prolog so need to be a simple string
* without special quote in it, so for now let's skip
*)Some"_TODO_type"in(* try but should never happen, see comment below *)tryletpos=Parse_info.token_location_of_info(sndname)in(* let pos = { pos with Parse_info.file = env.c_file_readable } in *)letnodeinfo={Graph_code.pos;typ;props=[];}in(* less: hmmm actually it could be a dupe it there is another node
* with a different def kind and same name. But we should at
* least warn about in find_existing_node_opt().
*)env.g|>G.add_nodenode;env.g|>G.add_edge(env.current,node)G.Has;env.g|>G.add_nodeinfonodenodeinfo;!hook_def_nodenodeenv.g;(* this should never happen, but it's better to give a good err msg *)withNot_found->error("Not_found:"^str)(sndname)end;ifHashtbl.memenv.dupesnodethenenvelse{envwithcurrent=node}(*****************************************************************************)(* Add edge *)(*****************************************************************************)letadd_use_edgeenv(name,kind)=lets=Ast.str_of_namenameinletsrc=env.currentinletdst=(s,kind)inmatch()with|_whenHashtbl.memenv.dupessrc||Hashtbl.memenv.dupesdst->(* todo: stats *)env.pr2_and_log(spf"skipping edge (%s -> %s), one of it is a dupe"(G.string_of_nodesrc)(G.string_of_nodedst));(* plan9, those are special functions in kencc? *)|_whens=$="USED"||s=$="SET"->()|_whennot(G.has_nodesrcenv.g)->error(spf"SRC FAIL: %s (-> %s)"(G.string_of_nodesrc)(G.string_of_nodedst))(sndname)(* the normal case *)|_whenG.has_nodedstenv.g->G.add_edge(src,dst)G.Useenv.g;letpos=Parse_info.token_location_of_info(sndname)in(* let pos = { pos with Parse_info.file = env.c_file_readable } in *)!hook_use_edgeenv.ctxenv.in_assign(src,dst)env.gpos(* try to 'rekind'? we use find_existing_node now so no need to rekind *)|_->letprfn=ifenv.in_definethenenv.logelseenv.pr2_and_loginprfn(spf"Lookup failure on %s (%s)"(G.string_of_nodedst)(Parse_info.string_of_info(sndname)))(* todo? still need code below?*)(*
| E.Type when s =~ "S__\\(.*\\)" ->
add_use_edge env ("T__" ^ Common.matched1 s, E.Type)
| E.Type when s =~ "U__\\(.*\\)" ->
add_use_edge env ("T__" ^ Common.matched1 s, E.Type)
| E.Type when s =~ "E__\\(.*\\)" ->
add_use_edge env ("T__" ^ Common.matched1 s, E.Type)
*)(*****************************************************************************)(* Defs/Uses *)(*****************************************************************************)letrecextract_defs_usesenvast=ifenv.phase=Defsthenbeginletdir=Common2.dirnameenv.c_file_readableinG.create_intermediate_directories_if_not_presentenv.gdir;letnode=(env.c_file_readable,E.File)inenv.g|>G.add_nodenode;env.g|>G.add_edge((dir,E.Dir),node)G.Has;end;letenv={envwithcurrent=(env.c_file_readable,E.File);}intoplevelsenvast(* ---------------------------------------------------------------------- *)(* Toplevels *)(* ---------------------------------------------------------------------- *)andtoplevelenvx=matchxwith|Define(name,body)->letname=ifkind_fileenv=*=Sourcethennew_name_if_defsenvnameelsenameinletenv=add_node_and_edge_if_defs_modeenv(name,E.Constant)Noneinhook_defenvx;ifenv.phase=Uses&&env.conf.macro_dependenciesthendefine_bodyenvbody|Macro(name,params,body)->letname=ifkind_fileenv=*=Sourcethennew_name_if_defsenvnameelsenameinletenv=add_node_and_edge_if_defs_modeenv(name,E.Macro)Noneinhook_defenvx;letenv={envwithlocals=ref(params|>List.map(funp->Ast.str_of_namep,None(*TAny*)))}inifenv.phase=Uses&&env.conf.macro_dependenciesthendefine_bodyenvbody|FuncDefdef|Prototypedef->letname=def.f_nameinlettyp=Some(TFunctiondef.f_type)inletkind=matchxwith|Prototype_->E.Prototype|FuncDef_->E.Function|_->raiseImpossibleinletstatic=(* if we are in an header file, then we don't want to rename
* the inline static function because would have a different
* local_rename hash. Renaming in the header file would lead to
* some unresolved lookup in the c files.
*)(def.f_static&&kind_fileenv=*=Source)||Ast.str_of_namename="main"in(matchkindwith(* todo: when static and prototype, we should create a new_str_if_defs
* that will match the one created later for the Function, but
* right now we just don't create the node, it's simpler.
*)|E.Prototypewhenstatic->()|E.Prototype->(* todo: when prototype and in .c, then it's probably a forward
* decl that we could just skip?
*)add_node_and_edge_if_defs_modeenv(name,kind)typ|>ignore|E.Function->letname=ifstaticthennew_name_if_defsenvnameelsenameinletenv=add_node_and_edge_if_defs_modeenv(name,kind)typintype_env(TFunctiondef.f_type);hook_defenvx;letxs=snddef.f_type|>Common.map_filter(funx->matchx.p_namewith|None->None|Somen->Some(Ast.str_of_namen,Somex.p_type))inletenv={envwithlocals=refxs}inifenv.phase=Usesthenstmtsenvdef.f_body|_->raiseImpossible)|Globalv->let{v_name=name;v_type=t;v_storage=sto;v_init=eopt}=vin(* can have code like 'Readfn chardraw;' that looks like a global but
* is actually a Prototype.
* todo: we rely on the typedef decl being in the same file and before
* because normally typedefs are computed in Defs phase but we
* need also in this phase to know if this global is actually a proto
*)letfinalt=expand_typedefsenvtinlettyp=Somet(* or finalt? *)inletkind=matchsto,finalt,eoptwith|_,TFunction_,_->E.Prototype|Extern,_,_->E.GlobalExtern(* when have 'int x = 1;' in a header, it's actually the def.
* less: print a warning asking to mv in a .c
*)|_,_,Some_whenkind_fileenv=Header->E.Global(* less: print a warning; they should put extern decl *)|_,_,_whenkind_fileenv=Header->E.GlobalExtern|DefaultStorage,_,_|Static,_,_->E.Globalinletstatic=sto=*=Static(* && kind_file env =*= Source? need that? *)in(matchkindwith(* see comment above in the FuncDef case *)|E.Prototypewhenstatic->()(* note that no need | E.GlobalExtern when static, it can't happen *)|E.Prototype|E.GlobalExtern->add_node_and_edge_if_defs_modeenv(name,kind)typ|>ignore|E.Global->letname=ifstaticthennew_name_if_defsenvnameelsenameinletenv=add_node_and_edge_if_defs_modeenv(name,kind)typinhook_defenvx;type_envt;ifenv.phase=Usestheneopt|>Common.do_option(fune->letn=nameinexpr_toplevelenv(Assign((Cst_cpp.SimpleAssign,sndn),Idn,e)))|_->raiseImpossible)|StructDefdef->let{s_name=name;s_kind=kind;s_flds=flds}=definletprefix=matchkindwithStruct->"S__"|Union->"U__"inletname=add_prefixprefixnameinlets=Ast.str_of_namenameinifenv.phase=DefsthenbeginifHashtbl.memenv.structssthenletold=Hashtbl.findenv.structssinif(Meta_ast_c.vof_any(Toplevel(StructDefold)))=*=(Meta_ast_c.vof_any(Toplevel(StructDefdef)))(* Why they don't factorize? because they don't like recursive
* #include in plan I think
*)thenenv.log(spf"you should factorize struct %s definitions"s)elsebeginenv.pr2_and_log(spf"conflicting structs for %s, %s <> %s"s(Common.dumpold)(Common.dumpdef));Hashtbl.replaceenv.dupes(fstname,E.Type)trueendelsebeginHashtbl.addenv.structssdef;letenv=add_node_and_edge_if_defs_modeenv(name,E.Type)Noneinhook_defenvx;(* this is used for InitListExpr *)letfields=flds|>Common.map_filter(function|{fld_name=Somename;_}->Some(Ast.str_of_namename)|_->None)inHashtbl.replaceenv.fields(prefix^s)fields;flds|>List.iter(fun{fld_name=nameopt;fld_type=t;}->nameopt|>Common.do_option(funname->add_node_and_edge_if_defs_modeenv(name,E.Field)(Somet)|>ignore;));endendelsebeginletenv=add_node_and_edge_if_defs_modeenv(name,E.Type)Noneinflds|>List.iter(fun{fld_name=nameopt;fld_type=t;}->matchnameoptwith|Somename->lettyp=Sometinletenv=add_node_and_edge_if_defs_modeenv(name,E.Field)typintype_envt|None->(* TODO: kencc: anon substruct, invent anon? *)(* (spf "F__anon__%s" (str_of_angle_loc env loc), E.Field) None *)type_envt)end|EnumDef(name,xs)->letname=add_prefix"E__"nameinletenv=add_node_and_edge_if_defs_modeenv(name,E.Type)Noneinxs|>List.iter(fun(name,eopt)->letname=ifkind_fileenv=*=Sourcethennew_name_if_defsenvnameelsenameinletenv=add_node_and_edge_if_defs_modeenv(name,E.Constructor)Noneinifenv.phase=UsesthenCommon2.opt(expr_toplevelenv)eopt);(* subtle: called here after all the local renames have been created! *)hook_defenvx;(* I am not sure about the namespaces, so I prepend strings *)|TypeDef(name,t)->lets=Ast.str_of_namenameinletname=add_prefix"T__"nameinifenv.phase=DefsthenbeginifHashtbl.memenv.typedefssthenletold=Hashtbl.findenv.typedefssinif(Meta_ast_c.vof_any(Typeold)=*=(Meta_ast_c.vof_any(Typet)))then()elsebeginenv.pr2_and_log(spf"conflicting typedefs for %s, %s <> %s"s(Common.dumpold)(Common.dumpt));Hashtbl.replaceenv.dupes(fstname,E.Type)trueend(* todo: if are in Source, then maybe can add in local_typedefs *)elseHashtbl.addenv.typedefsstend;lettyp=Sometinlet_env=add_node_and_edge_if_defs_modeenv(name,E.Type)typin(* no hook_def here *)(* type_ env typ; *)()(* less: should analyze if s has the form "..." and not <> and
* build appropriate link? but need to find the real File
* corresponding to the string, so may need some -I
*)|Include_->()andtoplevelsenvxs=List.iter(toplevelenv)xsanddefine_bodyenvv=letenv={envwithin_define=true}inmatchvwith|CppExpre->expr_toplevelenve|CppStmtst->stmtenvst(* ---------------------------------------------------------------------- *)(* Stmt *)(* ---------------------------------------------------------------------- *)(* Mostly go through without doing anything; stmts do not use
* any entities (expressions do).
*)andstmtenv=function|ExprSte->expr_toplevelenve|Blockxs->stmtsenvxs|Asmxs->List.iter(expr_toplevelenv)xs|If(e,st1,st2)->expr_toplevelenve;stmtsenv[st1;st2]|Switch(e,xs)->expr_toplevelenve;casesenvxs|While(e,st)|DoWhile(st,e)->expr_toplevelenve;stmtenvst|For(e1,e2,e3,st)->Common2.opt(expr_toplevelenv)e1;Common2.opt(expr_toplevelenv)e2;Common2.opt(expr_toplevelenv)e3;stmtenvst|Returneopt->Common2.opt(expr_toplevel{envwithin_return=true})eopt;|Continue|Break->()|Label(_name,st)->stmtenvst|Goto_name->()|Varsxs->xs|>List.iter(funx->let{v_name=n;v_type=t;v_storage=sto;v_init=eopt}=xinifsto<>Externthenbeginenv.locals:=(Ast.str_of_namen,Somet)::!(env.locals);type_envt;end;(matcheoptwith|None->()|Somee->expr_toplevelenv(Assign((Cst_cpp.SimpleAssign,sndn),Idn,e))))andcaseenv=function|Case(e,xs)->expr_toplevelenve;stmtsenvxs|Defaultxs->stmtsenvxsandstmtsenvxs=List.iter(stmtenv)xsandcasesenvxs=List.iter(caseenv)xs(* ---------------------------------------------------------------------- *)(* Expr *)(* ---------------------------------------------------------------------- *)(* can assume we are in Uses phase *)andexpr_toplevelenvx=exprenvx;hook_expr_toplevelenvx(* can assume we are in Uses phase *)andexprenv=function|Int_|Float_|Char_->()|String_->()(* Note that you should go here only when it's a constant. You should
* catch the use of Id in other contexts before. For instance you
* should match on Id in Call, so that this code
* is executed really as a last resort, which usually means when
* there is the use of a constant or global.
*)|Idname->lets=Ast.str_of_namenameinifis_localenvsthen()elseletname=strenvnameinletkind_opt=find_existing_node_optenvname[E.Constant;E.Constructor;E.Global;E.Function;(* can pass address of func *)E.Prototype;(* can be asm function *)E.GlobalExtern;](iflooks_like_macronamethenE.ConstantelseE.Global)inkind_opt|>Common.do_option(funkind->add_use_edgeenv(name,kind))|Call(e,es)->(matchewith|Idname->lets=Ast.str_of_namenameinifis_localenvsthen()elseletname=strenvnameinletkind_opt=find_existing_node_optenvname[E.Macro;E.Constant;(* for DBG-like macro *)E.Function;E.Global;(* can do foo() even with a function pointer *)E.Prototype;E.GlobalExtern;](iflooks_like_macronamethenE.MacroelseE.Function)in(* we don't call call like foo(bar(x)) to be counted
* as special calls in prolog, hence the NoCtx here.
*)kind_opt|>Common.do_option(funkind->add_use_edge{envwithctx=P.NoCtx}(name,kind);exprs{envwithctx=(P.CallCtx(fstname,kind))}es)(* todo: unexpected form of call? function pointer call? add to stats *)|_->exprenve;exprsenves)|Assign(_,e1,e2)->(* mostly for generating use/read or use/write in prolog *)expr{envwithin_assign=true}e1;exprenve2;|ArrayAccess(e1,e2)->exprsenv[e1;e2](* todo: determine type of e and make appropriate use link *)|RecordPtAccess(e,_name)->exprenve|Cast(t,e)->type_envt;exprenve(* potentially here we would like to treat as both a write and read
* of the variable, so maybe a trivalue would be better than a boolean
*)|Postfix(e,_op)|Infix(e,_op)->expr{envwithin_assign=true}e|Unary(e,op)->(matchAst.unwrapopwith(* if get the address probably one wants to modify it *)|Cst_cpp.GetRef->expr{envwithin_assign=true}e|_->exprenve)|Binary(e1,_op,e2)->exprsenv[e1;e2]|CondExpr(e1,e2,e3)->exprsenv[e1;e2;e3]|Sequence(e1,e2)->exprsenv[e1;e2]|ArrayInitxs->xs|>List.iter(fun(eopt,init)->Common2.opt(exprenv)eopt;exprenvinit)(* todo: add deps on field *)|RecordInitxs->xs|>List.mapsnd|>exprsenv|SizeOfx->(matchxwith(* ugly: because of bad typedef inference what we think is an Id
* could actually be a TTypename. So add a hack here.
*)|Left(Id(origname))->lets=Ast.str_of_nameorignameinifis_localenvsthen()elseletname=strenvorignameinifnot(G.has_node(Ast.str_of_namename,E.Global)env.g)&¬(G.has_node(Ast.str_of_namename,E.GlobalExtern)env.g)thentype_env(TTypeNameorigname)elseexprenv(Idorigname)|Lefte->exprenve|Rightt->type_envt)|GccConstructor(t,e)->type_envt;exprenve|Ellipses_->()andexprsenvxs=List.iter(exprenv)xs(* ---------------------------------------------------------------------- *)(* Types *)(* ---------------------------------------------------------------------- *)andtype_envtyp=ifenv.phase=Uses&&env.conf.types_dependenciesthenbeginlett=final_typeenvtypinletrecauxt=matchtwith|TBase_->()(* The use of prefix below is consistent with what is done for the defs.
* We do that right now because we are not clear on the namespaces ...
* whether we can have both struct x and enum x in a file
* (I think no, but right now easier to just prefix)
*)|TStructName(Struct,name)->add_use_edgeenv(add_prefix"S__"name,E.Type)|TStructName(Union,name)->add_use_edgeenv(add_prefix"U__"name,E.Type)|TEnumNamename->add_use_edgeenv(add_prefix"E__"name,E.Type)|TTypeNamename->lets=Ast.str_of_namenamein(* could be a type parameter *)ifis_localenvs&&env.in_definethen()elseifenv.conf.typedefs_dependenciesthenadd_use_edgeenv(add_prefix"T__"name,E.Type)elseifHashtbl.memenv.typedefssthenlett'=(Hashtbl.findenv.typedefss)in(* right now 'typedef enum { ... } X' results in X being
* typedefed to ... itself
*)ift'=tthenadd_use_edgeenv(add_prefix"T__"name,E.Type)(* should be done in expand_typedefs? unless we had a dupe *)elseenv.pr2_and_log(spf"skipping edge, probably dupe typedef %s (%s)"s(Parse_info.string_of_info(sndname)))elseenv.pr2_and_log(spf"typedef not found: %s (%s)"s(Parse_info.string_of_info(sndname)))|TPointerx->auxx|TArray(eopt,x)->Common2.opt(exprenv)eopt;auxx|TFunction(t,xs)->auxt;xs|>List.iter(funp->auxp.p_type)inauxtend(*****************************************************************************)(* Main entry point *)(*****************************************************************************)letbuild?(verbose=true)rootfiles=letg=G.create()inG.create_initial_hierarchyg;letchan=open_out(Filename.concatroot"pfff.log")in(* less: we could also have a local_typedefs_of_files to avoid conflicts *)letconf={types_dependencies=true;(* TODO currently unused, we do not process fields :( *)fields_dependencies=true;macro_dependencies=false;propagate_deps_def_to_decl=false;(* let's expand typedefs, it's simpler, hence false *)typedefs_dependencies=false;}inletenv={g;phase=Defs;current=G.pb;ctx=P.NoCtx;c_file_readable="__filled_later__";conf;in_assign=false;in_define=false;in_return=false;(* will be overriden by file specific hashtbl *)local_rename=Hashtbl.create0;dupes=Hashtbl.create101;typedefs=Hashtbl.create101;structs=Hashtbl.create101;fields=Hashtbl.create101;locals=ref[];log=(funs->output_stringchan(s^"\n");flushchan;);pr2_and_log=(funs->(*if verbose then *)pr2s;output_stringchan(s^"\n");flushchan;);}in(* step0: parsing *)env.pr2_and_log"\nstep0: parsing";(* we could run the parser in the different steps
* but we need to make sure to reset some counters because
* the __anon_struct_xxx build in ast_c_simple_build
* must be stable when called another time with the same file!
* alt: fix ast_c_simple_build to not use gensym and generate stable
* names.
*)letelems=files|>Console.progress~show:verbose(funk->List.map(funfile->k();letast=parse~show_parse_error:truefileinletreadable=Common.readable~rootfileinletlocal_rename=Hashtbl.create101inast,readable,local_rename))in(* step1: creating the nodes and 'Has' edges, the defs *)env.pr2_and_log"\nstep1: extract defs";elems|>Console.progress~show:verbose(funk->List.iter(fun(ast,c_file_readable,local_rename)->k();extract_defs_uses{envwithphase=Defs;c_file_readable;local_rename;}ast));(* step2: creating the 'Use' edges *)env.pr2_and_log"\nstep2: extract Uses";elems|>Console.progress~show:verbose(funk->List.iter(fun(ast,c_file_readable,local_rename)->k();extract_defs_uses{envwithphase=Uses;c_file_readable;local_rename;}ast));env.pr2_and_log"\nstep3: adjusting";ifconf.propagate_deps_def_to_declthenGraph_code_helpers.propagate_users_of_functions_globals_types_to_prototype_extern_typedefsg;G.remove_empty_nodesg[G.not_found;G.dupe;G.pb];g