123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671(* Yoann Padioleau
*
* Copyright (C) 2014 Facebook
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public License
* version 2.1 as published by the Free Software Foundation, with the
* special exception on linking described in file license.txt.
*
* This library is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the file
* license.txt for more details.
*)openCommonopenAst_copenAst_cilmoduleA=Ast_cmoduleA2=Cst_cppmoduleD=Datalog_codemoduleG=Graph_codemoduleE=Entity_code(*****************************************************************************)(* Prelude *)(*****************************************************************************)(*
* Generating dataflow-related datalog facts for C.
*
* See pfff/mini/datalog_minic.ml for more comments, history, and notes.
* Lots of code in this file is copy pasted from datalog_minic.ml
* (but now actually improved, e.g. with the notion of lvalue/rvalue).
*
* todo:
* - could also add the AST of macros in the environment to
* expand sometimes
* - less: could split in different files, ast_cil.ml, ast_cil_build.ml,
* datalog_c.ml, datalog_c_hooks.ml
*)(*****************************************************************************)(* Types *)(*****************************************************************************)typefact=Datalog_code.facttypeenv={scope:string;(* qualifier, usually the current function *)c_file_readable:Common.filename;long_format:bool;globals:Graph_code.graph;(* we may also want the AST of macros *)(* Because of the trick we use in graph_code_c for e.g. renaming
* static functions to avoid name collisions,
* you need to use this function each time you think
* a name refers to a global entity.
*)globals_renames:Ast_c.name->Ast_c.name;(* Have option type because of macro parameters ... could have a TAny also.
* Need a ref because instrs_of_expr will add new local variables.
*)locals:(string*type_option)listref;(* the output *)facts:factlistref;}(* less: type format = Classic | Bddbddb | BddbddbLong ? *)letlong_format=reftrue(*****************************************************************************)(* Helpers *)(*****************************************************************************)letdebugany=letv=Meta_ast_c.vof_anyanyinlets=Ocaml.string_of_vvinletii=Lib_parsing_c.ii_of_anyanyinpr2(spf"PB: %s"(Parse_info.string_of_info(List.hdii)));pr2s(* location are unique normally so nice to generate unique readable names *)letloc_ofenvtok=letline=Parse_info.line_of_infotokinletcol=Parse_info.col_of_infotokinifenv.long_formatthenspf"%d:%d"linecol(* lua datalog does not handle ':' *)elsespf"line_%d_col_%d"linecol(* for int* x[10][10] we want to generate a list of Alloc and
* we want to create fresh array each time
* (we could use gensym for that though).
* TODO: good enough with the way types are represented? Need
* Ast_c.Pointer of tok * type_ ?
*)lettok_of_typet=List.hd(Lib_parsing_c.ii_of_any(A.Typet))lettokwrap_of_expre=(),List.hd(Lib_parsing_c.ii_of_any(A.Expre))letvar_of_instrinstr=matchinstrwith|Assign(v,_)|AssignAddress(v,_)|AssignLvalue(_,v)->vexceptionNotSimpleExprletstring_of_op_str="_op_todo"letis_localenvs=(Common.find_opt(fun(x,_)->x=$=s)!(env.locals))<>None(*****************************************************************************)(* Normalize *)(*****************************************************************************)(* The goal is to transform constructs from Ast_c to simpler constructs.
* Here are the simplifications done:
* - linearization of expression
* - sugar removal for postfix/infix increments
* - ???
*)(* could also use gensym() *)letcounter=ref0(* note that we still use var_of_name to generate the extra scope info
* so no need to add it there
*)letfresh_varenv(_,tok)=incrcounter;lets=spf"_v_%d"(* env.scope? no! *)!counterin(* todo type! *)env.locals:=(s,None)::!(env.locals);s,tok(* Need env.globals to unsugar certain calls like error(xxx) when error
* is actually a global it's really a ( *error)(xxx).
* Need also to know the current set of locals, so need both local
* and global analysis results.
*)letinstrs_of_exprenve=letinstrs=ref[]in(* let _new_locals = ref [] with their types? ... *)letrecinstr_of_expre=matchewith|A.Int_|A.Float_|A.String_|A.Char_|A.Id_|A.Unary(_,(A2.DeRef,_))|A.Call_|A.ArrayAccess_|A.RecordPtAccess_|A.Binary_|A.Unary(_,((A2.UnPlus|A2.UnMinus|A2.Tilde|A2.Not),_))|A.SizeOf_|A.GccConstructor_|A.Ellipses_->(* less: we could generate a special fresh_var that datalog would not
* have to track? hmm but maybe the var is actually used
* by code using var_of_instr().
*)Assign(fresh_varenv(tokwrap_of_expre),rvalue_of_simple_expre)(* todo: actually an alloc is hidden there! *)|A.Assign(op,e1,A.ArrayInitxs)->letys=xs|>List.map(fun(idxopt,value)->(* less? recompute e1 each time? should store in intermediate val? *)letaccess=matchidxoptwith|Somee->A.ArrayAccess(e1,e)|None->A.ArrayAccess(e1,A.Int("0",sndop))inA.Assign(op,access,value))inletseq=Common2.foldl1(funerest->Sequence(e,rest))ysininstr_of_exprseq(* todo: actually an alloc is hidden there! *)|A.Assign(op,e1,A.RecordInitxs)->letys=xs|>List.map(fun(name,value)->(* less? recompute e1 each time? should store in intermediate val? *)letaccess=A.RecordPtAccess(A.Unary(e1,(A2.GetRef,sndop)),name)inA.Assign(op,access,value))inletseq=Common2.foldl1(funerest->Sequence(e,rest))ysininstr_of_exprseq(* ok, an actual instr! For our analysis we don't care about op (we are
* not even control flow sensitive anyway)
*)|A.Assign(_op,e1,e2)->letlv=lvalue_of_expre1in(matchlv,e2with|Idv,A.Unary(e,(A2.GetRef,_))->(matchlvalue_of_exprewith(* less: what &( *x ) means? *)|DeRef_->debug(A.Expre);raiseImpossible|lv->AssignAddress(v,lv))|_->(matchlvwith|Idname->Assign(name,tryrvalue_of_simple_expre2withNotSimpleExpr->Lv(Id(var_of_expre2)))|lv->AssignLvalue(lv,var_of_expre2)))|A.Unary(e,(A2.GetRef,tok))->letv=fresh_varenv((),tok)inletlv=lvalue_of_exprein(matchlvwith|DeRef_->debug(A.Expre);raiseImpossible|lv->AssignAddress(v,lv))|A.Unary(_,((A2.GetRefLabel,_)))->(* ast_c_build should forbid that gccext *)debug(A.Expre);raiseImpossible|A.Sequence(e1,e2)->leti1=instr_of_expre1inCommon.pushi1instrs;instr_of_expre2|A.Cast(_tTODO,e)->instr_of_expre(* for pointer analysis we don't care to respect the exact semantic, we
* are not even control flow sensitive
*)|A.Postfix(e,_op)|A.Infix(e,_op)->instr_of_expre(* Could try to expand to a '_builtin_cond(e1, e2, e3)' but
* what would be the type of this function? bool -> T -> T -> T ...
* need polymorphic type. So for now just expand to
* 'v1 = e1; v2 = e2; v2 = e3;'
*)|A.CondExpr(e1,e2,e3)->leti1=instr_of_expre1inCommon.pushi1instrs;lettokwrap=tokwrap_of_expre2inletv=fresh_varenvtokwrapinlettok=sndtokwrapinleti2=instr_of_expr(A.Assign((Cst_cpp.SimpleAssign,tok),A.Idv,e2))inCommon.pushi2instrs;instr_of_expr(A.Assign((Cst_cpp.SimpleAssign,tok),A.Idv,e3));(* like GccConstructor can be outside Assign context when in macro *)(* todo: an alloc is hidden here?? *)|A.ArrayInit_|A.RecordInit_->debug(A.Expre);lettokwrap=tokwrap_of_expreinletv=fresh_varenvtokwrapinlettok=sndtokwrapininstr_of_expr(A.Assign((Cst_cpp.SimpleAssign,tok),A.Idv,e))andrvalue_of_simple_expre=matchewith|A.Intx->Intx|A.Floatx->Floatx|A.Stringx->Stringx|A.Charx->Stringx(* could be lots of things, global, local, param, constant, function! *)|A.Idname->Lv(Idname)|A.Unary(e,(A2.DeRef,_))->Lv(DeRef(var_of_expre))(* todo: xalloc, smalloc, and other wrappers? *)|A.Call(A.Id("malloc",tok),es)->(matcheswith|[SizeOf(Right(t))]->Alloc(t)|[Binary(e,(Cst_cpp.Arith(Cst_cpp.Mul),_),SizeOf(Right(t)))]->letv=var_of_expreinAllocArray(v,t)|[SizeOf(Left(_e))]->(* todo: need potentially to resolve the type of e *)(* debug (Expr e); *)Alloc(A.TBase("_unknown_",tok))|_->debug(Expre);Alloc(A.TBase("_unknown_",tok)))|A.Call(e,es)->letvs=List.mapvar_of_expresin(matchewith|A.Idname->ifis_localenv(fstname)(* fn(...) when fn is a local is really a ( *fn)(...) *)thenDynamicCall(var_of_expre,vs)else(* fn(...) is actually sugar when fn is a global *)letname=env.globals_renamesnameinletstr=fstnameinifnot(G.has_node(str,E.Function)env.globals)&¬(G.has_node(str,E.Macro)env.globals)&&G.has_node(str,E.Global)env.globalsthenDynamicCall(var_of_expre,vs)(* could assert there is E.Function or E.Macro or E.Prototype *)elseStaticCall(name,vs)(* ( *f)(...) *)|A.Unary(e,(A2.DeRef,_))->DynamicCall(var_of_expre,vs)(* x->f(...) is actually sugar for ( * x->f)(...) *)|A.RecordPtAccess(_,_)(* x[y](...) is also sugar for ( * x[y](...) *)|A.ArrayAccess(_,_)->DynamicCall(var_of_expre,vs)|_->debug(Expre);raiseTodo)|A.Binary(e1,(_op,tok),e2)->letvs=List.mapvar_of_expr[e1;e2]inBuiltinCall(("_builtin_"^(string_of_optok),tok),vs)|A.Unary(e,((A2.UnPlus|A2.UnMinus|A2.Tilde|A2.Not),tok))->letvs=[var_of_expre]inBuiltinCall(("_builtin_"^(string_of_optok),tok),vs)|A.ArrayAccess(e1,e2)->letv1=var_of_expre1inletv2=var_of_expre2inLv(ArrayAccess(v1,v2))|A.RecordPtAccess(e,name)->letv=var_of_expreinLv(ObjField(v,name))|A.SizeOf(Lefte)->letinstr=instr_of_expreinCommon.pushinstrinstrs;Int("0_sizeof",tokwrap_of_expre|>snd)|A.SizeOf(Rightt)->Int("0_sizeof",tok_of_typet)(* can be in macro context, e.g. #define SEG (struct x) { ... } *)|A.GccConstructor(t,_eTODO)->Alloct|_->(* hmmm maybe better to have this function return a rvalue option *)raiseNotSimpleExprandvar_of_expre=matchewith|A.Idname->name|_->letinstr=instr_of_expreinCommon.pushinstrinstrs;var_of_instrinstrandlvalue_of_expre=try(matchrvalue_of_simple_exprewith|Lvx->x|_->Id(var_of_expre))withNotSimpleExpr->Id(var_of_expre)inleti=instr_of_expreinList.rev(i::!instrs)(*****************************************************************************)(* Abstract memory locations *)(*****************************************************************************)letvar_of_globalenvname=letname=env.globals_renamesnameinlets=fstnamein(*
if Common.find_opt (fun (x,_) -> x =$= s) env.globals = None
then error (spf "unknown global: %s" s) name;
*)ifenv.long_formatthen(* bug: no!! spf "%s#%s" env.c_file_readable
* we must actually get the file at definition time, not use time
*)letcandidates=[E.Macro;E.Constant;E.Function;E.Constructor;E.Global;]inletres=candidates|>Common.map_filter(funkind->ifG.has_node(s,kind)env.globalsthenSome(s,kind)elseNone)in(matchreswith|[node]->letfile=G.file_of_nodenodeenv.globalsinspf"%s#%s"files|x::y::xs->pr2(spf"Conflicting entities for %s [%s]"s((x::y::xs)|>List.mapG.string_of_node|>Common.join","));letfile=G.file_of_nodexenv.globalsinspf"%s#%s"files(* maybe a prototype or extern *)|[]->(match()with|_whens=~"_builtin_.*"->()|_->ifG.has_node(s,E.Prototype)env.globals||G.has_node(s,E.GlobalExtern)env.globals(* todo: could print a warning to force people to give
* a "model" for the external or asm function
*)then()elsepr2_once(spf"Could not find any definition nor prototype for %s"s););s)elsesletvar_of_localenvname=ifenv.long_formatthenspf"%s#%s:%s"env.c_file_readableenv.scope(fstname)elsespf"%s__%s"env.scope(fstname)letvar_of_nameenvvar_or_name=lets=fstvar_or_nameinmatchCommon.find_opt(fun(x,_)->x=$=s)!(env.locals)with|None->var_of_globalenvvar_or_name|Some_t->var_of_localenvvar_or_name(* the variable name is also its heap abstract location as in C
* you can get the address of any local variables.
*)letheap_of_nameenvvar_or_name=var_of_nameenvvar_or_name(* heap location, abstract memory location, heap abstraction, etc *)letheap_of_cstenvname=spf"_val_of_%s_%s"(fstname)(loc_ofenv(sndname))letheap_of_mallocenvt=lettok=tok_of_typetinspf"_malloc_in_%s_%s"env.scope(loc_ofenvtok)letheap_of_malloc_arrayenvt=lettok=tok_of_typetin(* old: used to have
* let pt = spf "_array_in_%s_%s" env.scope (loc_of env tok) in
* let pt2 = spf "_array_elt_in_%s_%s" env.scope (loc_of env tok) in
* and an array_point_to/2 but it does not work
*)spf"_array_elt_in_%s_%s"env.scope(loc_ofenvtok)letinvoke_loc_of_nameenvname=ifenv.long_formatthenspf"%s#%s"env.c_file_readable(loc_ofenv(sndname))elsespf"_in_%s_%s"env.scope(loc_ofenv(sndname))(* TODO: need to look for type of v in env to actually qualify ... *)letfully_qualified_field_env_vfldname=letfld=fstfldnameinspf"_fld__%s"fld(* TODO: need to use _struct at some point *)letfully_qualified_field_of_struct_strucfld=spf"_fld__%s"fld(*****************************************************************************)(* Fact generation *)(*****************************************************************************)(* ------------------------------------------------------------------------- *)(* Instr *)(* ------------------------------------------------------------------------- *)letfacts_of_instrenv=function|Assign(var,e)->letdest=var_of_nameenvvarin(matchewith|Intx->[D.PointTo(dest,spf"_int__%s"(fstx))]|Floatx->[D.PointTo(dest,spf"_float__%s"(loc_ofenv(sndx)))]|Stringx->[D.PointTo(dest,spf"_str__%s"(loc_ofenv(sndx)))](* like in miniC *)|StaticCall(("printf",_),_args)->[]|StaticCall(name,args)|DynamicCall(name,args)|BuiltinCall(name,args)->letinvoke=invoke_loc_of_nameenvnamein(args|>Common.index_list_1|>List.map(fun(v,i)->D.Argument(invoke,i,var_of_nameenvv)))@[D.ReturnValue(invoke,dest)]@(matchewith|StaticCall_|BuiltinCall_->[D.CallDirect(invoke,var_of_globalenvname)]|DynamicCall_->[D.CallIndirect(invoke,var_of_nameenvname)]|_->raiseImpossible)(* TODO: could be enum or constant! lookup g *)|Lv(Idname)->[D.Assign(dest,var_of_nameenvname)]|Lv(DeRefvar2)->[D.AssignContent(dest,var_of_nameenvvar2)]|Lv(ObjField(var2,fld))->[D.AssignLoadField(dest,var_of_nameenvvar2,fully_qualified_fieldenvvar2fld)]|Lv(ArrayAccess(var2,_vidx))->(* less: could also add info that vidx must be an int *)[D.AssignArrayElt(dest,var_of_nameenvvar2)]|Alloct->letpt=heap_of_mallocenvtin[D.PointTo(dest,pt)]|AllocArray(_v,t)->letpt=heap_of_malloc_arrayenvtin[D.PointTo(dest,pt)])|AssignLvalue(ArrayAccess(varr,_vidx),vval)->(* less: could also add info that vidx must be an int *)[D.AssignArrayDeref(var_of_nameenvvarr,var_of_nameenvvval)]|AssignLvalue(ObjField(var,fld),var2)->[D.AssignStoreField(var_of_nameenvvar,fully_qualified_fieldenvvar2fld,var_of_nameenvvar2)]|AssignLvalue(DeRefvar,var2)->[D.AssignDeref(var_of_nameenvvar,var_of_nameenvvar2)]|AssignAddress(var,Idname)->[D.AssignAddress(var_of_nameenvvar,heap_of_nameenvname)]|AssignAddress(var,ArrayAccess(varray,_vidx))->(* less: could also add info that vidx must be an int *)[D.AssignArrayElementAddress(var_of_nameenvvar,var_of_nameenvvarray)]|AssignAddress(v,ObjField(vobj,fld))->[D.AssignFieldAddress(var_of_nameenvv,var_of_nameenvvobj,fully_qualified_fieldenvvobjfld)]|AssignAddress(_var,DeRef_)->raiseImpossible|AssignLvalue(Id_name,_var)->raiseImpossible(* ------------------------------------------------------------------------- *)(* Return *)(* ------------------------------------------------------------------------- *)letreturn_factenvinstr=letvar=var_of_instrinstrinD.Assign(spf"ret_%s"env.scope,var_of_nameenvvar)(* ------------------------------------------------------------------------- *)(* Defs *)(* ------------------------------------------------------------------------- *)letfacts_of_defenvdef=matchdefwith|StructDefdef->def.s_flds|>Common.map_filter(funfld->matchfld.fld_namewith(* todo: kencc ext field! *)|None->None|Somename->(matchfld.fld_typewith|TBase_->Some(D.PointTo((fully_qualified_field_of_struct(fstdef.s_name)(fstname)),(heap_of_cstenvname)))(* TODO: like for Global, if fields is an array, we should
* do an hidden alloc! see mini/struct_array.c
*)|_->None))|Define(name,_body)->[D.PointTo(var_of_globalenvname,heap_of_cstenvname)]|EnumDefdef->let(_name,xs)=definxs|>List.map(fun(name,_eopt)->D.PointTo(var_of_globalenvname,heap_of_cstenvname))|Macro_->(* todo? *)[]|FuncDefdef->let(_ret,params)=def.f_typein(params|>Common.index_list_1|>Common.map_filter(fun(p,i)->matchp.p_namewith|None->None|Somename->Some(D.Parameter(var_of_globalenvdef.f_name,i,var_of_localenvname))))@(* less: could skip when return void *)(letname=env.globals_renamesdef.f_namein[D.Return(var_of_globalenvdef.f_name,spf"ret_%s"(fstname));(* This is because in C there is some sugar on assign on functions.
* One can do trapenable(myfunc) instead of trapenable(&myfunc),
* so in such case it's simpler to consider every funcname as
* a pointer to itself.
*)D.PointTo(var_of_globalenvdef.f_name,var_of_globalenvdef.f_name);])|Globalvar->letname=var.v_nameinletrecauxcurrent_vcurrent_type=matchcurrent_typewith(* int* x[...]; <=> x = malloc(n*sizeof(int* ) *)|TArray(_eopt,t)->letvsize_dontcare=fresh_varenv((),tok_of_typet)inletinstr=Assign(current_v,AllocArray(vsize_dontcare,t))infacts_of_instrenvinstr@(* hmm got int* x[...][...] need to recurse *)(matchtwith|TArray_->(* new_v = x[0]; new_v = malloc(n*sizeof(int * )) *)letnew_v=fresh_varenv((),tok_of_typet)inletvidx_dontcare=fresh_varenv((),tok_of_typet)inletinstr_index=Assign(new_v,Lv(ArrayAccess(current_v,vidx_dontcare)))infacts_of_instrenvinstr_index@(auxnew_vt)|_->[])(* | TBase _ -> [D.PointTo (var_of_global env name, heap_of_cst env name)]*)|_->[]inauxnamevar.v_type|Include_|TypeDef_|Prototype_->raiseImpossible