123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561(****************************************************************************)(* *)(* This file is part of MOPSA, a Modular Open Platform for Static Analysis. *)(* *)(* Copyright (C) 2017-2019 The MOPSA Project. *)(* *)(* This program is free software: you can redistribute it and/or modify *)(* it under the terms of the GNU Lesser General Public License as published *)(* by the Free Software Foundation, either version 3 of the License, or *)(* (at your option) any later version. *)(* *)(* This program is distributed in the hope that it will be useful, *)(* but WITHOUT ANY WARRANTY; without even the implied warranty of *)(* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *)(* GNU Lesser General Public License for more details. *)(* *)(* You should have received a copy of the GNU Lesser General Public License *)(* along with this program. If not, see <http://www.gnu.org/licenses/>. *)(* *)(****************************************************************************)(** Heap addresses of Python objects. *)openMopsaopenAstopenUniversal.Astletdebugfmt=Debug.debug~channel:"python.addr"fmt(*==========================================================================*)(** {2 Addresses} *)(*==========================================================================*)(** Classes *)typeclass_address=|C_builtinofstring(* name of a built-in class *)|C_userofpy_clsdec(* declaration of a user class *)|C_unsupportedofstring(** unsupported class *)|C_annotofpy_cls_annot(** class annotations *)(** Functions *)typefunction_address=|F_builtinofstring*string(* name of a builtin function, and its type (among builtin_function_or_method, wrapper_descriptor, method_descriptor usually) *)|F_userofpy_fundec(* declaration of a user function *)|F_unsupportedofstring(** unsupported function *)|F_annotofpy_func_annot(* function annotations *)typemodule_address=|M_userofstring(** name *)*varlist(** globals *)|M_builtinofstring(** name *)(** Kinds of Python addresses *)(** These addresses refer only to static objects *)typeaddr_kind+=|A_py_classofclass_address(** class *)*py_objectlist(** mro *)|A_py_functionoffunction_address(** function *)|A_py_methodofpy_object(** address of the function to bind *)*py_object(** method instance *)*string(* type. method or method-wrapper or ... *)|A_py_moduleofmodule_address(** Allocate an object on the heap and return its address as an evaluation *)leteval_alloc?(mode=STRONG)mankindrangeflow=letexp=mk_alloc_addr~mode:modekindrangeinman.evalexpflow>>$funexpflow->matchekindexpwith|E_addr(addr,_)->Cases.singletonaddrflow|_->panic"eval_alloc: allocation returned a non-address express %a"pp_exprexp(*==========================================================================*)(** {2 Built-ins} *)(*==========================================================================*)(** Lists of built-ins *)letclasses=Hashtbl.create100letfunctions=Hashtbl.create100letmodules=Hashtbl.create10lettype_aliases=Hashtbl.create100lettyped_functions=Hashtbl.create100(* and typed classes *)(** Name of a builtin with an optional dot notation in case of
sub-objects (methods of classes, etc.) *)letmk_dot_namebasename=matchbasewith|None->name|Somebase->base^"."^name(** Return the base and the attribute of a dot name *)letsplit_dot_namex=letl=String.split_on_char'.'xinmatchlwith|[cls;attr]->Some(cls,attr)|[modul;cls;attr]->Some(modul^"."^cls,attr)|_->None(** Address of an object *)letaddr_of_object(obj:py_object):addr=fstobjletkind_of_object(obj:py_object):addr_kind=letaddr=addr_of_objectobjinaddr.addr_kind(** Name of an object *)letoobject_nameobj=letsome=funx->Somexinmatchkind_of_objectobjwith|A_py_class(C_builtinname,_)|A_py_class(C_unsupportedname,_)|A_py_function(F_builtin(name,_))|A_py_function(F_unsupportedname)|A_py_module(M_builtinname)|A_py_module(M_user(name,_))->somename|A_py_function(F_userf)->some@@get_orig_vnamef.py_func_var|A_py_function(F_annotf)->some@@get_orig_vnamef.py_funca_var|A_py_class(C_userc,_)->some@@get_orig_vnamec.py_cls_var|A_py_class(C_annotc,_)->some@@get_orig_vnamec.py_cls_a_var|_->Noneletobject_nameobj=matchoobject_nameobjwith|Someo->o|None->panic"builtin_name: %a is not a builtin"pp_addr(addr_of_objectobj)letadd_type_alias(v:var)(e:expr)=Hashtbl.replacetype_aliasesveletfind_type_alias_by_name(s:string):expr=letexceptionFounditofexprintryHashtbl.iter(funve->ifget_orig_vnamev=sthenraise(Foundite))type_aliases;raiseNot_foundwithFoundite->eletadd_builtin_classobj()=Hashtbl.addclasses(object_nameobj)objletprint_classesfmt_=Hashtbl.iter(funcl_->Format.fprintffmt"%s\n"cl)classesletprint_functionsfmt_=Hashtbl.iter(funcl_->Format.fprintffmt"%s\n"cl)functionsletadd_builtin_functionobj()=debug"added builtin function %a"pp_addr(fstobj);Hashtbl.addfunctions(object_nameobj)objletadd_typedobj=letname=object_nameobjindebug"adds %s -> %a"namepp_expr(mk_py_objectobj(Location.R_fresh(-1)));Hashtbl.addtyped_functionsnameobjletadd_typed_overloadobj=matchHashtbl.find_opttyped_functions(object_nameobj)with|None->add_typedobj|Some({addr_kind=A_py_function(F_annotoldf)}asa,_)->letobj_sig=matchakind@@fstobjwith|A_py_function(F_annotf)->f.py_funca_sig|_->assertfalseinletnewf={oldfwithpy_funca_sig=(oldf.py_funca_sig@obj_sig)}inHashtbl.removetyped_functions(object_nameobj);Hashtbl.addtyped_functions(object_nameobj)({awithaddr_kind=A_py_function(F_annotnewf)},sndobj);|_->assertfalseletadd_builtin_moduleobj()=Hashtbl.addmodules(object_nameobj)obj(** Search for the address of a builtin given its name *)letfind_builtinname=letsearch=funtbl->Hashtbl.findtblnameintrysearchtyped_functionswithNot_found->trysearchclasseswithNot_found->trysearchfunctionswithNot_found->searchmodulesletfind_builtin_function=Hashtbl.findfunctionsletis_object_unsupportedobj=matchkind_of_objectobjwith|A_py_class(C_unsupported_,_)|A_py_function(F_unsupported_)->true|_->false(** Check whether a built-in exists given its name *)letis_builtin_namename=letexists=funtbl->Hashtbl.memtblnameinexistsclasses||existsfunctions||existsmodules||existstyped_functionsletis_builtin_varv=matchvkindvwith|V_uniq_->is_builtin_name@@get_orig_vnamev|_->falseletis_builtin_modulename=Hashtbl.memmodulesnameletfind_builtin_modulename=Hashtbl.findmodulesname(** Check whether an attribute of a built-in object exists, given its name *)letis_builtin_attributebaseattr=letname=object_namebaseinifis_object_unsupportedbasethenpanic"Unsupported builtin %s"nameelsematchkind_of_objectbasewith|A_py_class(C_builtinname,_)|A_py_module(M_builtinname)->is_builtin_name(mk_dot_name(Somename)attr)|A_py_class(C_annotc,_)->debug"searching for %s"(mk_dot_name(Some(get_orig_vnamec.py_cls_a_var))attr);is_builtin_name(mk_dot_name(Some(get_orig_vnamec.py_cls_a_var))attr)|_->false(** Search for the address of a builtin attribute *)letfind_builtin_attributebaseattr=letname=object_namebaseinifis_object_unsupportedbasethenpanic"Unsupported builtin %s"nameelsematchkind_of_objectbasewith|A_py_class(C_builtinname,_)|A_py_module(M_builtinname)|A_py_module(M_user(name,_))->find_builtin(mk_dot_name(Somename)attr)|A_py_class(C_usercls,_)->letname=get_orig_vnamecls.py_cls_varinfind_builtin(mk_dot_name(Somename)attr)|A_py_class(C_annotcls,_)->letname=get_orig_vnamecls.py_cls_a_varinfind_builtin(mk_dot_name(Somename)attr)|_->assertfalse(** Check whether a dot-named function [f] is a member of the class [cls] *)letis_builtin_class_functionclsf=matchsplit_dot_namefwith|None->false|Some(cls',_)->cls=cls'&&is_builtin_namef(*==========================================================================*)(** {2 Utility functions} *)(*==========================================================================*)letmk_py_z_intervallurange=mk_z_intervallurangeletmk_py_float_intervallurange={(mk_float_intervallurange)withetyp=(T_py(Some(FloatF_DOUBLE)))}letmk_py_issubclasse1e2range=mk_py_call(mk_py_object(find_builtin"issubclass")range)[e1;e2]rangeletmk_py_issubclass_builtin_rebuiltinrange=letobj=find_builtinbuiltininmk_py_issubclasse(mk_py_objectobjrange)rangeletmk_py_issubclass_builtin_lbuiltinerange=letobj=find_builtinbuiltininmk_py_issubclass(mk_py_objectobjrange)erangeletmk_py_issubclass_builtin2blt1blt2range=letobj1=find_builtinblt1inletobj2=find_builtinblt2inmk_py_issubclass(mk_py_objectobj1range)(mk_py_objectobj2range)rangeletmk_py_hasattreattrrange=mk_py_call(mk_py_object(find_builtin"hasattr")range)[e;mk_constant~etyp:(T_pyNone)(C_stringattr)range]rangeletmk_py_isinstancee1e2range=mk_py_call(mk_py_object(find_builtin"isinstance")range)[e1;e2]rangeletmk_py_isinstance_builtinebuiltinrange=letobj=find_builtinbuiltininmk_py_isinstancee(mk_py_objectobjrange)rangeletmk_py_typeerange=letobj=find_builtin"type"inmk_py_call(mk_py_objectobjrange)[e]rangetypepy_c_function_kind=|Builtin_function_or_method|Wrapper_descriptorofstringoption(* optional wrapper: in that case name *)|Method_descriptorletstr_of_py_c_function_kind=function|Builtin_function_or_method->"builtin_function_or_method"|Wrapper_descriptor_->"wrapper_descriptor"|Method_descriptor->"method_descriptor"(* multilanguage *)typeaddr_kind+=|A_py_c_moduleofstring(** name *)(** Mopsa.program (* C program *)*)|A_py_c_functionofstring(** name *)*int(** function uid *)*py_c_function_kind*intoption(* ml_flags *)*py_object(** self *)|A_py_c_classofstring(** name *)exceptionC3_lin_failure(** Computes the c3 linearization of an object. This is Python's
approach to deal with redundant parents in the inheritance *)letrecc3_lin(obj:py_object):py_objectlist=(* Spec of c3_lin : (C(B1, ..., BN) meaning class C inherits directly from B1, ..., BN
* c3_lin(C(B1, ..., BN)) = C::merge(c3_lin(B1), ..., c3_lin(BN), [B1], ..., [BN])
* c3_lin(object) = [object]
*
* and merge(L1, ..., Ln) =
* let k = min_{1 <= i <= n} { k | hd(L_k) \not \in tail(L_j) \forall j \neq k } in
* let c = hd(L_k) in
* c :: merge(L1 \ {c}, ..., Ln \ {c})
* ** Examples
* Due to wikipedia:
* class O: pass
* class A(O): pass
* class B(O): pass
* class C(O): pass
* class D(O): pass
* class E(O): pass
* class K1(A, B, C): pass
* class K2(D, B, E): pass
* class K3(D, A): pass
* class Z(K1, K2, K3): pass
*
* a = Z()
* Then, the MRO is Z, K1, K2, K3, D, A, B, C, E, O
*
* Found in "Linearization in Multiple Inheritance", by Michael Petter, Winter term 2016:
* class G: pass
* class F: pass
* class E(F): pass
* class D(G): pass
* class C(D, E): pass
* class B(F, G): pass
* class A(B, C): pass
*
* a = A()
*
* No MRO in this case
*)matchkind_of_objectobjwith|A_py_class(C_builtin"object",b)->[obj]|A_py_class(_,[])|A_py_c_class_->[obj;find_builtin"object"]|A_py_class(c,bases)->letl_bases=List.mapc3_linbasesinletbases=List.map(funx->[x])basesinobj::merge(l_bases@bases)|_->assertfalseandmerge(l:py_objectlistlist):py_objectlist=matchsearch_clwith|Somec->letl'=List.filter(funx->x<>[])(List.map(funli->List.filter(funx->compare_addr_kind(akind@@fstc)(akind@@fstx)<>0)li)l)in(* l' is l with all c removed *)beginmatchl'with|[]->[c]|_->c::mergel'end|None->raiseC3_lin_failureandsearch_c(l:py_objectlistlist):py_objectoption=letindexed_l=List.mapi(funill->(i,ll))linList.fold_left(funacc(i,li)->ifacc<>None||li=[]thenaccelseletc=List.hdliinleta=List.for_all(fun(k,lk)->i=k||lk=[]||not(List.exists(funx->compare_addr(fstc)(fstx)=0)(List.tllk)))indexed_linifathenSomecelseacc)Noneindexed_lletcreate_builtin_classkindnameclsbasesrange=letmro=c3_lin({addr_kind=(A_py_class(kind,bases));addr_partitioning=G_all;addr_mode=STRONG},None)inletaddr={addr_kind=A_py_class(kind,mro);addr_partitioning=G_all;addr_mode=STRONG}inadd_builtin_class(addr,None)()let()=Format.(register_addr_kind{print=(fundefaultfmta->matchawith|A_py_class(C_userc,_)->fprintffmt"%a"pp_varc.py_cls_var|A_py_class((C_builtinc|C_unsupportedc),_)->fprintffmt"%s"c|A_py_class(C_annotc,_)->fprintffmt"%a"pp_varc.py_cls_a_var;|A_py_function(F_userf)->fprintffmt"function %a"pp_varf.py_func_var|A_py_function(F_annotf)->fprintffmt"f-annot %a"pp_varf.py_funca_var(* Ast.pp_py_func_annot f*)|A_py_function(F_builtin(f,t))->fprintffmt"%s %s"tf|A_py_function(F_unsupportedf)->fprintffmt"unsupported-builtin %s"f|A_py_method(f,e,t)->fprintffmt"%s %a of %a"tpp_addr(addr_of_objectf)Pp.pp_py_objecte|A_py_module(M_builtin(m))->fprintffmt"module %s"m|A_py_module(M_user(m,globals))->fprintffmt"module %s[defined globals = %a]"m(pp_print_list~pp_sep:(funfmt()->pp_print_stringfmt", ")pp_var)globals|_->defaultfmta);compare=(fundefaulta1a2->matcha1,a2with|A_py_class(c1,_),A_py_class(c2,_)->beginmatchc1,c2with|C_builtins1,C_builtins2|C_unsupporteds1,C_unsupporteds2->Stdlib.compares1s2|C_userc1,C_userc2->compare_varc1.py_cls_varc2.py_cls_var|C_annotc1,C_annotc2->compare_varc1.py_cls_a_varc2.py_cls_a_var;|_,_->defaulta1a2end|A_py_functionf1,A_py_functionf2->beginmatchf1,f2with|F_builtin(s1,t1),F_builtin(s2,t2)->Compare.compose[(fun()->Stdlib.compares1s2);(fun()->Stdlib.comparet1t2)]|F_unsupporteds1,F_unsupporteds2->Stdlib.compares1s2|F_useru1,F_useru2->compare_varu1.py_func_varu2.py_func_var|F_annotf1,F_annotf2->compare_varf1.py_funca_varf2.py_funca_var|_,_->defaulta1a2end|A_py_modulem1,A_py_modulem2->beginmatchm1,m2with|M_user(s1,_),M_user(s2,_)|M_builtins1,M_builtins2->Stdlib.compares1s2|_,_->defaulta1a2end|A_py_method((addr1,oexpr1),expr1,t1),A_py_method((addr2,oexpr2),expr2,t2)->Compare.compose[(fun()->compare_addraddr1addr2);(fun()->Compare.optioncompare_exproexpr1oexpr2);(fun()->compare_py_objectexpr1expr2);(fun()->Stdlib.comparet1t2);]|_->defaulta1a2)})letbuiltin_cl_and_mros=lettyo=kind_of_object(find_builtins)inmatchtyowith|A_py_class(c,b)->c,b|_->assertfalsetypeaddr_kind+=|A_py_instanceofaddr(* it's the class from which the object is instantiated *)let()=Format.(register_addr_kind{print=(fundefaultfmta->matchawith|A_py_instancec->fprintffmt"Inst{%a}"pp_addr_kind(akindc)|_->defaultfmta);compare=(fundefaulta1a2->matcha1,a2with|A_py_instancec1,A_py_instancec2->compare_addrc1c2|_->defaulta1a2);})let()=Universal.Heap.Policies.register_mk_addr(fundefaultak->matchakwith|A_py_function_->Universal.Heap.Policies.mk_addr_stack_rangeak|A_py_class_|A_py_module_->Universal.Heap.Policies.mk_addr_allak|_->defaultak)letnominal_type_of_addr_kind:(addr_kind->string)ref=ref(funak->panic"unknown nominal type for addr_kind %a"pp_addr_kindak)letstructural_type_of_addr_kind:(addr_kind->string->bool)ref=ref(funaks->panic"unknown structural type for addr_kind %a"pp_addr_kindak)letregister_addr_kind_nominal_typef=nominal_type_of_addr_kind:=f!nominal_type_of_addr_kindletregister_addr_kind_structural_typef=structural_type_of_addr_kind:=f!structural_type_of_addr_kindletaddr_kind_find_nominal_typeak=!nominal_type_of_addr_kindakletaddr_kind_find_structural_typeaks=!structural_type_of_addr_kindaksletmro(obj:py_object):py_objectlist=matchkind_of_objectobjwith|A_py_class(c,b)->b|A_py_c_class_->(* FIXME *)obj::(find_builtin"object")::[]|_->assertfalselet()=Format.(register_addr_kind{print=(fundefaultfmta->matchawith|A_py_c_module(c(*, p*))->fprintffmt"c module %s"c|A_py_c_function(f,_,_,_,_)->fprintffmt"c function %s"f|A_py_c_classv->fprintffmt"c class %s"v|_->defaultfmta);compare=(fundefaulta1a2->matcha1,a2with|A_py_c_function(f1,i1,k1,of1,o1),A_py_c_function(f2,i2,k2,of2,o2)->Compare.compose[(fun()->Stdlib.comparef1f2);(fun()->Stdlib.comparei1i2);(fun()->Stdlib.comparek1k2);(fun()->Stdlib.compareof1of2);(fun()->compare_py_objecto1o2);]|_->defaulta1a2);});register_addr_kind_nominal_type(fundefaultak->matchakwith|A_py_c_module_->"module"|A_py_c_function(_,_,k,_,_)->str_of_py_c_function_kindk|A_py_c_class_->"type"(* FIXME: we should/could call C's Py_TYPE? which currently assigns PyType_Type, ~ ok up to reduction... *)|_->defaultak);register_addr_kind_structural_type(fundefaultaks->matchakwith|A_py_c_class_|A_py_c_function_->false|_->defaultaks)let()=Universal.Heap.Policies.register_mk_addr(fundefaultak->matchakwith|A_py_c_module_->Universal.Heap.Policies.mk_addr_allak|A_py_instance{addr_kind=A_py_class(C_builtin"member_descriptor",_)}->Universal.Heap.Policies.mk_addr_rangeak|A_py_instance{addr_kind=A_py_class(C_builtin"cell",_)}->Universal.Heap.Policies.mk_addr_stack_rangeak(* FIXME: only if cpython analysis. A bit expensive too... *)|A_py_instance{addr_kind=A_py_c_class_}->Universal.Heap.Policies.mk_addr_stack_rangeak|A_py_instance{addr_kind=A_py_class(C_builtin"int",_)}|A_py_instance{addr_kind=A_py_class(C_builtin"float",_)}|A_py_instance{addr_kind=A_py_class(C_builtin"str",_)}|A_py_instance{addr_kind=A_py_class(C_builtin"bytes",_)}->Universal.Heap.Policies.mk_addr_stack_rangeak|_->defaultak)