(*s: lib_parsing_php.ml *)
(*s: Facebook copyright *)
(* Yoann Padioleau
*
* Copyright (C) 2009-2011 Facebook
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public License
* version 2.1 as published by the Free Software Foundation, with the
* special exception on linking described in file license.txt.
*
* This library is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the file
* license.txt for more details.
*)
(*e: Facebook copyright *)

open Common

(*s: basic pfff module open and aliases *)
open Cst_php

module Ast = Cst_php
module Flag = Flag_parsing
(*e: basic pfff module open and aliases *)
module V = Visitor_php
module V2 = Map_php
module PI = Parse_info

(*****************************************************************************)
(* Wrappers *)
(*****************************************************************************)

(* Printing helpers built from Flag.verbose_parsing; only pr2 is used in
 * this file, the second wrapper is deliberately ignored. *)
let pr2, _pr2_once = Common2.mk_pr2_wrappers Flag.verbose_parsing

(*****************************************************************************)
(* Filenames *)
(*****************************************************************************)

(* Returns true when the first line of [file] looks like a shebang
 * invoking php. An empty file (End_of_file on the first read) is not a
 * script. Other I/O errors are not handled here.
 *
 * NOTE(review): =~ comes from Common; presumably it matches the regexp
 * starting at the beginning of the string - confirm.
 *)
let is_php_script file =
  Common.with_open_infile file (fun chan ->
    try
      let l = input_line chan in
      l =~ "#!/usr/.*/php" ||
      l =~ "#!/bin/env php" ||
      l =~ "#!/usr/bin/env php"
    with End_of_file -> false
  )

(* Returns true when [filename] ends with one of the extensions we treat
 * as PHP source: .php, .phpt or .inc.
 *
 * The .inc pattern is anchored with $ like the other ones; without the
 * anchor any name merely containing the text .inc was accepted.
 *)
let is_php_filename filename =
  (filename =~ ".*\\.php$") ||
  (filename =~ ".*\\.phpt$") ||
  (* hotcrp uses this extension *)
  (filename =~ ".*\\.inc$")
  (* hack uses the .hhi extension.
   * todo: can not include those files for now because
   * they conflict with pfff/data/php_stdlib and generate lots
   * of DUPE in codegraph
   *
   * (filename =~ ".*\\.hhi")
   *)

(* Returns true when [filename] ends with the .hhi extension. *)
let is_hhi_filename filename =
  filename =~ ".*\\.hhi$"

(* Returns true when [filename] ends with the .phar extension. *)
let is_php_filename_phar filename =
  filename =~ ".*\\.phar$"

(* A file is considered PHP when it is not a .phar archive and either its
 * name has a PHP extension or its first line is a php shebang. The
 * extension test comes first so the file is only opened when needed.
 *)
let is_php_file filename =
  not (is_php_filename_phar filename) &&
  (is_php_filename filename || is_php_script filename)

(*
* In command line tools like git or mercurial, many operations work
* when a file, a set of files, or even dirs are passed as parameters.
* We want the same with pfff, hence this small helper function that
* transforms such files_or_dirs into a flat set of filenames.
*)
(* Parameters:
 *  - verbose: when true, every rejected filename is reported with pr2.
 *  - include_hack: when true, .hhi files are accepted too.
 *  - xs: the files and/or directories to explore.
 * The kept filenames are returned after going through Common.sort.
 *)
let find_source_files_of_dir_or_files
    ?(verbose=false) ?(include_hack=false) xs =
  Common.files_of_dir_or_files_no_vcs_nofilter xs
  |> List.filter (fun filename ->
    (* note: there was a possible race here because between the time we
     * do the 'find' and the time we call is_php_file(), the file may have
     * disappeared (this happens for instance because of watchman).
     * Hence the Sys.file_exists guard.
     *)
    let valid =
      (* note that there is still a race between the call to file_exists
       * and is_php_file, but this one is far shorter :)
       *)
      Sys.file_exists filename &&
      (is_php_file filename ||
       (include_hack && is_hhi_filename filename))
    in
    if not valid && verbose
    then pr2 ("not analyzing: " ^ filename);
    valid
  )
  |> Common.sort

(*****************************************************************************)
(* Extract infos *)
(*****************************************************************************)
(*s: extract infos *)

(* Runs [recursor] with a visitor that records every token info whose
 * token is a Parse_info.OriginTok, and returns the recorded infos after
 * a List.rev.
 * NOTE(review): Common.push presumably prepends, so the List.rev would
 * restore visit order - confirm.
 *)
let extract_info_visitor recursor =
  let globals = ref [] in
  let hooks = { V.default_visitor with
    V.kinfo = (fun (_k, _) i ->
      (* most of the time when you use ii_of_any, you want to use
       * functions like max_min_pos which works only on origin tokens
       * hence the filtering done here.
       *
       * ugly: For PHP we use a fakeInfo only for generating a fake left
       * brace for abstract methods.
       *)
      match i.Parse_info.token with
      | Parse_info.OriginTok _ ->
          Common.push i globals
      | _ ->
          ()
    )
  }
  in
  begin
    let vout = V.mk_visitor hooks in
    recursor vout;
    List.rev !globals
  end
(*x: extract infos *)

(* Origin token infos found in [any]. *)
let ii_of_any any =
  extract_info_visitor (fun visitor -> visitor any)
(*e: extract infos *)

(*****************************************************************************)
(* Abstract position *)
(*****************************************************************************)
(*s: abstract infos *)

(* Runs [recursor] with a mapper (Map_php) that replaces the token field
 * of every info by Parse_info.Ab, and returns the result of [recursor].
 *)
let abstract_position_visitor recursor =
  let hooks = { V2.default_visitor with
    V2.kinfo = (fun (_k, _) i ->
      { i with Parse_info.token = Parse_info.Ab }
    )
  }
  in
  begin
    let vout = V2.mk_visitor hooks in
    recursor vout;
  end
(*x: abstract infos *)

(* Copy of [x] in which every token info has been abstracted. *)
let abstract_position_info_any x =
  abstract_position_visitor (fun visitor -> visitor.V2.vany x)
(*e: abstract infos *)

(*****************************************************************************)
(* Max min, range *)
(*****************************************************************************)
(*s: max min range *)
(*x: max min range *)

(* Range covered by the origin tokens of [ii], as
 * Some (position of the min token,
 *       position of the max token + length of its string),
 * or None when computing it raised an exception.
 *
 * NOTE(review): the catch-all handler also hides assertion failures and
 * any other exception; presumably it is only meant for the case where no
 * origin token is left after filtering - confirm before narrowing it.
 *)
let (range_of_origin_ii: Cst_php.tok list -> (int * int) option) =
 fun ii ->
  let ii = List.filter Parse_info.is_origintok ii in
  try
    let (min, max) = Parse_info.min_max_ii_by_pos ii in
    assert (PI.is_origintok max);
    assert (PI.is_origintok min);
    let strmax = PI.str_of_info max in
    Some
      (PI.pos_of_info min, PI.pos_of_info max + String.length strmax)
  with _ ->
    None
(*e: max min range *)

(*****************************************************************************)
(* Ast getters *)
(*****************************************************************************)
(*s: ast getters *)

(* Names (as strings) of the calls of the form Call (Id name, args) found
 * in [any]. A hash table is used as a set, so each name appears once.
 *)
let get_funcalls_any any =
  let h = Hashtbl.create 101 in

  let hooks = { V.default_visitor with
    (* TODO if nested function ??? still wants to report ?
     *)
    V.kexpr = (fun (k, _vx) x ->
      match x with
      | Call (Id callname, _args) ->
          let str = Cst_php.str_of_name callname in
          Hashtbl.replace h str true;
          (* keep visiting: the arguments may contain other calls *)
          k x
      | _ -> k x
    );
  }
  in
  let visitor = V.mk_visitor hooks in
  visitor any;
  Common.hashset_to_list h
(*x: ast getters *)
(*x: ast getters *)

(* Strings found in [any], either as String constants or as EncapsString
 * parts. A hash table is used as a set, so each string appears once.
 *)
let get_constant_strings_any any =
  let h = Hashtbl.create 101 in

  let hooks = { V.default_visitor with
    V.kconstant = (fun (k, _vx) x ->
      match x with
      | String (str, _ii) ->
          Hashtbl.replace h str true
      | _ -> k x
    );
    V.kencaps = (fun (k, _vx) x ->
      match x with
      | EncapsString (str, _ii) ->
          Hashtbl.replace h str true
      | _ -> k x
    );
  }
  in
  (V.mk_visitor hooks) any;
  Common.hashset_to_list h
(*e: ast getters *)

(* Names declared by the StaticVars statements found in [any]. The
 * visitor does not descend into a StaticVars statement once matched.
 *)
let get_static_vars_any any =
  any |> V.do_visit_with_ref (fun aref -> { V.default_visitor with
    V.kstmt = (fun (k, _vx) x ->
      match x with
      | StaticVars (_tok, xs, _tok2) ->
          xs |> Ast.uncomma |> List.iter (fun (dname, _affect_opt) ->
            Common.push dname aref
          )
      | _ -> k x
    );
  })

(* todo? do last_stmt_is_a_return isomorphism ? *)
(* Expressions returned by the Return statements of [any] that carry a
 * value; such a statement is not visited further once matched.
 *)
let get_returns_any any =
  V.do_visit_with_ref (fun aref -> { V.default_visitor with
    V.kstmt = (fun (k, _vx) x ->
      match x with
      | Return (_tok1, Some e, _tok2) ->
          Common.push e aref
      | _ -> k x
    )
  }) any

(* Variable names found in [any]: every IdVar, plus the variables listed
 * in the use part of a Lambda.
 *)
let get_vars_any any =
  V.do_visit_with_ref (fun aref -> { V.default_visitor with
    V.kexpr = (fun (k, _vx) x ->
      match x with
      | IdVar (dname, _scope) ->
          Common.push dname aref
      (* todo? sure ?? *)
      | Lambda (l_use, _def) ->
          l_use |> Common.do_option (fun (_tok, xs) ->
            xs |> Ast.unparen |> Ast.uncomma |> List.iter (function
            | LexicalVar (_is_ref, dname) ->
                Common.push dname aref
            )
          );
          (* the body of the lambda is visited too *)
          k x
      | _ -> k x
    );
  }) any

(*****************************************************************************)
(* Ast adapters *)
(*****************************************************************************)

(* Statements of all the StmtList toplevel elements of [ast], concatenated
 * in order; every other kind of toplevel element contributes nothing.
 *)
let top_statements_of_program ast =
  ast |> List.map (function
  | StmtList xs -> xs
  | FinalDef _ | NotParsedCorrectly _
  | ClassDef _ | FuncDef _ | ConstantDef _ | TypeDef _
  | NamespaceDef _ | NamespaceBracketDef _ | NamespaceUse _
    -> []
  ) |> List.flatten

(* We often do some analysis on "unit" of code like a function,
* a method, or toplevel statements. One cannot use the
* 'toplevel' type for that because it contains Class and Interface which
* are too coarse-grained; the method granularity is better.
*
* For instance it makes sense to have a CFG for a function, a method,
* or toplevel statements but a CFG for a class does not make sense.
*)
(* Returns a triple (functions, methods, toplevel statement lists)
 * collected from [prog].
 *  - Regular functions go to the first list, regular and abstract
 *    methods to the second one; lambdas are ignored.
 *  - The visitor does not descend into a function definition nor into a
 *    toplevel StmtList once matched.
 * NOTE(review): the lists are returned as accumulated by Common.push,
 * without any List.rev; presumably this means reverse visit order -
 * confirm if callers rely on ordering.
 *)
let functions_methods_or_topstms_of_program prog =
  let funcs = ref [] in
  let methods = ref [] in
  let toplevels = ref [] in

  let visitor = V.mk_visitor { V.default_visitor with
    V.kfunc_def = (fun (_k, _) def ->
      match def.f_type with
      | FunctionRegular -> Common.push def funcs
      | MethodRegular | MethodAbstract -> Common.push def methods
      | FunctionLambda -> ()
    );
    V.ktop = (fun (k, _) top ->
      match top with
      | StmtList xs -> Common.push xs toplevels
      | _ -> k top
    );
  }
  in
  visitor (Program prog);
  !funcs, !methods, !toplevels

(* do some isomorphisms for declaration vs assignement *)
(* Collects (variable name, assigned value) pairs from static variable
 * declarations with an initializer and from Assign/AssignOp expressions
 * whose left-hand side is a plain IdVar, then groups them with
 * Common.group_assoc_bykey_eff.
 *)
let get_vars_assignements_any recursor =
  (* We want to group later assignement by variables, and
   * so we want to use function like Common.group_by_xxx
   * which requires to have identical key. Each dname occurrence
   * below has a different location and so we cannot use dname as
   * key, but the name of the variable can be used, hence the use
   * of Ast.str_of_dname
   *)
  V.do_visit_with_ref (fun aref -> { V.default_visitor with
    V.kstmt = (fun (k, _) x ->
      match x with
      | StaticVars (_tok, xs, _tok2) ->
          xs |> Ast.uncomma |> List.iter (fun (dname, affect_opt) ->
            let s = Ast.str_of_dname dname in
            affect_opt |> Common.do_option (fun (_tok, scalar) ->
              Common.push (s, scalar) aref
            )
          )
      | _ -> k x
    );
    V.kexpr = (fun (k, _vx) x ->
      match x with
      | Assign (lval, _, e)
      | AssignOp (lval, _, e) ->
          (* the expression itself can contain assignements *)
          k x;
          (* for now we handle only simple direct assignement to simple
           * variables *)
          (match lval with
          | IdVar (dname, _scope) ->
              let s = Ast.str_of_dname dname in
              Common.push (s, e) aref
          | _ -> ()
          )
      (* todo? AssignRef AssignNew ? *)
      | _ -> k x
    );
  }) recursor |> Common.group_assoc_bykey_eff
(*e: lib_parsing_php.ml *)