123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725(* Yoann Padioleau
*
* Copyright (C) 2019 Yoann Padioleau
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public License
* version 2.1 as published by the Free Software Foundation, with the
* special exception on linking described in file license.txt.
*
* This library is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the file
* license.txt for more details.
*)openCommonmoduleE=Entity_codemoduleG=Graph_codemodulePI=Parse_infoopenAst_jsmoduleAst=Ast_js(*****************************************************************************)(* Prelude *)(*****************************************************************************)(* Graph of dependencies for Javascript. See graph_code.ml
* and main_codegraph.ml for more information.
*
* schema:
* Root -> Dir -> File -> Function
* -> Class (and Obj)
* -> TODO Field
* -> Const
* -> Global TODO need type to know if not a func|class?
* -> Dir -> SubDir -> ...
*
* todo:
* - look at the deadcode detected by codegraph on the graph built
* by this file; they usually contains FP indicating bugs in this file
* - too many stuff
*)(*****************************************************************************)(* Types *)(*****************************************************************************)(* for the extract_uses visitor *)typeenv={g:Graph_code.graph;phase:phase;current:Graph_code.node;file_readable:Common.filename;root:Common.dirname;(* to find node_modules/ *)(* imports of external entities; also abused to create
* fake imports of the entities defined in the current file *)imports:(string,qualified_name(* orig name *))Hashtbl.t;(* 'locals' covers also the parameters; I handle block scope by not using
* a ref of mutable here! Just build a new list and passed it down.
*)locals:stringlist;(* 'var' has a function scope.
* alt: lift var up in a ast_js_build.ml transforming phase
*)vars:(string,bool)Hashtbl.t;(* less: use it? could use to check if the import match the exports *)exports:(Common.filename,stringlist)Hashtbl.t;(* error reporting *)dupes:(Graph_code.node,bool)Hashtbl.t;(* this is for the abstract interpreter *)db:(Ast_js.qualified_name,Ast_js.var)Hashtbl.t;asts:(Common.filename(* readable *)*Ast_js.program(* resolved*))listref;log:string->unit;pr2_and_log:string->unit;lookup_fail:env->Graph_code.node->Parse_info.t->unit;}andphase=Defs|Uses(* useful to evaluate the amount of constructs not yet handled *)leterror_recovery=reftrue(*****************************************************************************)(* Parsing *)(*****************************************************************************)(* because we use a 2 passes process (should do like in PHP all in 1 pass?) *)let_hmemo=Hashtbl.create101letparsefile=Common.memoized_hmemofile(fun()->tryletcst=Parse_js.parse_programfilein(* far easier representation to work on than the CST *)Ast_js_build.programcstwith|Timeout->raiseTimeout|Ast_js_build.TodoConstruct(s,tok)|Ast_js_build.UnhandledConstruct(s,tok)->pr2s;pr2(Parse_info.error_message_infotok);if!error_recoverythen[]elsefailwiths|exn->pr2(spf"PARSE ERROR with %s, exn = %s"file(Common.exn_to_sexn));if!error_recoverythen[]elseraiseexn)(*****************************************************************************)(* Helpers *)(*****************************************************************************)leterrorstok=leterr=spf"%s: %s"(Parse_info.string_of_infotok)sinfailwitherrlets_of_nn=Ast.str_of_namenletpos_of_toktokfile={(Parse_info.token_location_of_infotok)withPI.file}letis_undefined_ok(src,_kindsrc)(dst,_kinddst)=src=~"^node_modules/.*"||(*dst =~ "^NOTFOUND-.*" || *)(* r2c: too specific? *)dst=~"^src/images/"(*****************************************************************************)(* Qualified Name *)(*****************************************************************************)letmk_qualified_namereadables=assert(not(readable=~"^\\./"));letstr=tryFilename.chop_extensionreadablewithInvalid_argument_->failwith(spf"readable filename without any extension: %s"readable)instr^"."^sletqualified_nameenvname=(lets=s_of_nnameinifHashtbl.memenv.importssthenHashtbl.findenv.importsselses)|>(funs->assert(not(s=~"^\\./"));s)(*****************************************************************************)(* Other helpers *)(*****************************************************************************)letis_localenvn=lets=s_of_nninList.memsenv.locals||Hashtbl.memenv.varssletadd_localsenvvs=letlocals=vs|>Common.map_filter(funv->lets=s_of_nv.v_nameinmatchv.v_kindwith|Let|Const->Somes|Var->Hashtbl.replaceenv.varsstrue;None)in{envwithlocals=locals@env.locals}letkind_of_exprv_kinde=matchewith|Fun_->E.Function|Class_->E.Class|Obj_->E.Class|_->(* without types, this might be wrong; a constant might
* actually refer to a function, and a global to an object
*)ifv_kind=ConstthenE.ConstantelseE.Global(*****************************************************************************)(* Add Node *)(*****************************************************************************)letadd_node_and_edge_if_defs_modeenv(name,kind)=letstr=s_of_nnameinletstr'=matchenv.currentwith|(readable,E.File)->mk_qualified_namereadablestr|(s,_)->s^"."^strinletnode=(str',kind)inifenv.phase=Defsthenbeginmatch()with(* if parent is a dupe, then don't want to attach yourself to the
* original parent, mark this child as a dupe too.
*)|_whenHashtbl.memenv.dupesenv.current->Hashtbl.replaceenv.dupesnodetrue(* already there? a dupe? *)|_whenG.has_nodenodeenv.g->env.pr2_and_log(spf"DUPE entity: %s"(G.string_of_nodenode));letorig_file=G.file_of_nodenodeenv.ginenv.log(spf" orig = %s"orig_file);env.log(spf" dupe = %s"env.file_readable);Hashtbl.replaceenv.dupesnodetrue;(* ok not a dupe, let's add it then *)|_->(* try but should never happen, see comment below *)tryletpos=pos_of_tok(sndname)env.file_readableinletnodeinfo={Graph_code.pos;typ=None;props=[];}inenv.g|>G.add_nodenode;env.g|>G.add_edge(env.current,node)G.Has;env.g|>G.add_nodeinfonodenodeinfo;(* this should never happen, but it's better to give a good err msg *)withNot_found->error("Not_found:"^str)(sndname)end;ifHashtbl.memenv.dupesnodethenenvelse{envwithcurrent=node}(*****************************************************************************)(* Add edges *)(*****************************************************************************)letadd_use_edgeenv(name,kind)=lets=qualified_nameenvnameinletsrc=env.currentinletdst=(s,kind)inletloc=sndnameinmatch()with|_whenHashtbl.memenv.dupessrc||Hashtbl.memenv.dupesdst->(* todo: stats *)env.pr2_and_log(spf"skipping edge (%s -> %s), one of it is a dupe"(G.string_of_nodesrc)(G.string_of_nodedst));|_whennot(G.has_nodesrcenv.g)->error(spf"SRC FAIL: %s (-> %s)"(G.string_of_nodesrc)(G.string_of_nodedst))loc(* the normal case *)|_whenG.has_nodedstenv.g->G.add_edge(src,dst)G.Useenv.g;(* error *)|_->env.lookup_failenvdstlocletadd_use_edge_candidatesenv(name,kind)scope=letkind=lets=qualified_nameenvnameinletdst=(s,kind)inifG.has_nodedstenv.gthenkindelseletcandidates=[E.Function;E.Class;E.Constant;E.Global]inletvalids=candidates|>List.filter(funk->G.has_node(s,k)env.g)in(matchvalidswith|[x]->x|_->kind(* default to first kind, but could report error *))inadd_use_edgeenv(name,kind);lets=qualified_nameenvnameinscope:=Globals;()(*****************************************************************************)(* Defs/Uses *)(*****************************************************************************)letrecextract_defs_usesenvast=ifenv.phase=Defsthenbeginletdir=Common2.dirnameenv.file_readableinG.create_intermediate_directories_if_not_presentenv.gdir;letnode=(env.file_readable,E.File)inenv.g|>G.add_nodenode;env.g|>G.add_edge((dir,E.Dir),node)G.Has;end;letenv={envwithcurrent=(env.file_readable,E.File);}intoplevels_entities_adjust_importsenvast;toplevelsenvast(* The order of toplevel declarations do not matter in Javascript.
* It is a dynamic language without static checking; if in the body of
* a function you use an entity declared further, nothing will complain
* about it.
* So we need to first extract all those toplevel entities and
* create an import for them to not get lookup failures otherwise
* on those forward uses.
*)andtoplevels_entities_adjust_importsenvxs=xs|>List.iter(function|M_|S_->()|Vv->letstr=s_of_nv.v_nameinHashtbl.replaceenv.importsstr(mk_qualified_nameenv.file_readablestr);)(* ---------------------------------------------------------------------- *)(* Toplevels *)(* ---------------------------------------------------------------------- *)andtoplevelenvx=matchxwith|V{v_name;v_kind;v_init;v_resolved}->name_exprenvv_namev_kindv_initv_resolved|S(tok,st)->letkind=E.TopStmtsinlets=spf"__top__%d:%d"(Parse_info.line_of_infotok)(Parse_info.col_of_infotok)inletname=s,tokinletenv=add_node_and_edge_if_defs_modeenv(name,kind)inifenv.phase=Usesthenstmtenvst|Mx->module_directiveenvxandmodule_directiveenvx=matchxwith|Import(name1,name2,(file,tok))->ifenv.phase=Usesthenbeginletstr1=s_of_nname1inletstr2=s_of_nname2inletpath_opt=Module_path_js.resolve_path~root:env.root~pwd:(Filename.dirnameenv.file_readable)fileinletreadable=matchpath_optwith|None->env.pr2_and_log(spf"could not resolve path %s at %s"file(Parse_info.string_of_infotok));spf"NOTFOUND-|%s|.js"file|Somefullpath->Common.readableenv.rootfullpathinHashtbl.replaceenv.importsstr2(mk_qualified_namereadablestr1)end|Export(name)->ifenv.phase=Defsthenbeginletexports=tryHashtbl.findenv.exportsenv.file_readablewithNot_found->[]inletstr=s_of_nnameinHashtbl.replaceenv.exportsenv.file_readable(str::exports)end|ModuleAlias(name,_fileTODO)->(* for now just add name as a local; anyway we do not
* generate dependencies for fields yet
*)lets=s_of_nnameinHashtbl.replaceenv.varsstrue;|ImportCss(_file)->()|ImportEffect(_file)->()andtoplevelsenvxs=List.iter(toplevelenv)xsandname_exprenvnamev_kindev_resolved=letkind=kind_of_exprv_kindeinletenv=add_node_and_edge_if_defs_modeenv(name,kind)inifenv.phase=Usesthenbeginexprenve;let(qualified,_kind)=env.currentinv_resolved:=Globalqualified;Hashtbl.addenv.dbqualified{v_name=name;v_kind;v_init=e;v_resolved}end(* ---------------------------------------------------------------------- *)(* Statements *)(* ---------------------------------------------------------------------- *)andstmtenv=function|VarDeclv->letenv=add_localsenv[v]inexprenvv.v_init|Blockxs->stmtsenvxs|ExprStmte->exprenve|If(e,st1,st2)->exprenve;stmtenvst1;stmtenvst2|Do(st,e)->stmtenvst;exprenve;|While(e,st)->exprenve;stmtenvst|For(header,st)->letenv=for_headerenvheaderinstmtenvst|Switch(e,xs)->exprenve;casesenvxs|Continuelopt->Common.opt(labelenv)lopt|Breaklopt->Common.opt(labelenv)lopt|Returne->exprenve|Label(l,st)->labelenvl;stmtenvst|Throwe->exprenve|Try(st1,catchopt,finalopt)->stmtenvst1;catchopt|>Common.opt(fun(n,st)->letv={v_name=n;v_kind=Let;v_init=Nop;v_resolved=refLocal}inletenv=add_localsenv[v]instmtenvst);finalopt|>Common.opt(fun(st)->stmtenvst);andfor_headerenv=function|ForClassic(e1,e2,e3)->letenv=matche1with|Leftvars->(* less: need fold_with_env? *)vars|>List.iter(funv->stmtenv(VarDeclv));add_localsenvvars|Righte->exprenve;envinexprenve2;exprenve3;env|ForIn(e1,e2)->letenv=matche1with|Leftvar->(* less: need fold_with_env? *)[var]|>List.iter(funv->stmtenv(VarDeclv));add_localsenv[var]|Righte->exprenve;envinexprenve2;env(* less: could check def/use of lbls, but less important *)andlabel_env_lbl=()andcasesenvxs=List.iter(caseenv)xsandcaseenv=function|Case(e,st)->exprenve;stmtenvst|Defaultst->stmtenvstandstmtsenvxs=letrecauxenv=function|[]->()|x::xs->stmtenvx;letenv=matchxwith|VarDeclv->add_localsenv[v]|_->envinauxenvxsinauxenvxs(* ---------------------------------------------------------------------- *)(* Expessions *)(* ---------------------------------------------------------------------- *)andexprenve=matchewith|Bool_|Num_|String_|Regexp_->()|Id(n,scope)->ifnot(is_localenvn)then(* the big one! *)add_use_edge_candidatesenv(n,E.Global)scope;|IdSpecial_->()|Nop->()|Assign(e1,e2)->exprenve1;exprenve2|Objo->obj_envo|Arrxs->List.iter(exprenv)xs|Class(c,nopt)->letenv=matchnoptwith|None->env|Somen->letv={v_name=n;v_kind=Let;v_init=Nop;(* TODO *)v_resolved=refLocal}inadd_localsenv[v]inclass_envc|ObjAccess(e,prop)->(matchewith|Id(n,scope)whennot(is_localenvn)->add_use_edge_candidatesenv(n,E.Class)scope|_->exprenve);property_nameenvprop|ArrAccess(e1,e2)->(matche1with|Id(n,scope)whennot(is_localenvn)->add_use_edge_candidatesenv(n,E.Class)scope|_->exprenve1);exprenve2|Fun(f,nopt)->letenv=matchnoptwith|None->env|Somen->letv={v_name=n;v_kind=Let;v_init=Nop;(* TODO *)v_resolved=refLocal}inadd_localsenv[v]infun_envf|Apply(e,es)->(matchewith|Id(n,scope)whennot(is_localenvn)->add_use_edge_candidatesenv(n,E.Function)scope|IdSpecial(special,_tok)->(matchspecial,eswith|New,_->(* TODO *)()|_->())|_->exprenve);List.iter(exprenv)es|Conditional(e1,e2,e3)->List.iter(exprenv)[e1;e2;e3]|Ellipses_->()(* ---------------------------------------------------------------------- *)(* Entities *)(* ---------------------------------------------------------------------- *)(* todo: create nodes if global var? *)andobj_envxs=List.iter(propertyenv)xsandclass_envc=Common.opt(exprenv)c.c_extends;List.iter(propertyenv)c.c_bodyandpropertyenv=function|Field(pname,_props,e)->property_nameenvpname;exprenve|FieldSpreade->exprenveandproperty_nameenv=function|PN_n2->()|PN_Computede->exprenveandfun_envf=(* less: need fold_with_env here? can not use previous param in p_default? *)parametersenvf.f_params;letparams=f.f_params|>List.map(funp->s_of_np.p_name)inletenv={envwithlocals=params@env.locals;(* new scope, but still inherits enclosing vars *)vars=Hashtbl.copyenv.vars;}instmtenvf.f_bodyandparametersenvxs=List.iter(parameterenv)xsandparameterenvp=Common.opt(exprenv)p.p_default(*****************************************************************************)(* Main entry point *)(*****************************************************************************)letbuild_gen?(verbose=false)rootfiles=letg=G.create()inG.create_initial_hierarchyg;(* use Hashtbl.find_all property *)lethstat_lookup_failures=Hashtbl.create101inletchan=open_out(Filename.concatroot"pfff.log")inletenv={g;phase=Defs;current=G.pb;file_readable="__filled_later__";root;imports=Hashtbl.create0;locals=[];vars=Hashtbl.create0;exports=Hashtbl.create101;dupes=Hashtbl.create101;db=Hashtbl.create101;asts=ref[];log=(funs->output_stringchan(s^"\n");flushchan;);pr2_and_log=(funs->ifverbosethenpr2s;output_stringchan(s^"\n");flushchan;);lookup_fail=(funenvdstloc->letsrc=env.currentinletfprinter=ifnotverbose||is_undefined_oksrcdstthenenv.logelseenv.pr2_and_loginfprinter(spf"PB: lookup_fail on %s (in %s, at %s)"(G.string_of_nodedst)(G.string_of_nodesrc)(Parse_info.string_of_infoloc));(* note: could also use Hashtbl.replace to count entities only once *)Hashtbl.addhstat_lookup_failuresdsttrue;);}in(* step1: creating the nodes and 'Has' edges, the defs *)env.pr2_and_log"\nstep1: extract defs";(Stdlib_js.path_stdlib::files)|>Console.progress~show:verbose(funk->List.iter(funfile->k();letast=parsefileinletfile_readable=iffile=Stdlib_js.path_stdlibthen"Stdlib.js"elseCommon.readable~rootfileinextract_defs_uses{envwithphase=Defs;file_readable;imports=Hashtbl.create13;}ast));(* step2: creating the 'Use' edges *)letdefault_import=letast=parseStdlib_js.path_stdlibinletenv={envwithphase=Uses;file_readable="Stdlib.js";locals=[];imports=Hashtbl.create13;}intoplevels_entities_adjust_importsenvast;env.importsinenv.pr2_and_log"\nstep2: extract uses";files|>Console.progress~show:verbose(funk->List.iter(funfile->k();letast=parsefileinletfile_readable=Common.readable~rootfileinextract_defs_uses{envwithphase=Uses;file_readable;locals=[];imports=Hashtbl.copydefault_import;}ast;Common.push(file_readable,ast)env.asts;));env.pr2_and_log"\nstep3: adjusting";G.remove_empty_nodesg[G.not_found;G.dupe;G.pb];(* less: lookup failures summary *)letxs=Common2.hkeyshstat_lookup_failuresinletcounts=xs|>List.map(fun(x)->G.string_of_nodex,List.length(Hashtbl.find_allhstat_lookup_failuresx))|>Common.sort_by_val_highfirst|>Common.take_safe20inpr2"Top lookup failures per modules";counts|>List.iter(fun(s,n)->pr2(spf"%-30s = %d"sn));(* finally return the graph *)g,env.db,!(env.asts)letbuild?verboserootfiles=let(g,_,_)=build_gen?verboserootfilesing(*****************************************************************************)(* For abstract interpreter *)(*****************************************************************************)(* todo: actually probably better to generate the code database on demand
* while abstract-interpreting the code and its import/require so we
* can actually even handle some dynamic imports.
*)letbuild_for_airootfiles=let(_,db,asts)=build_gen~verbose:falserootfilesindb,asts