123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237(* Yoann Padioleau
*
* Copyright (C) 2012 Facebook
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public License
* version 2.1 as published by the Free Software Foundation, with the
* special exception on linking described in file license.txt.
*
* This library is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the file
* license.txt for more details.
*)openCommonmoduleFlag=Flag_parsingmoduleE=Entity_codemoduleG=Graph_codeopenCst_mlmoduleV=Visitor_ml(*****************************************************************************)(* Prelude *)(*****************************************************************************)(*
* Obsolete file: see graph_code_cmt.ml for a more complete graph.
*
* Partial graph of dependencies for OCaml using essentially just the
* open directives.
*
* todo? if give edges a weight, then we need to modulate it depending on
* the type of the reference. Two references to a function in another
* module is more important than 10 references to some constructors?
* If we do some pattern matching on 20 constructors, is it more
* important than two functions calls?
* So Type|Exception > Function|Class|Global >> Constructors|constants ?
*
* notes:
* - ml vs mli? just get rid of mli? but one can also want to
* care only about mli dependencies, like I did with my 'make doti'.
* We can introduce a Module entity that is the parent of the
* ml and mli file (this has-graph unify many things :) ).
*
* TODO but there is still the issue about where to put the edge
* when one module call a function in another module. Do we
* link the call to the def in the mli or in the ml?
*
* schema:
* Root -> Dir -> Module -> File (.ml) -> # TODO
*
* -> File (.mli)
* -> Dir -> File # no intermediate Module node when there is a dupe
* # on a module name (e.g. for main.ml)
*
* -> Dir -> SubDir -> Module -> ...
*)(*****************************************************************************)(* Types *)(*****************************************************************************)(*****************************************************************************)(* Helpers *)(*****************************************************************************)letparsefile=Common.save_excursionFlag.show_parsing_errorfalse(fun()->Common.save_excursionFlag.exn_when_lexical_errortrue(fun()->tryParse_ml.parse_programfilewithParse_info.Parsing_error_->pr2("PARSING problem in: "^file);[]))(* todo: move this code in another file? module_analysis_ml.ml ? *)letlookup_module_nameh_module_aliasess=tryHashtbl.findh_module_aliasesswithNot_found->s(*****************************************************************************)(* Defs *)(*****************************************************************************)(*
* We just create the Dir, File, and Module entities.
* See graph_code_cmt.ml if you want Function, Type, etc.
*)letextract_defs~g~duplicate_modules~ast~readable~file=ignore(ast);letdir=Common2.dirnamereadableinG.create_intermediate_directories_if_not_presentgdir;letm=Module_ml.module_name_of_filenamefileing|>G.add_node(readable,E.File);match()with|_whenList.memm(Common2.keysduplicate_modules)->(* we could attach to two parents when we are almost sure that
* nobody will reference this module (e.g. because it's an
* entry point), but then all the uses in those files would
* propagate to two parents, so when we have a dupe, we
* don't create the intermediate Module node. If it's referenced
* somewhere then it will generate a lookup failure.
* So just Dir -> File here, no Dir -> Module -> File.
*)g|>G.add_edge((dir,E.Dir),(readable,E.File))G.Has;(matchmwith|swhens=~"Main.*"||s=~"Demo.*"||s=~"Test.*"||s=~"Foo.*"->()|_whenfile=~".*external/"->()|_->pr2(spf"PB: module %s is already present (%s)"m(Common.dump(List.assocmduplicate_modules)));)|_whenG.has_node(m,E.Module)g->(matchG.parents(m,E.Module)gwith(* probably because processed .mli or .ml before which created the node *)|[p]whenp=*=(dir,E.Dir)->g|>G.add_edge((m,E.Module),(readable,E.File))G.Has|x->pr2"multiple parents or no parents or wrong dir";pr2_gen(x,dir,m);raiseImpossible)|_->(* Dir -> Module -> File *)g|>G.add_node(m,E.Module);g|>G.add_edge((dir,E.Dir),(m,E.Module))G.Has;g|>G.add_edge((m,E.Module),(readable,E.File))G.Has(*****************************************************************************)(* Uses *)(*****************************************************************************)letextract_uses~g~ast~readable~dupes=letsrc=(readable,E.File)in(* when do module A = Foo, A.foo is actually a reference to Foo.foo *)leth_module_aliases=Hashtbl.create101inletadd_edge_if_existing_modules=lets=lookup_module_nameh_module_aliasessinlettarget=(s,E.Module)inifG.has_nodetargetgtheng|>G.add_edge(src,target)G.Useelsebeging|>G.add_nodetarget;letparent_target=ifList.memsdupesthenG.dupeelseG.not_founding|>G.add_edge(parent_target,target)G.Has;g|>G.add_edge(src,target)G.Use;pr2(spf"PB: lookup fail on module %s in %s"(fsttarget)readable)endinletvisitor=V.mk_visitor{V.default_visitorwith(* todo? does it cover all use cases of modules ? maybe need
* to introduce a kmodule_name_ref helper in the visitor
* that does that for us.
* todo: if want to give more information on edges, need
* to intercept the module name reference at a upper level
* like in FunCallSimple. C-s for long_name in ast_ml.ml
*)V.kitem=(fun(k,_)x->(matchxwith|Open(_tok,(_qu,(Name(s,_))))->add_edge_if_existing_modules|Module(_,Name(s,_),_,(ModuleName([],Name(s2,__))))->Hashtbl.addh_module_aliasesss2;|_->());kx);V.kqualifier=(fun(k,_)qu->(matchquwith|[]->()|(Name(s,_),_tok)::_rest->add_edge_if_existing_modules);kqu);}invisitor(Programast);()(*****************************************************************************)(* Main entry point *)(*****************************************************************************)letbuild?(verbose=true)rootfiles=letg=G.create()inG.create_initial_hierarchyg;letduplicate_modules=files|>Common.group_by_mapped_key(funf->Common2.basenamef)|>List.filter(fun(_k,xs)->List.lengthxs>=2)|>List.map(fun(k,xs)->Module_ml.module_name_of_filenamek,xs)in(* step1: creating the nodes and 'Has' edges, the defs *)ifverbosethenpr2"\nstep1: extract defs";files|>Console.progress~show:verbose(funk->List.iter(funfile->k();letreadable=Common.readable~rootfileinletast=(* parse file *)()inextract_defs~g~duplicate_modules~ast~readable~file;));(* step2: creating the 'Use' edges, the uses *)ifverbosethenpr2"\nstep2: extract uses";files|>Console.progress~show:verbose(funk->List.iter(funfile->k();letreadable=Common.readable~rootfilein(* skip files under external/ for now *)ifreadable=~".*external/"||readable=~"web/.*"then()elsebeginletast=parsefileinextract_uses~g~ast~readable~dupes:(List.mapfstduplicate_modules);end));g