(* Yoann Padioleau
*
* Copyright (C) 2010 Facebook
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public License
* version 2.1 as published by the Free Software Foundation, with the
* special exception on linking described in file license.txt.
*
* This library is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the file
* license.txt for more details.
*)
open Common
open Archi_code

(*****************************************************************************)
(* Prelude *)
(*****************************************************************************)
(*
* The "inference" of the architecture category from a filename
* used to be slow. The "parser" used to be a 'match' with a long series
* of '_ when f =~ ...' but it was getting really slow when
* applied on thousands of filenames. Then we provided a fast-path
* for files that do not match any category, but it was still slow
* when most of the files had a category (for instance because
* most of the files in a project are under something like lib/ or intern/).
* Then we used ocamllex and that was fine!
*
* Current stat of -profile on codemap.opt ~/www:
* Archi.source_of_filename : 1.690 sec 112755 count
*)

(*****************************************************************************)
(* Helpers *)
(*****************************************************************************)

(* Regexp matching operator, re-exported from Common2 for local use. *)
let (==~) = Common2.(==~)

(* Matches the extension-less basename of a yacc/bison output such as
 * "foo.tab" (coming from foo.tab.c); group 1 captures the grammar basename
 * ("foo"). The dot is escaped and the pattern is anchored at the end so that
 * a basename like "foo.table" is not mistaken for a "foo.tab" output.
 *)
let re_c_yaccfile = Str.regexp "\\(.*\\)\\.tab$"

(* Tells whether [file] looks like it was generated from another file
 * sitting in the same directory (ocamllex/ocamlyacc/menhir sources, noweb,
 * texinfo, automake, lex/yacc, imake). The decision is based on the
 * extension of [file] and on Sys.file_exists probes, so this function
 * reads the filesystem.
 *
 * coupling: don't forget to extend re_auto_generated below too
 *)
let is_auto_generated file =
  let (d, b, e) = Common2.dbe_of_filename_noext_ok file in
  (* does the file built from (d, base, ext) exist on disk? *)
  let sibling base ext =
    Sys.file_exists (Common2.filename_of_dbe (d, base, ext))
  in
  match e with
  | "ml" -> sibling b "mll" || sibling b "mly" || sibling b "mlb"
  | "mli" -> sibling b "mly"
  (* noweb: foo.tex is considered generated when foo.tex.nw exists *)
  | "tex" -> sibling (b ^ ".tex") "nw"
  | "info" -> sibling b "texi"
  (* Makefile.in *)
  | "in" -> sibling b "am"
  | "c" ->
      b =$= "y.tab" ||
      sibling b "y" ||
      sibling b "l" ||
      (* bigloo (hmm but then conflict with s9 that have s9.c and s9.scm *)
      (* Sys.file_exists (Common2.filename_of_dbe (d,b, "scm")) || *)
      (* foo.tab.c is generated when foo.y exists *)
      (b ==~ re_c_yaccfile && sibling (Common.matched1 b) "y")
  | _ when b = "Makefile" && e = "NOEXT" ->
      sibling b "am" || sibling b "in" || sibling "Imakefile" ""
  | _ -> false

(* opti: for some fastpath. Cheap textual pre-filter applied to the basename
 * before calling is_auto_generated, which has to touch the filesystem.
 * It only needs to accept every name is_auto_generated can say yes to.
 *)
let re_auto_generated =
  Str.regexp
    "\\(.*\\.\\(ml\\|mli\\|tex\\|info\\|in\\|c\\)\\)\\|.*Makefile"

(*****************************************************************************)
(* Filename->archi *)
(*****************************************************************************)

(* cache: directory (as computed in source_archi_of_filename3) -> category *)
let _hmemo_categ_dir = Hashtbl.create 101

(* Why taking the root ? Because if the data are in /tmp/data/soft/... then
* you would get the rule for tmp and data :( should not consider
* directories too far away.
* Why not passing a readable path then? Because most of the functions
* in common expect full path, and also because I use file operations
* like Sys.file_exists in is_auto_generated() which is used by this
* function.
*)
let source_archi_of_filename3 ~root file =
  let basename = Filename.basename file in
  let readable_path = Common.readable ~root file in
  (* the regexp is a cheap pre-filter; is_auto_generated probes the disk *)
  let generated = basename ==~ re_auto_generated && is_auto_generated file in
  if generated then AutoGenerated
  else begin
    (* run the ocamllex-generated categorizer on a string *)
    let categorize str = Archi_code_lexer.category (Lexing.from_string str) in

    (* we try to give the most specialized category by first considering
     * the extension of the file, then its basename, and then its
     * directory component starting from the last one (hence the List.rev)
     *)
    let by_name = categorize ("/" ^ String.lowercase_ascii basename ^ "/") in

    let dir = Filename.dirname readable_path in
    (* try the directory, caching the result.
     *
     * note: should perhaps put (root, dir) as the key for the memoized call
     * because when we start from a nested dir and go up, the root has
     * changed and so what was considered Regular might have to be
     * categorized differently (e.g. Intern). But then when we click to go
     * down, we can't reuse the cached archi and the color may actually
     * change which can be confusing.
     *)
    let by_dir =
      Common.memoized _hmemo_categ_dir dir (fun () ->
        let reversed =
          dir
          |> String.lowercase_ascii
          |> Common.split "/"
          |> List.rev
          |> Common.join "/"
        in
        categorize ("/" ^ reversed ^ "/"))
    in
    (* the directory wins when it carries one of the strong categories
     * below, or when the name alone says nothing special (Regular);
     * otherwise the category derived from the basename is kept.
     *)
    match by_name, by_dir with
    | _, (Data | AutoGenerated | ThirdParty | Ffi | Legacy) -> by_dir
    | Regular, _ -> by_dir
    | _, _ -> by_name
  end

(* Profiled entry point: same result as source_archi_of_filename3, with the
 * call accounted under the "Archi.source_of_filename" profiling label.
 *)
let source_archi_of_filename ~root f =
  Common.profile_code "Archi.source_of_filename" (fun () ->
    source_archi_of_filename3 ~root f)