123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710(* Yoann Padioleau
*
* Copyright (C) 2010, 2013 Facebook
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public License
* version 2.1 as published by the Free Software Foundation, with the
* special exception on linking described in file license.txt.
*
* This library is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the file
* license.txt for more details.
*)openCommonopenCst_mlmoduleAst=Cst_mlopenHighlight_codemoduleV=Visitor_mlmodulePI=Parse_infomoduleT=Parser_mlopenEntity_codemoduleE=Entity_code(*****************************************************************************)(* Prelude *)(*****************************************************************************)(* Syntax highlighting for OCaml code for codemap (and now also for efuns).
*
* This code can also be abused to generate the light database
* and the TAGS file (because codemap needs to know about
* def and use of entities), but you should now prefer to
* base such analysis on graph_code_cmt.ml instead of this file.
*)(*****************************************************************************)(* Helpers when have global analysis information *)(*****************************************************************************)(* pad-specific: see my ~/.emacs *)leth_pervasives_pad=Common.hashset_of_list["pr2";"pr";"pr2_gen";"sprintf";"i_to_s";"pp2";"spf";"log";"log2";"log3"]leth_builtin_modules=Common.hashset_of_list["Pervasives";"Common";"List";"Hashtbl";"Array";"Stack";"String";"Bytes";"Str";"Sys";"Unix";"Gc";"Filename";]leth_builtin_bool=Common.hashset_of_list["not";"exists";"forall";]letfake_no_def2=NoUseletfake_no_use2=(NoInfoPlace,UniqueDef,MultiUse)(* set to true when want to debug the AST based tagger *)letdisable_token_phase2=false(*****************************************************************************)(* AST helpers *)(*****************************************************************************)letkind_of_bodyx=letdef2=Def2fake_no_def2inmatchAst.uncommaxwith|(Ast.Fun_|Ast.Function_)::_xs->Entity(Function,def2)|Ast.FunCallSimple(([],Name("ref",_)),_)::_xs->Entity(Global,def2)|Ast.FunCallSimple(([Name("Hashtbl",_),_],Name("create",_)),_)::_xs->Entity(Global,def2)|_->Entity(Constant,def2)(* todo: actually it can be a typedef alias to a function too
* but this would require some analysis
*)letkind_of_tyty=letdef2=Def2fake_no_def2inmatchtywith|TyFunction_->(FunctionDeclNoUse)|TyApp(_,([],Name("ref",_)))->Entity(Global,def2)(* todo: should handle module aliases there too *)|TyApp(_,([Name("Hashtbl",_),_],Name("t",_)))->Entity(Global,def2)|_->Entity(Constant,def2)(*****************************************************************************)(* Code highlighter *)(*****************************************************************************)(* The idea of the code below is to visit the program either through its
* AST or its list of tokens. The tokens are easier for tagging keywords,
* number and basic entities. The AST is better for tagging idents
* to figure out what kind of ident it is.
*)letvisit_program?(lexer_based_tagger=false)~tag_hook_prefs(*db_opt *)(ast,toks)=letalready_tagged=Hashtbl.create101inlettag=(funiicateg->tag_hookiicateg;Hashtbl.addalready_taggediitrue)in(* -------------------------------------------------------------------- *)(* AST phase 1 *)(* -------------------------------------------------------------------- *)(* try to better colorize identifiers which can be many different things
* e.g. a field, a type, a function, a parameter, etc
*)letin_let=reffalseinletin_try_with=reffalseinletv=V.mk_visitor{V.default_visitorwithV.kitem=(fun(k,_)x->(matchxwith|Val(_tok,name,_tok2,ty)|External(_tok,name,_tok2,ty,_,_)->letinfo=Ast.info_of_namenameintaginfo(kind_of_tyty)|Ast.Exception(_tok,name,_args)->letinfo=Ast.info_of_namenameintaginfo(Entity(E.Exception,Def2fake_no_def2));|Open(_tok,lname)->letinfo=Ast.info_of_name(Ast.name_of_long_namelname)intaginfo(Entity(Module,Use2fake_no_use2));|Ast.Module(_tok,uname,_tok2,_mod_expr)->letii=Ast.info_of_nameunameintagii(Entity(Module,Def2fake_no_def2));|Let_|Ast.Type_|ItemTodo_->());kx);V.kqualifier=(fun(_k,_bigf)qu->letmodule_infos=Ast.module_infos_of_long_name(qu,())inmodule_infos|>List.iter(funii->tagii(Entity(Module,Use2fake_no_use2))));V.kmodule_expr=(fun(k,_bigf)mod_expr->(matchmod_exprwith|ModuleNamelname->letinfo=Ast.info_of_name(Ast.name_of_long_namelname)intaginfo(Entity(Module,Use2fake_no_use2));|_->());kmod_expr);V.kparameter=(fun(k,_bigf)x->(matchxwith|ParamPat(PatVarname)->letinfo=Ast.info_of_namenameintaginfo(ParameterDef);|_->());kx);V.kargument=(fun(k,_)x->(matchxwith|ArgImplicitTildeExpr(_,name)->letinfo=Ast.info_of_namenamein(* todo: could be a Parameter use, need scope analysis *)taginfo(LocalUse)|_->());kx);V.klet_binding=(fun(k,_bigf)x->matchxwith|LetClassiclet_def->letname=let_def.l_nameinletinfo=Ast.info_of_namenamein(ifnot!in_letthenifList.lengthlet_def.l_params>0thentaginfo(Entity(Function,(Def2NoUse)))elsetaginfo(kind_of_bodylet_def.l_body)elsetaginfo(Local(Def)));Common.save_excursionin_lettrue(fun()->kx)|LetPattern(pat,_tok,body)->(matchpatwith|PatTyped(_,PatVarname,_,_ty,_)->letinfo=Ast.info_of_namenameinifnot!in_letthentaginfo(kind_of_bodybody)elsetaginfo(Local(Def))|_->());Common.save_excursionin_lettrue(fun()->kx));V.kexpr=(fun(k,bigf)x->matchxwith|Llong_name->letinfo=Ast.info_of_name(Ast.name_of_long_namelong_name)in(* could have been tagged as a function name in the rule below *)ifnot(Hashtbl.memalready_taggedinfo)thenbegin(* TODO could be a param, could be a local. Need scope analysis
* TODO could also be actually a func passed to a higher
* order function, as in List.map snd, or even x +> Common.sort
*)taginfo(LocalUse)end;kx|FunCallSimple(long_name,_args)->letname=Ast.name_of_long_namelong_nameinletinfo=Ast.info_of_namenameinletmodule_name=Ast.module_of_long_namelong_nameinletmodule_infos=Ast.module_infos_of_long_namelong_nameinlets=Ast.str_of_namenamein(match()with|_whens="ref"->taginfoUseOfRef|_whenHashtbl.memh_builtin_modulesmodule_name->module_infos|>List.iter(funii->tagiiBuiltinCommentColor);taginfoBuiltin;|_->taginfo(Entity(Function,(Use2fake_no_use2))););kx(* disambiguate "with" which can be used for match, try, or record *)|Match(_match,_e1,tok_with,_match_cases)->tagtok_with(KeywordConditional);kx|Try(try_tok,e,tok_with,match_cases)->tagtok_with(KeywordExn);k(Try(try_tok,e,tok_with,[]));Common.save_excursionin_try_withtrue(fun()->match_cases|>Ast.unpipe|>List.iter(funmatch_case->bigf(MatchCasematch_case)))|FieldAccess(_e,_tok,long_name)|FieldAssign(_e,_tok,long_name,_,_)->letinfo=Ast.info_of_name(Ast.name_of_long_namelong_name)intaginfo(Entity(Field,(Use2fake_no_use2)));kx|ObjAccess(_e,_tok,name)->letinfo=Ast.info_of_namenameintaginfo(Entity(Method,(Use2fake_no_use2)));kx|Constr(long_name,_eopt)->letinfo=Ast.info_of_name(Ast.name_of_long_namelong_name)intaginfo(Entity(Constructor,(Use2fake_no_use2)));kx(* very pad specific ... *)|Infix(_l1,("=~",_),C(Ast.String(_s,tok)))->tagtokRegexp;kx|_->kx);V.kpattern=(fun(k,_)x->(matchxwith|PatConstr(long_name,_popt)->letinfo=Ast.info_of_name(Ast.name_of_long_namelong_name)inif!in_try_withthentaginfo(KeywordExn)elsetaginfo(ConstructorMatchfake_no_use2)|PatVarname->letinfo=Ast.info_of_namenameintaginfo(ParameterDef)|_->());kx);V.kty=(fun(k,_)t->(matchtwith|TyNamelong_name->letinfo=Ast.info_of_name(Ast.name_of_long_namelong_name)intaginfo(Entity(Type,(Use2fake_no_use2)));|TyApp(_ty_args,long_name)->letname=Ast.name_of_long_namelong_nameinletinfo=Ast.info_of_namenamein(* different color for higher-order types *)taginfoTypeVoid;(* todo: ty_args *)|TyVar(_tok,name)->letinfo=Ast.info_of_namenameintaginfoTypeVoid;|TyTuple_|TyTuple2_|TyFunction_|TyTodo->());kt);V.ktype_declaration=(fun(k,_)x->matchxwith|TyDef(_ty_params,name,_tok,type_kind)->letinfo=Ast.info_of_namenameintaginfo(Entity(E.Type,Def2fake_no_def2));(* todo: ty_params *)(matchtype_kindwith|TyAlgebricxs->xs|>Ast.unpipe|>List.iter(fun(name,_args)->letinfo=Ast.info_of_namenameintaginfo(Entity(Constructor,Def2fake_no_def2)));|TyCore_|TyRecord_->());kx|TyAbstract_->kx);V.kfield_decl=(fun(k,_)fld->letinfo=Ast.info_of_namefld.fld_nameintaginfo(Entity(Field,(Def2fake_no_def2)));kfld);V.kfield_expr=(fun(k,_)x->matchxwith|FieldExpr(long_name,_,_)|FieldImplicitExprlong_name->letname=Ast.name_of_long_namelong_nameinletinfo=Ast.info_of_namenameintaginfo(Entity(Field,(Use2fake_no_use2)));kx);V.kfield_pat=(fun(k,_)x->matchxwith|PatField(long_name,_,_)|PatImplicitFieldlong_name->letname=Ast.name_of_long_namelong_nameinletinfo=Ast.info_of_namenameintaginfo(Entity(Field,(Use2fake_no_use2)));kx);}inv(Programast);(* -------------------------------------------------------------------- *)(* toks phase 1 (sequence of tokens) *)(* -------------------------------------------------------------------- *)(* note: all TCommentSpace are filtered in xs so it should be easier to
* write rules (but regular comments are kept as well as newlines).
*)letrecaux_toksxs=matchxswith|[]->()(* pad-specific: *)|T.TComment(ii)::T.TCommentNewline(_ii2)::T.TComment(ii3)::T.TCommentNewline(ii4)::T.TComment(ii5)::xs->lets=PI.str_of_infoiiinlets5=PI.str_of_infoii5in(match()with|_whens=~".*\\*\\*\\*\\*"&&s5=~".*\\*\\*\\*\\*"->tagiiCommentEstet;tagii5CommentEstet;tagii3CommentSection1|_whens=~".*------"&&s5=~".*------"->tagiiCommentEstet;tagii5CommentEstet;tagii3CommentSection2|_whens=~".*####"&&s5=~".*####"->tagiiCommentEstet;tagii5CommentEstet;tagii3CommentSection0|_->());aux_toks(T.TCommentii3::T.TCommentNewlineii4::T.TCommentii5::xs)|T.TComment(ii)::xswhen(PI.str_of_infoii)=~"(\\*[ ]*coupling:"->tagiiCommentImportance3;aux_toksxs(* When we get a parse error, the AST does not contain the definitions, but
* we can still try to tag certain things. Here is a
* poor's man semantic tagger. We try to infer if an ident is a func,
* or class, or module based on the few tokens around.
*
* This may look ridiculous to do such semantic tagging using tokens
* instead of the full AST but many OCaml files could not parse with
* the default parser because of camlp4 extensions so having
* a solid token-based tagger is still useful as a last resort.
* update: with attributes this becomes less true as attributes have
* a far more regular-syntax (that's what they were designed for)
*)|T.Tlet(ii)::T.TLowerIdent(_s,ii3)::T.TEq_ii5::xswhenPI.col_of_infoii=0->ifnot(Hashtbl.memalready_taggedii3)&&lexer_based_taggerthentagii3(Entity(Global,(Def2NoUse)));aux_toksxs;|T.Tlet(ii)::T.TLowerIdent(_s,ii3)::xswhenPI.col_of_infoii=0->ifnot(Hashtbl.memalready_taggedii3)&&lexer_based_taggerthentagii3(Entity(Function,(Def2NoUse)));aux_toksxs;|(T.Tval(ii)|T.Texternal(ii))::T.TLowerIdent(_s,ii3)::xswhenPI.col_of_infoii=0->ifnot(Hashtbl.memalready_taggedii3)&&lexer_based_taggerthentagii3(FunctionDeclNoUse);aux_toksxs;|T.Tlet(ii)::T.Trec(_ii)::T.TLowerIdent(_s,ii3)::xswhenPI.col_of_infoii=0->ifnot(Hashtbl.memalready_taggedii3)&&lexer_based_taggerthentagii3(Entity(Function,(Def2NoUse)));aux_toksxs;|T.Tand(ii)::T.TLowerIdent(_s,ii3)::xswhenPI.col_of_infoii=0->ifnot(Hashtbl.memalready_taggedii3)&&lexer_based_taggerthentagii3(Entity(Function,(Def2NoUse)));aux_toksxs;|T.Ttype(ii)::T.TLowerIdent(_s,ii3)::xswhenPI.col_of_infoii=0->ifnot(Hashtbl.memalready_taggedii3)&&lexer_based_taggerthentagii3(Entity(Type,Def2NoUse));aux_toksxs;(* module defs *)|T.Tmodule(_)::T.TUpperIdent(_,ii_mod)::T.TEq(_)::T.Tstruct(_)::xs->tagii_mod(Entity(Module,Def2fake_no_def2));aux_toksxs|T.Tmodule(_)::T.TUpperIdent(_,ii_mod)::T.TColon(_)::T.Tsig(_)::xs->tagii_mod(Entity(Module,Def2fake_no_def2));aux_toksxs(* bad smell, use of ref *)|T.TBang_ii1::T.TLowerIdent(_s2,ii2)::xs->tagii2(UseOfRef);aux_toksxs|T.TBang_ii1::T.TUpperIdent(_s,ii)::T.TDot_::T.TLowerIdent(_s2,ii2)::xs->tagii(Entity(Module,Use2fake_no_use2));tagii2(UseOfRef);aux_toksxs|T.TLowerIdent(_,ii1)::(T.TAssignii2|T.TAssignMutableii2)::xs->tagii1(UseOfRef);tagii2(UseOfRef);aux_toksxs(* module use, and function call! *)|T.TUpperIdent(_s,ii)::T.TDot_ii2::T.TUpperIdent(_s2,_ii3)::xs->tagii(Entity(Module,Use2fake_no_use2));aux_toksxs;|T.TUpperIdent(s,ii)::T.TDot_ii2::T.TLowerIdent(_s2,ii3)::xs->(* see my .emacs *)ifHashtbl.memh_builtin_modulessthenbegintagiiBuiltinCommentColor;ifnot(Hashtbl.memalready_taggedii3)&&lexer_based_taggerthentagii3Builtin;endelsebegintagii(Entity(Module,Use2fake_no_use2));(* tag ii3 (Function Use); *)end;aux_toksxs;(* labels *)(* can be a def or use, no way to know
| T.TTilde ii1::T.TLowerIdent (_s, ii2)::xs ->
(* TODO when parser, can also have Use *)
tag ii1 (Parameter Def);
tag ii2 (Parameter Def);
aux_toks xs
*)(* grammar rules in ocamlyacc *)|T.TLowerIdent(_s,ii1)::T.TColon_::xswhenPI.col_of_infoii1=0->tagii1GrammarRule;aux_toksxs(* attributes *)|T.TBracketAtAt_::T.TLowerIdent(_,ii1)::T.TDot_::T.TLowerIdent(_,ii2)::xs->tagii1Attribute;tagii2Attribute;aux_toksxs|T.TBracketAtAt_::T.TLowerIdent(_,ii)::xs->tagiiAttribute;aux_toksxs|_x::xs->aux_toksxsinlettoks'=toks|>Common.exclude(function|T.TCommentSpace_->true|_->false)inaux_tokstoks';(* -------------------------------------------------------------------- *)(* toks phase 2 (individual tokens) *)(* -------------------------------------------------------------------- *)ifnotdisable_token_phase2thentoks|>List.iter(funtok->matchtokwith|T.TCommentii->ifnot(Hashtbl.memalready_taggedii)then(* a little bit syncweb specific *)lets=PI.str_of_infoiiin(matchswith(* yep, s e x are the syncweb markers *)|_whens=~"(\\*[sex]:"->tagiiCommentSyncweb(* normally then use of *** or ### or --- should be enough,
* but in some files like ocamlyacc files the preceding
* heuristic fail in which case it's useful to have those
* rules. Moreover ocamldoc use something similar
*)|_whens=~"(\\*1 "->tagiiCommentSection1|_whens=~"(\\*2 "->tagiiCommentSection2|_whens=~"(\\*3 "->tagiiCommentSection3|_->tagiiComment)|T.TCommentMiscii->tagiiCppOther|T.TCommentNewline_ii|T.TCommentSpace_ii->()|T.TUnknownii->tagiiError|T.EOF_ii->()|T.TSharpDirectiveii->tagiiIfdef|T.TString(_s,ii)->(* can have been tagged as a regexp *)ifnot(Hashtbl.memalready_taggedii)thentagiiString|T.TChar(_s,ii)->tagiiString|T.TFloat(_s,ii)|T.TInt(_s,ii)->tagiiNumber|T.Tfalseii|T.Ttrueii->tagiiBoolean|T.Tletii|T.Tinii|T.Tandii|T.Trecii|T.Tvalii|T.Texternalii->tagiiKeyword|T.Tfunii|T.Tfunctionii->tagiiKeyword|T.Ttypeii|T.Tofii->tagiiKeyword|T.Tifii|T.Tthenii|T.Telseii->tagiiKeywordConditional|T.Tmatchii->(* TODO: should also colorize it's with *)tagiiKeywordConditional|T.Twhenii->(* TODO: should also colorize it's with, when parser *)tagiiKeywordConditional|T.Ttryii->tagiiKeywordExn|T.Twithii->ifnot(Hashtbl.memalready_taggedii)thentagiiKeyword|T.Tforii|T.Tdoii|T.Tdoneii|T.Twhileii|T.Ttoii|T.Tdowntoii->tagiiKeywordLoop|T.Tbeginii|T.Tendii->tagiiKeywordLoop(* TODO: better categ ? *)|T.TBangii|T.TAssignii|T.TAssignMutableii->ifnot(Hashtbl.memalready_taggedii)thentagiiUseOfRef|T.TEqii->ifnot(Hashtbl.memalready_taggedii)thentagiiPunctuation|T.TSemiColonii|T.TPipeii|T.TCommaii|T.TOBracketii|T.TCBracketii|T.TOBraceii|T.TCBraceii|T.TOParenii|T.TCParenii|T.TOBracketPipeii|T.TPipeCBracketii|T.TPlusii|T.TMinusii|T.TLessii|T.TGreaterii|T.TDot(ii)|T.TColon(ii)|T.TBracketAtii|T.TBracketAtAtii|T.TBracketAtAtAtii|T.TBracketPercentii|T.TBracketPercentPercentii->tagiiPunctuation|T.TUpperIdent(s,ii)->(matchswith|"Todo"->tagiiBadSmell|_->()(* tag ii Constructor *))|T.TLabelDecl(_s,ii)->tagii(ParameterDef)|T.Topenii->tagiiBadSmell|T.Tmoduleii|T.Tstructii|T.Tsigii|T.Tincludeii|T.Tfunctorii->tagiiKeywordModule|T.Tclassii|T.Tvirtualii|T.Tprivateii|T.Tobjectii|T.Tnewii|T.Tmethodii|T.Tinitializerii|T.Tinheritii|T.Tconstraintii->tagiiKeywordObject|T.Tmutableii->tagiiKeywordLoop|T.Tasii->tagiiKeyword|T.Texceptionii->tagiiKeywordExn|T.Tlazyii|T.Tassertii->tagiiKeyword|T.TUnderscoreii|T.TTildeii|T.TStarii|T.TSemiColonSemiColonii->tagiiPunctuation|T.Tlandii|T.Tasrii|T.Tlxorii|T.Tlsrii|T.Tlslii|T.Tlorii|T.Tmodii|T.Torii->tagiiPunctuation|T.TSharpii|T.TQuoteii|T.TBackQuoteii|T.TQuestionii|T.TQuestionQuestionii|T.TDotDotii|T.TColonGreaterii|T.TColonColonii|T.TAndii|T.TAndAndii->tagiiPunctuation|T.TPrefixOperator(_,ii)|T.TInfixOperator(_,ii)->tagiiOperator|T.TMinusDotii|T.TPlusDotii|T.TArrowii|T.TBangEqii|T.TOBracketGreaterii|T.TGreaterCBraceii|T.TOBraceLessii|T.TGreaterCBracketii|T.TOBracketLessii|T.TOptLabelUse(_,ii)|T.TLabelUse(_,ii)->tagii(ParameterDef)(* TODO *)|T.TOptLabelDecl(_,ii)->tagii(ParameterDef)|T.TLowerIdent(s,ii)->matchswith|_whenHashtbl.memh_pervasives_pads->tagiiBuiltinCommentColor|_whenHashtbl.memh_builtin_bools->tagiiBuiltinBoolean|"failwith"|"raise"->tagiiKeywordExn|_->())