(* Yoann Padioleau
*
* Copyright (C) 2010 Facebook
* Copyright (C) 2019 r2c
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public License
* version 2.1 as published by the Free Software Foundation, with the
* special exception on linking described in file license.txt.
*
* This library is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the file
* license.txt for more details.
*)
open Common
open Ast_python
open Highlight_code

module T = Parser_python
module V = Visitor_python
module E = Entity_code

(*****************************************************************************)
(* Prelude *)
(*****************************************************************************)
(* Syntax highlighting for Python code for codemap (and now also efuns).
 *)

(*****************************************************************************)
(* Helpers for when we have global-analysis information *)
(*****************************************************************************)

(* The values below are placeholders: the real ones are computed in a
 * later phase, in rewrite_categ_using_entities in pfff_visual.
 *)

(* Placeholder attached to every entity definition tagged in this file. *)
let def2 = Def2 NoUse

(* Placeholder attached to every entity use tagged in this file. *)
let use2 = Use2 (NoInfoPlace, UniqueDef, MultiUse)

(* Names shown as builtins (instead of plain function uses) when they are
 * directly followed by an opening parenthesis.
 *)
let builtin_functions =
  [ "isinstance"; "set"; "dict" ] |> Common.hashset_of_list

(*****************************************************************************)
(* Code highlighter *)
(*****************************************************************************)

(* The idea of the code below is to visit the program either through its
* AST or its list of tokens. The tokens are easier for tagging keywords,
 * numbers and basic entities. The AST is better for tagging idents
* to figure out what kind of ident it is.
*)

(* [visit_program ~tag_hook _prefs (program, toks)] assigns a highlighting
 * category to the tokens of one Python file.
 *
 * - [tag_hook ii categ] is the callback invoked for every decision taken.
 * - [_prefs] is accepted but not used here.
 * - [program] is an optional AST. When it is [None] the token-pattern
 *   heuristics of the second pass are the only source of identifier tags.
 * - [toks] is the full token list of the file.
 *
 * Three passes run in order: the AST visitor, a pass over runs of adjacent
 * tokens, and a pass over individual tokens. Raises [Impossible] if an
 * INDENT or DEDENT token is found in [toks]. Returns unit.
 *)
let visit_program ~tag_hook _prefs (program, toks) =
  (* Infos that already went through [tag_hook]. Specific rules run first,
   * so the general rules that come later can be written loosely: they
   * skip anything recorded here (e.g., a Method use vs a Field use).
   *)
  let already_tagged = Hashtbl.create 101 in
  let untagged ii = not (Hashtbl.mem already_tagged ii) in
  let tag ii categ =
    tag_hook ii categ;
    Hashtbl.replace already_tagged ii true
  in
  let tag_if_not_tagged ii categ =
    if untagged ii then tag ii categ
  in
  (* Same as [tag_if_not_tagged] but for a (string, info) name. *)
  let tag_name (_s, ii) categ = tag_if_not_tagged ii categ in
  let tag_entity name kind usedef2 =
    tag_name name (Entity (kind, usedef2))
  in
  (* Used for names met while visiting a type. *)
  let tag_type_name name =
    match fst name with
    | "int" -> tag_name name TypeInt
    | _ -> tag_entity name E.Type use2
  in

  (* Without an AST, identifiers can only be guessed from token patterns. *)
  let lexer_based_tagger =
    match program with
    | None -> true
    | Some _ -> false
  in
  (* Presumably fills in the [resolved] references read by the visitor
   * below; TODO confirm in Resolve_python.
   *)
  program |> Common.do_option Resolve_python.resolve;

  (* -------------------------------------------------------------------- *)
  (* AST phase 1 *)
  (* -------------------------------------------------------------------- *)
  (* Try to better colorize identifiers, which can be many different
   * things: a field, a type, a function, a parameter, etc.
   *)
  let in_class = ref false in
  let in_type = ref false in
  let in_decorator = ref false in

  let visitor = V.mk_visitor { V.default_visitor with
    (* Use 'k e' as much as possible below. There is no need to call the
     * visitor by hand on each sub-element: go deep only to tag special
     * stuff (e.g., a local var in an exception handler) and then just
     * recurse from the top with 'k e'.
     *)
    V.kexpr = (fun (k, _) e ->
      (match e with
       | Name (name, ctx, resolved) ->
           if !in_type then tag_type_name name
           else if !in_decorator
           then tag_name name Highlight_code.Attribute
           else begin
             match !resolved with
             | Ast_python.Parameter ->
                 tag_name name (Highlight_code.Parameter Use)
             | GlobalVar ->
                 let usedef =
                   match ctx with
                   | Store -> def2
                   | Load -> use2
                   | _ -> use2 (* TODO *)
                 in
                 tag_entity name E.Global usedef
             | ClassField ->
                 let usedef =
                   match ctx with
                   | Store -> def2
                   | Load -> use2
                   | _ -> use2 (* TODO *)
                 in
                 tag_entity name E.Field usedef
             | LocalVar ->
                 let usedef =
                   match ctx with
                   | Store -> Def
                   | Load -> Use
                   | _ -> Use (* TODO *)
                 in
                 tag_name name (Local usedef)
             | ImportedEntity _ -> tag_entity name E.Function use2
             | ImportedModule _ -> tag_entity name E.Module use2
             | NotResolved ->
                 (*
                 let kind = E.Global in
                 tag_name name (Entity (kind, (Use2 fake_no_use2)))
                 *)
                 ()
           end
       | Call (f, args) ->
           (* The callee is tagged before recursing, so the generic Name
            * and Attribute cases will find it already tagged.
            *)
           (match f with
            | Name (name, _ctx, _resolved) ->
                tag_entity name E.Function use2
            | Ast_python.Attribute (_e, name, _ctx) ->
                tag_entity name E.Method use2
            | _ -> ());
           args |> List.iter (function
             | ArgKwd (name, _) -> tag_name name Comment
             | _ -> ())
       | Ast_python.Attribute (_e, name, _ctx) ->
           if !in_type
           then tag_type_name name
           else tag_entity name E.Field use2
       (* TODO
       | ListComp (_, xs) ->
         xs |> List.iter (fun (target, _iter, _ifs) ->
           match target with
           | Name (name, _ctx, _res) ->
             tag_name name (Local Def);
           (* tuples? *)
           | _ -> ()
         );
         k x
       *)
       (* the general case *)
       | _ -> ());
      k e
    );

    V.kstmt = (fun (k, _) stmt ->
      match stmt with
      | FunctionDef (name, _params, _typopt, _body, _decorators) ->
          let kind = if !in_class then E.Method else E.Function in
          tag_entity name kind def2;
          k stmt
      | ClassDef (name, _bases, _body, _decorators) ->
          tag_entity name E.Class def2;
          (* the class body is visited with [in_class] set *)
          Common.save_excursion in_class true (fun () -> k stmt)
      | Import aliases ->
          aliases |> List.iter (fun (dotted_name, asname_opt) ->
            dotted_name |> List.iter (fun name ->
              tag_entity name E.Module use2);
            asname_opt |> Common.do_option (fun asname ->
              tag_entity asname E.Module def2));
          k stmt
      | ImportFrom (dotted_name, aliases, _) ->
          dotted_name |> List.iter (fun name ->
            tag_entity name E.Module use2);
          aliases |> List.iter (fun (name, asname_opt) ->
            tag_entity name E.Function use2;
            asname_opt |> Common.do_option (fun asname ->
              tag_entity asname E.Function def2));
          k stmt
      | With (_e, eopt, _stmts) ->
          eopt |> Common.do_option (function
            | Name (name, _ctx, _res) -> tag_name name (Local Def)
            (* todo: tuples? *)
            | _ -> ());
          k stmt
      | TryExcept (_stmts1, excepts, _stmts2) ->
          excepts |> List.iter (fun (ExceptHandler (_typ, bound, _)) ->
            match bound with
            | Some (Name (name, _ctx, _res)) -> tag_name name (Local Def)
            (* tuples? *)
            | Some _ | None -> ());
          k stmt
      (* general case *)
      | _ -> k stmt
    );

    V.ktype_ = (fun (k, _) ty ->
      Common.save_excursion in_type true (fun () -> k ty)
    );
    V.kdecorator = (fun (k, _) deco ->
      Common.save_excursion in_decorator true (fun () -> k deco)
    );
    V.kparameter = (fun (k, _) param ->
      (match param with
       | ParamClassic ((name, _), _)
       | ParamStar (name, _)
       | ParamPow (name, _) ->
           tag_name name (Parameter Def));
      k param
    );
  }
  in
  program |> Common.do_option (fun prog -> visitor (Program prog));

  (* -------------------------------------------------------------------- *)
  (* tokens phase 1 (list of tokens) *)
  (* -------------------------------------------------------------------- *)

  (* True when the token heuristics are allowed to tag [ii]: nothing has
   * tagged it yet and there was no AST to do a better job.
   *)
  let lexer_may_tag ii = untagged ii && lexer_based_tagger in

  let rec scan = function
    | [] -> ()

    (* a little bit pad specific *)
    (*
    | T.TComment(ii)
      ::T.TCommentNewline (_ii2)
      ::T.TComment(ii3)
      ::T.TCommentNewline (_ii4)
      ::T.TComment(ii5)
      ::xs ->
        let s = Parse_info.str_of_info ii in
        let s5 = Parse_info.str_of_info ii5 in
        (match () with
        | _ when s =~ ".*\\*\\*\\*\\*" && s5 =~ ".*\\*\\*\\*\\*" ->
            tag ii CommentEstet;
            tag ii5 CommentEstet;
            tag ii3 CommentSection0
        | _ when s =~ ".*------" && s5 =~ ".*------" ->
            tag ii CommentEstet;
            tag ii5 CommentEstet;
            tag ii3 CommentSection1
        | _ when s =~ ".*####" && s5 =~ ".*####" ->
            tag ii CommentEstet;
            tag ii5 CommentEstet;
            tag ii3 CommentSection2
        | _ ->
            ()
        );
        aux_toks xs
    *)

    (* poor man identifier tagger *)

    (* defs *)
    | T.CLASS _ :: T.NAME (_, ii_name) :: rest ->
        if lexer_may_tag ii_name
        then tag ii_name (Entity (E.Class, def2));
        scan rest
    | T.DEF _ :: T.NAME (_, ii_name) :: rest ->
        (* todo: actually could be a method if in class scope *)
        if lexer_may_tag ii_name
        then tag ii_name (Entity (E.Function, def2));
        scan rest

    (* uses *)
    | T.NAME (_, ii_obj) :: T.DOT _ :: T.NAME (_, ii_meth) :: T.LPAREN _
      :: rest ->
        if lexer_may_tag ii_meth
        then begin
          tag ii_meth (Entity (E.Method, use2));
          tag_if_not_tagged ii_obj (Local Use)
        end;
        scan rest
    | T.NAME (s, ii_fn) :: T.LPAREN _ :: rest ->
        if lexer_may_tag ii_fn
        then begin
          if Hashtbl.mem builtin_functions s
          then tag ii_fn Builtin
          else tag ii_fn (Entity (E.Function, use2))
        end;
        scan rest
    | T.NAME (_, ii_obj) :: T.DOT _ :: (T.NAME (_, ii_fld) as fld) :: rest ->
        (match rest with
         | T.DOT _ :: _ ->
             if lexer_may_tag ii_fld
             then tag ii_fld (Entity (E.Field, use2));
             (* NOTE(review): unlike the two sibling branches, the receiver
              * is tagged here even when an AST was available. It is not
              * clear whether this is intended; behavior is kept as is.
              *)
             tag_if_not_tagged ii_obj (Local Use);
             (* a.b.c: resume at b so that b is seen as the receiver of c *)
             scan (fld :: rest)
         | _ ->
             if lexer_may_tag ii_fld
             then begin
               tag ii_fld (Entity (E.Field, use2));
               (* TODO *)
               tag_if_not_tagged ii_obj (Local Use)
             end;
             scan rest)

    (*
    | T.NAME (s, ii1)::xs ->
        if s =~ "[a-z]" then begin
          if not (Hashtbl.mem already_tagged ii1) && lexer_based_tagger
          then tag ii1 (Local (Use));
        end;
        aux_toks xs
    *)
    | _ :: rest -> scan rest
  in
  (* Comment/space tokens are dropped first so the patterns above can match
   * tokens that are adjacent modulo those.
   *)
  toks
  |> Common.exclude (function
    | T.TCommentSpace _ -> true
    | _ -> false)
  |> scan;

  (* -------------------------------------------------------------------- *)
  (* Tokens phase 2 (individual tokens) *)
  (* -------------------------------------------------------------------- *)
  toks |> List.iter (function
    (* specials *)
    | T.TUnknown ii -> tag ii Error
    | T.EOF _ -> ()
    | T.INDENT | T.DEDENT ->
        (* filtered in parse_python.ml with is_special *)
        raise Impossible

    (* comments; in lexer_python.mll comments, spaces and newlines are
     * sometimes put together
     *)
    | T.TComment ii
    | T.TCommentSpace ii
    | T.NEWLINE ii ->
        tag_if_not_tagged ii Comment

    (* values *)
    | T.STR (_, ii)
    | T.FSTRING_START ii | T.FSTRING_END ii | T.FSTRING_STRING (_, ii) ->
        tag ii String
    | T.FLOAT (_, ii) | T.INT (_, ii) | T.LONGINT (_, ii) | T.IMAG (_, ii) ->
        tag ii Number
    | T.TRUE ii | T.FALSE ii -> tag ii Boolean
    | T.NONE ii -> tag ii Null
    (*
    | T.TLongString (_s,ii) ->
      (* most of the time they are used as documentation strings *)
      tag ii Comment
    *)

    (* ident *)
    | T.NAME (s, ii) ->
        (match s with
         | "self" -> tag ii KeywordObject
         | "str" | "list" | "int" | "bool" | "object" | "Exception" ->
             tag_if_not_tagged ii (Entity (E.Type, use2))
         | "__file__" | "__dir__" | "__package__" | "__name__" ->
             tag_if_not_tagged ii CppOther
         (* any name that no earlier rule tagged ends up as Error *)
         | _ -> tag_if_not_tagged ii Error)

    (* keywords *)
    | T.DEF ii | T.LAMBDA ii
    | T.CONTINUE ii | T.BREAK ii | T.YIELD ii | T.RETURN ii
    | T.ASYNC ii | T.AWAIT ii
    | T.IS ii | T.IN ii | T.PASS ii | T.ASSERT ii | T.WITH ii | T.DEL ii
    | T.GLOBAL ii | T.NONLOCAL ii ->
        tag ii Keyword
    | T.IF ii | T.ELIF ii | T.ELSE ii -> tag ii KeywordConditional
    | T.FOR ii | T.WHILE ii -> tag ii KeywordLoop
    | T.TRY ii | T.FINALLY ii | T.RAISE ii | T.EXCEPT ii -> tag ii KeywordExn
    | T.CLASS ii -> tag ii KeywordObject
    | T.IMPORT ii | T.AS ii | T.FROM ii -> tag ii KeywordModule
    | T.NOT ii | T.AND ii | T.OR ii -> tag ii BuiltinBoolean

    (* symbols *)
    | T.FSTRING_LBRACE ii
    | T.EQ ii
    | T.ADDEQ ii | T.SUBEQ ii | T.MULTEQ ii | T.DIVEQ ii
    | T.MODEQ ii | T.POWEQ ii | T.FDIVEQ ii
    | T.ANDEQ ii | T.OREQ ii | T.XOREQ ii
    | T.LSHEQ ii | T.RSHEQ ii
    | T.LBRACE ii | T.RBRACE ii
    | T.LBRACK ii | T.RBRACK ii
    | T.LPAREN ii | T.RPAREN ii
    | T.ADD ii | T.SUB ii
    | T.MULT ii | T.DIV ii | T.MOD ii | T.FDIV ii | T.POW ii
    | T.LSHIFT ii | T.RSHIFT ii
    | T.BITXOR ii | T.BITOR ii | T.BITAND ii | T.BITNOT ii
    | T.EQUAL ii | T.NOTEQ ii
    | T.LT ii | T.GT ii | T.LEQ ii | T.GEQ ii
    | T.DOT ii | T.COLON ii | T.COMMA ii | T.SEMICOL ii
    | T.BACKQUOTE ii | T.ELLIPSES ii | T.AT ii ->
        tag ii Punctuation
    (*
    | T.TEllipsis ii
        -> tag ii Punctuation
    *)
  )