(* Yoann Padioleau
*
* Copyright (C) 2002-2008 Yoann Padioleau
* Copyright (C) 2011 Facebook
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License (GPL)
* version 2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* file license.txt for more details.
*)openCommonmoduleTH=Token_helpers_cppmoduleTV=Token_views_cppmoduleParser=Parser_cppmodulePI=Parse_infoopenParser_cppopenToken_views_cppopenParsing_hacks_lib(*****************************************************************************)(* Prelude *)(*****************************************************************************)(*
* This file gathers parsing heuristics related to the C preprocessor cpp.
*)(*****************************************************************************)(* Helpers *)(*****************************************************************************)
(* Local alias for the ==~ operator of Common2, so that it can be written
 * infix without qualification. In the code visible in this file it is
 * only applied as  s ==~ regexp_xxx  inside when-guards, where s is the
 * string carried by a TIdent token.
 * NOTE(review): presumably a string-against-regexp test -- confirm in Common2.
 *)
let(==~)=Common2.(==~)(* the pair is the status of '()' and '{}', ex: (-1,0)
* if too much ')' and good '{}'
* could do for [] too ?
* could do for ',' if encounter ',' at "toplevel", not inside () or {}
* then if have ifdef, then certainly can lead to a problem.
*)
(* Visits every token of one ifdef clause (through iter_token_ifdef) and
 * returns the pair (paren_balance, brace_balance): a token accepted by
 * TH.is_opar / TH.is_obrace adds 1 to the matching counter, a token
 * accepted by TH.is_cpar subtracts 1 from the paren counter and a TCBrace
 * token subtracts 1 from the brace counter. Every other token is ignored.
 *
 * Fix: the last arm used to test TH.is_obrace a second time. Since the
 * second arm already captures every such token, that arm was unreachable
 * and a closing brace never decremented the counter. It now matches the
 * closing-brace constructor TCBrace directly.
 *)
let (count_open_close_stuff_ifdef_clause: ifdef_grouped list -> (int * int)) =
  fun xs ->
    let cnt_paren = ref 0 in
    let cnt_brace = ref 0 in
    xs |> iter_token_ifdef (fun x ->
      match x.t with
      | tok when TH.is_opar tok -> incr cnt_paren
      | tok when TH.is_obrace tok -> incr cnt_brace
      | tok when TH.is_cpar tok -> decr cnt_paren
      | TCBrace _ -> decr cnt_brace
      | _ -> ()
    );
    !cnt_paren, !cnt_brace

(* look if there is a '{' just after the closing ')', and handling the
* possibility to have nested expressions inside nested parenthesis
*)(*
let is_really_foreach xs =
let rec is_foreach_aux = function
| [] -> false, []
| TCPar _::TOBrace _::xs -> true, xs
(* the following attempts to handle the cases where there is a
single statement in the body of the loop. undoubtedly more
cases are needed.
todo: premier(statement) - suivant(funcall)
*)
| TCPar _::TIdent _::xs -> true, xs
| TCPar _::Tif _::xs -> true, xs
| TCPar _::Twhile _::xs -> true, xs
| TCPar _::Tfor _::xs -> true, xs
| TCPar _::Tswitch _::xs -> true, xs
| TCPar _::xs -> false, xs
| TOPar _::xs ->
let (_, xs') = is_foreach_aux xs in
is_foreach_aux xs'
| x::xs -> is_foreach_aux xs
in
is_foreach_aux xs +> fst
*)

(* TODO: set_ifdef_parenthize_info ?? from parsing_c/ *)

(* Returns the elements of xs, in their original order, minus those whose
 * token satisfies TH.is_comment or TH.is_pp_instruction. A TDefine token
 * that is not a comment is always kept.
 *)
let filter_pp_or_comment_stuff xs =
  let keep x =
    match x.TV.t with
    | tok when TH.is_comment tok -> false
    (* don't want drop the define, or if drop, have to drop
     * also its body otherwise the line heuristics may be lost
     * by not finding the TDefine in column 0 but by finding
     * a TDefineIdent in a column > 0
     *
     * todo? but define often contain some unbalanced {
     *)
    | Parser.TDefine _ -> true
    | tok when TH.is_pp_instruction tok -> false
    | _ -> true
  in
  List.filter keep xs

(*****************************************************************************)
(* Ifdef keeping/passing *)
(*****************************************************************************)

(* #if 0, #if 1, #if LINUX_VERSION handling.
 *
 * For every Ifdefbool group: the directive tokens are passed to
 * set_as_comment with CppDirective, then
 *  - when the condition is positive, every clause but the first is
 *    passed to set_as_comment with CppOther;
 *  - otherwise the first clause and every following clause except the
 *    last one are.
 * Plain Ifdef groups are only traversed recursively.
 * Raises Impossible when an Ifdefbool has no clause at all.
 *)
let rec find_ifdef_bool xs =
  let comment_out_clause clause =
    iter_token_ifdef (set_as_comment Token_cpp.CppOther) clause
  in
  let visit = function
    | NotIfdefLine _ -> ()
    | Ifdef (clauses, _info_ifdef_stmt) ->
        List.iter find_ifdef_bool clauses
    | Ifdefbool (is_ifdef_positif, clauses, info_ifdef_stmt) ->
        pr2_pp
          (if is_ifdef_positif
           then "commenting parts of a #if 1 or #if LINUX_VERSION"
           else "commenting a #if 0 or #if LINUX_VERSION or __cplusplus");
        (match clauses with
         | [] -> raise Impossible
         | first_clause :: other_clauses ->
             List.iter (set_as_comment Token_cpp.CppDirective) info_ifdef_stmt;
             if is_ifdef_positif
             then List.iter comment_out_clause other_clauses
             else begin
               comment_out_clause first_clause;
               (* keep only the last clause; when there is no other clause
                * there was no #else and nothing more to do *)
               match List.rev other_clauses with
               | _last :: earlier -> List.iter comment_out_clause earlier
               | [] -> ()
             end)
  in
  List.iter visit xs

let thresholdIfdefSizeMid = 6

(* infer ifdef involving not-closed expressions/statements.
 *
 * An Ifdef with at least two clauses is considered only when every clause
 * has at most thresholdIfdefSizeMid elements and contains no nested
 * ifdef. If the paren/brace counts of its FIRST clause are not both zero,
 * the directives and all clauses after the first are passed to
 * set_as_comment. Clauses are then traversed recursively in all cases.
 * Raises Impossible when an Ifdef has no clause at all.
 *)
let rec find_ifdef_mid xs =
  let is_plain_line = function
    | NotIfdefLine _ -> true
    | _ -> false
  in
  let visit = function
    | NotIfdefLine _ -> ()
    (* no need complex analysis for ifdefbool *)
    | Ifdefbool (_, clauses, _info_ifdef_stmt) ->
        List.iter find_ifdef_mid clauses
    | Ifdef (clauses, info_ifdef_stmt) ->
        (match clauses with
         | [] -> raise Impossible
         | [_single] -> ()
         | _first :: ((_ :: _) as later_clauses) ->
             (* don't analyse big ifdef *)
             let all_small () =
               List.for_all
                 (fun clause -> List.length clause <= thresholdIfdefSizeMid)
                 clauses
             in
             (* don't want nested ifdef *)
             let all_flat () =
               List.for_all (List.for_all is_plain_line) clauses
             in
             if all_small () && all_flat () then begin
               let counts =
                 List.map count_open_close_stuff_ifdef_clause clauses in
               let cnt_paren, cnt_brace = List.hd counts in
               (* Only the first clause is inspected. Stricter variants
                * (all clauses sharing the same counts, or any clause being
                * unbalanced) were tried by the original author and left
                * disabled.
                *)
               if cnt_paren <> 0 || cnt_brace <> 0 then begin
                 pr2_pp "found ifdef-mid-something";
                 (* keep only first, treat the rest as comment *)
                 List.iter
                   (set_as_comment Token_cpp.CppDirective) info_ifdef_stmt;
                 List.iter
                   (iter_token_ifdef (set_as_comment Token_cpp.CppOther))
                   later_clauses
               end
             end);
        List.iter find_ifdef_mid clauses
  in
  List.iter visit xs

let thresholdFunheaderLimit = 4

(* ifdef defining alternate function header, type.
 *
 * Recognises three shapes of Ifdef whose clauses start in column 0 and
 * which are immediately followed by a line starting with a TOBrace in
 * column 0. In each of them the directives and every alternative after
 * the first one are passed to set_as_comment, so that only the first
 * header is left untouched. Any other Ifdef/Ifdefbool is traversed
 * recursively.
 *)
let rec find_ifdef_funheaders xs =
  let comment_out_directives infos =
    List.iter (set_as_comment Token_cpp.CppDirective) infos
  in
  let comment_out_tokens toks =
    List.iter (set_as_comment Token_cpp.CppOther) toks
  in
  match xs with
  | [] -> ()
  | NotIfdefLine _ :: rest -> find_ifdef_funheaders rest

  (* ifdef-funheader if ifdef with 2 lines and a '{' in next line *)
  | Ifdef
      ([ NotIfdefLine ({col = 0; _} :: _) :: ifdefblock1;
         NotIfdefLine (({col = 0; _} as tok2) :: toks2) :: ifdefblock2 ],
       info_ifdef_stmt)
    :: NotIfdefLine ({t = TOBrace _; col = 0; _} :: _)
    :: rest
    when List.length ifdefblock1 <= thresholdFunheaderLimit
      && List.length ifdefblock2 <= thresholdFunheaderLimit ->
      find_ifdef_funheaders rest;
      comment_out_directives info_ifdef_stmt;
      comment_out_tokens (tok2 :: toks2);
      iter_token_ifdef (set_as_comment Token_cpp.CppOther) ifdefblock2

  (* ifdef with nested ifdef *)
  | Ifdef
      ([ [NotIfdefLine ({col = 0; _} :: _)];
         [Ifdef
            ([ [NotIfdefLine (({col = 0; _} as tok2) :: toks2)];
               [NotIfdefLine (({col = 0; _} as tok3) :: toks3)] ],
             info_ifdef_stmt2)] ],
       info_ifdef_stmt)
    :: NotIfdefLine ({t = TOBrace _; col = 0; _} :: _)
    :: rest ->
      find_ifdef_funheaders rest;
      comment_out_directives info_ifdef_stmt;
      comment_out_directives info_ifdef_stmt2;
      comment_out_tokens (tok2 :: tok3 :: (toks2 @ toks3))

  (* ifdef with elseif *)
  | Ifdef
      ([ [NotIfdefLine ({col = 0; _} :: _)];
         [NotIfdefLine (({col = 0; _} as tok2) :: toks2)];
         [NotIfdefLine (({col = 0; _} as tok3) :: toks3)] ],
       info_ifdef_stmt)
    :: NotIfdefLine ({t = TOBrace _; col = 0; _} :: _)
    :: rest ->
      find_ifdef_funheaders rest;
      comment_out_directives info_ifdef_stmt;
      comment_out_tokens (tok2 :: tok3 :: (toks2 @ toks3))

  | Ifdef (clauses, _) :: rest
  | Ifdefbool (_, clauses, _) :: rest ->
      List.iter find_ifdef_funheaders clauses;
      find_ifdef_funheaders rest

(*
let adjust_inifdef_include xs =
xs +> List.iter (function
| NotIfdefLine _ -> ()
| Ifdef (xxs, info_ifdef_stmt) | Ifdefbool (_, xxs, info_ifdef_stmt) ->
xxs +> List.iter (iter_token_ifdef (fun tokext ->
match tokext.t with
| Parser.TInclude (s1, s2, ii) ->
(* todo: inifdef_ref := true; *)
()
| _ -> ()
));
)
*)

(*****************************************************************************)
(* Builtin macros using standard.h or other defs *)
(*****************************************************************************)

(* now in pp_token.ml *)

(*****************************************************************************)
(* Stringification *)
(*****************************************************************************)

(* Looks inside every parenthesised group. When one comma-separated part
 * of the group holds at least one TString token and nothing but TString
 * or TIdent tokens, each TIdent of that part is retagged as
 * TIdent_MacroString. Otherwise the part is searched recursively.
 *)
let rec find_string_macro_paren xs =
  let is_string = function
    | PToken {t = TString _; _} -> true
    | _ -> false
  in
  let is_string_or_ident = function
    | PToken {t = TString _; _}
    | PToken {t = TIdent _; _} -> true
    | _ -> false
  in
  let retag_ident = function
    | PToken ({t = TIdent (_, _); _} as id) ->
        change_tok id (TIdent_MacroString (TH.info_of_tok id.t))
    | _ -> ()
  in
  let visit_part part =
    if List.exists is_string part && List.for_all is_string_or_ident part
    then List.iter retag_ident part
    else find_string_macro_paren part
  in
  match xs with
  | [] -> ()
  | PToken _ :: rest -> find_string_macro_paren rest
  | Parenthised (parts, _) :: rest ->
      List.iter visit_part parts;
      find_string_macro_paren rest

(*****************************************************************************)
(* Macros *)
(*****************************************************************************)

(* don't forget to recurse in each case.
* note that the code below is called after the ifdef phase simplification,
* so if this previous phase is buggy, then it may pass some code that
* could be matched by the following rules but will not.
**)
let rec find_macro_paren xs =
  (* retag an identifier token as TIdent_MacroString *)
  let retag_as_macro_string id =
    change_tok id (TIdent_MacroString (TH.info_of_tok id.t))
  in
  (* pass every token of one parenthesised group to set_as_comment *)
  let comment_out_parens kind xxs info_parens =
    iter_token_paren (set_as_comment kind) [Parenthised (xxs, info_parens)]
  in
  match xs with
  | [] -> ()

  (* attribute *)
  | PToken ({t = Tattribute _; _} as attr)
    :: Parenthised (xxs, info_parens)
    :: rest ->
      pr2_pp ("MACRO: __attribute detected ");
      comment_out_parens Token_cpp.CppAttr xxs info_parens;
      set_as_comment Token_cpp.CppAttr attr;
      find_macro_paren rest

  (* stringification
   *
   * the order of the matching clause is important
   *)

  (* string macro with params, before case *)
  | PToken {t = TString _; _}
    :: PToken ({t = TIdent (_, _); _} as id)
    :: Parenthised (xxs, info_parens)
    :: rest ->
      retag_as_macro_string id;
      comment_out_parens Token_cpp.CppMacro xxs info_parens;
      find_macro_paren rest

  (* after case *)
  | PToken ({t = TIdent (_, _); _} as id)
    :: Parenthised (xxs, info_parens)
    :: PToken {t = TString _; _}
    :: rest ->
      retag_as_macro_string id;
      comment_out_parens Token_cpp.CppMacro xxs info_parens;
      find_macro_paren rest

  (* for the case where the string is not inside a funcall, but
   * for instance in an initializer.
   *)

  (* string macro variable, before case *)
  | PToken {t = TString ((str, _), _); _}
    :: PToken ({t = TIdent (_, _); _} as id)
    :: rest ->
      (* c++ext: an identifier following the string "C" is left alone;
       * the recursion happens in both cases *)
      if str <> "C" then retag_as_macro_string id;
      find_macro_paren rest

  (* after case *)
  | PToken ({t = TIdent (_, _); _} as id)
    :: PToken {t = TString _; _}
    :: rest ->
      retag_as_macro_string id;
      find_macro_paren rest

  (* TODO: cooperating with standard.h *)
  | PToken ({t = TIdent ("MACROSTATEMENT", _); _} as id) :: rest ->
      change_tok id (TIdent_MacroStmt (TH.info_of_tok id.t));
      find_macro_paren rest

  (* recurse *)
  | PToken _ :: rest -> find_macro_paren rest
  | Parenthised (xxs, _) :: rest ->
      List.iter find_macro_paren xxs;
      find_macro_paren rest

(* don't forget to recurse in each case *)
let rec find_macro_lineparen xs =
  let retag_as_macro_decl macro =
    let info = TH.info_of_tok macro.t in
    change_tok macro (TIdent_MacroDecl (PI.str_of_info info, info))
  in
  let retag_as_macro_stmt macro =
    change_tok macro (TIdent_MacroStmt (TH.info_of_tok macro.t))
  in
  let comment_out_parens xxs info_parens =
    iter_token_paren
      (set_as_comment Token_cpp.CppMacro)
      [Parenthised (xxs, info_parens)]
  in
  (* tokens that may start the line after a  MACRO(...)  line *)
  let may_follow_macro_call ctx tok =
    match tok with
    | TOBrace _ -> false (* otherwise would match funcdecl *)
    | TCBrace _ when List.hd ctx <> InFunction -> false
    | TPtVirg _ | TCol _ -> false
    | tok when TH.is_binary_operator tok -> false
    | _ -> true
  in
  (* tokens that may start the line after a bare  MACRO  line *)
  let may_follow_bare_macro ctx tok =
    match tok with
    | TPtVirg _ | TOr _ -> false
    | TCBrace _ when List.hd ctx <> InFunction -> false
    | tok when TH.is_binary_operator tok -> false
    | _ -> true
  in
  (* tokens accepted on a following line that is not indented further *)
  let starts_outer_stmt ctx tok =
    match tok with
    | TCBrace _ when List.hd ctx = InFunction -> true
    | Treturn _ | Tif _ | Telse _ -> true
    | _ -> false
  in
  match xs with
  | [] -> ()

  (* firefoxext: ex: NS_DECL_NSIDOMNODELIST *)
  | Line [PToken ({t = TIdent (s, _); _} as macro)] :: rest
    when s ==~ regexp_ns_decl_like ->
      set_as_comment Token_cpp.CppMacro macro;
      find_macro_lineparen rest

  (* firefoxext: ex: NS_DECL_NSIDOMNODELIST; *)
  | Line [PToken ({t = TIdent (s, _); _} as macro);
          PToken {t = TPtVirg _; _}]
    :: rest
    when s ==~ regexp_ns_decl_like ->
      set_as_comment Token_cpp.CppMacro macro;
      find_macro_lineparen rest

  (* firefoxext: ex: NS_IMPL_XXX(a) *)
  | Line [PToken ({t = TIdent (s, _); _} as macro);
          Parenthised (xxs, info_parens)]
    :: rest
    when s ==~ regexp_ns_decl_like ->
      comment_out_parens xxs info_parens;
      set_as_comment Token_cpp.CppMacro macro;
      find_macro_lineparen rest

  (* linuxext: ex: static [const] DEVICE_ATTR(); *)
  | Line [PToken {t = Tstatic _; _};
          PToken ({t = TIdent (s, _); _} as macro);
          Parenthised (_, _);
          PToken {t = TPtVirg _; _}]
    :: rest
    when s ==~ regexp_macro ->
      retag_as_macro_decl macro;
      find_macro_lineparen rest

  (* the static const case *)
  | Line [PToken {t = Tstatic _; _};
          PToken ({t = Tconst _; _} as const);
          PToken ({t = TIdent (s, _); _} as macro);
          Parenthised (_, _);
          PToken {t = TPtVirg _; _}]
    :: rest
    when s ==~ regexp_macro ->
      retag_as_macro_decl macro;
      (* need retag this const, otherwise ambiguity in grammar
         21: shift/reduce conflict (shift 121, reduce 137) on Tconst
         decl2 : Tstatic . TMacroDecl TOPar argument_list TCPar ...
         decl2 : Tstatic . Tconst TMacroDecl TOPar argument_list TCPar ...
         storage_class_spec : Tstatic . (137)
      *)
      change_tok const (Tconst_MacroDeclConst (TH.info_of_tok const.t));
      find_macro_lineparen rest

  (* same but without trailing ';'
   *
   * I do not put the final ';' because it can be on a multiline and
   * because of the way mk_line is coded, we will not have access to
   * this ';' on the next line, even if next to the ')' *)
  | Line [PToken {t = Tstatic _; _};
          PToken ({t = TIdent (s, _); _} as macro);
          Parenthised (_, _)]
    :: rest
    when s ==~ regexp_macro ->
      retag_as_macro_decl macro;
      find_macro_lineparen rest

  (* on multiple lines *)
  | Line [PToken {t = Tstatic _; _}]
    :: Line [PToken ({t = TIdent (s, _); _} as macro);
             Parenthised (_, _);
             PToken {t = TPtVirg _; _}]
    :: rest
    when s ==~ regexp_macro ->
      retag_as_macro_decl macro;
      find_macro_lineparen rest

  (* linuxext: ex: DECLARE_BITMAP();
   *
   * Here I use regexp_declare and not regexp_macro because
   * Sometimes it can be a FunCallMacro such as DEBUG(foo());
   * Here we don't have the preceding 'static' so only way to
   * not have positive is to restrict to .*DECLARE.* macros.
   *
   * but there is a grammar rule for that, so don't need this case anymore
   * unless the parameter of the DECLARE_xxx are weird and can not be mapped
   * on a argument_list
   *)
  | Line [PToken ({t = TIdent (s, _); _} as macro);
          Parenthised (_, _);
          PToken {t = TPtVirg _; _}]
    :: rest
    when s ==~ regexp_declare ->
      retag_as_macro_decl macro;
      find_macro_lineparen rest

  (* toplevel macros.
   * module_init(xxx)
   *
   * Could also transform the TIdent in a TMacroTop but can have false
   * positive, so easier to just change the TCPar and so just solve
   * the end-of-stream pb of ocamlyacc
   *)
  | Line [PToken {t = TIdent (_, _); col = col1; where = ctx; _};
          Parenthised (_, info_parens)]
    :: rest
    when col1 = 0 ->
      let condition =
        (* to reduce number of false positive *)
        match rest with
        | Line (PToken ({col = col2; _} as other) :: _) :: _ ->
            TH.is_eof other.t
            || (col2 = 0 && may_follow_macro_call ctx other.t)
        | _ -> false
      in
      if condition then begin
        (* just to avoid the end-of-stream pb of ocamlyacc *)
        let tcpar = Common2.list_last info_parens in
        change_tok tcpar (TCPar_EOL (TH.info_of_tok tcpar.t))
        (* macro.t <- TMacroTop (s, TH.info_of_tok macro.t); *)
      end;
      find_macro_lineparen rest

  (* macro with parameters
   * ex: DEBUG()
   *      return x;
   *)
  | Line [PToken ({t = TIdent (_, _); col = col1; where = ctx; _} as macro);
          Parenthised (xxs, info_parens)]
    :: (Line (PToken ({col = col2; _} as other) :: _) as line2)
    :: rest
    (* when s ==~ regexp_macro *) ->
      let condition =
        (col1 = col2 && may_follow_macro_call ctx other.t)
        || (col2 <= col1 && starts_outer_stmt ctx other.t)
      in
      (* nothing is retagged for a macro sitting in column 0 *)
      if condition && col1 <> 0 then begin
        retag_as_macro_stmt macro;
        comment_out_parens xxs info_parens
      end;
      find_macro_lineparen (line2 :: rest)

  (* linuxext:? single macro
   * ex: LOCK
   *      foo();
   *     UNLOCK
   *)
  | Line [PToken ({t = TIdent (_, _); col = col1; where = ctx; _} as macro)]
    :: (Line (PToken ({col = col2; _} as other) :: _) as line2)
    :: rest
    (* when s ==~ regexp_macro *) ->
      let condition =
        (col1 = col2
         (* otherwise can match typedef of fundecl *)
         && col1 <> 0
         && may_follow_bare_macro ctx other.t)
        || (col2 <= col1 && starts_outer_stmt ctx other.t)
      in
      if condition then retag_as_macro_stmt macro;
      find_macro_lineparen (line2 :: rest)

  | _ :: rest -> find_macro_lineparen rest

(*****************************************************************************)
(* #Define tobrace init *)
(*****************************************************************************)

(* true when tok2 is a TInt, TString or TIdent and tok3 is a TComma *)
let is_init tok2 tok3 =
  match tok2.t, tok3.t with
  | (TInt _ | TString _ | TIdent _), TComma _ -> true
  | _ -> false

(* Scans a flat paren-grouped token list for #define bodies that open
 * with a brace and retags that brace as TOBrace_DefineInit when the
 * define looks like an initializer.
 *)
let find_define_init_brace_paren xs =
  let rec scan = function
    | [] -> ()

    (* mainly for firefox *)
    | PToken {t = TDefine _; _}
      :: PToken {t = TIdent_Define (_, _); _}
      :: PToken ({t = TOBrace i1; _} as tokbrace)
      :: PToken tok2
      :: PToken tok3
      :: rest ->
        if is_init tok2 tok3
        then change_tok tokbrace (TOBrace_DefineInit i1);
        scan rest

    (* mainly for linux, especially in sound/ *)
    | PToken {t = TDefine _; _}
      :: PToken {t = TIdent_Define (s, _); col = c; _}
      :: Parenthised (_, {col = c2; _} :: _)
      :: PToken ({t = TOBrace i1; _} as tokbrace)
      :: PToken tok2
      :: PToken tok3
      :: rest
      when c2 = c + String.length s ->
        if is_init tok2 tok3
        then change_tok tokbrace (TOBrace_DefineInit i1);
        scan rest

    (* ugly: for plan9, too general? *)
    | PToken {t = TDefine _; _}
      :: PToken {t = TIdent_Define (_, _); _}
      :: Parenthised (_, _)
      :: PToken ({t = TOBrace i1; _} as tokbrace)
      (* can be more complex expression than just an int, like (b)&... *)
      :: Parenthised (_, _)
      :: PToken {t = (TAnd _ | TOr _); _}
      :: rest ->
        change_tok tokbrace (TOBrace_DefineInit i1);
        scan rest

    (* recurse *)
    | PToken _ :: rest -> scan rest
    | Parenthised (_, _) :: rest ->
        (* no need to descend into the group for tobrace init *)
        scan rest
  in
  scan xs