(* Yoann Padioleau
*
* Copyright (C) 2011,2014 Facebook
* Copyright (C) 2002-2008 Yoann Padioleau
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License (GPL)
* version 2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* file license.txt for more details.
*)moduleFlag=Flag_parsing_cppmoduleTH=Token_helpers_cppmoduleTV=Token_views_cppmoduleT=Parser_cppmodulePI=Parse_info(*****************************************************************************)(* Prelude *)(*****************************************************************************)(*
* This module tries to detect some cpp, C, or C++ idioms so that we can
* parse as-is files by adjusting or commenting some tokens.
*
* Sometimes we use some name conventions, sometimes indentation information,
* sometimes we do some kind of lalr(k) by finding patterns. We often try to
* work on a better token representation, like ifdef-paren-ized, brace-ized,
* paren-ized, so that we can pattern-match more easily
* complex idioms (see token_views_cpp.ml).
* We also try to get more contextual information such as whether the
* token is in an initializer because many idioms are different
* depending on the context (see token_views_context.ml).
*
* Examples of cpp idioms:
* - if 0 for commenting stuff (not always code, sometimes any text)
* - ifdef old version
* - ifdef funheader
* - ifdef statements, ifdef expression, ifdef-mid
* - macro toplevel (with or without a trailing ';')
* - macro foreach
* - macro higher order
* - macro declare
* - macro debug
* - macro no ';'
* - macro string, and macro function string taking param and ##
* - macro attribute
*
* Examples of C typedef idioms:
* - x * y
*
* Examples of C++ idioms:
* - x<...> for templates. People rarely do x < y > z to express
* relational expressions, so a < followed later by a > is probably a
* template.
*
* See the TIdent_MacroXxx in parser_cpp.mly and MacroXxx in ast_cpp.ml
*
* We also do other stuff involving cpp like expanding macros,
* and we try to parse define bodies by finding the virtual end-of-line
* token that ends each define. But now most of the code is actually in
* pp_token.ml. It is related to what is in the yacfe configuration file
* (e.g. standard.h)
*)

(*****************************************************************************)
(* Helpers *)
(*****************************************************************************)

(* Keep only the extended tokens whose underlying token (field TV.t) is
 * not a comment according to TH.is_comment.
 *)
let filter_comment_stuff xs =
  List.filter (fun ext -> not (TH.is_comment ext.TV.t)) xs

(*****************************************************************************)
(* Post processing *)
(*****************************************************************************)

(* To do at the very very end.
 *
 * Walk the token list and give every ExpandedTok and FakeTokStr token a
 * virtual position expressed as a pair (anchor, delta):
 *  - anchor is the location of the last OriginTok token seen,
 *  - delta is the number of characters (sum of token string lengths)
 *    emitted since the start of that original token.
 * Tokens that come before the first OriginTok are returned unchanged.
 * Once an OriginTok has been seen, meeting an Ab token raises
 * Failure "abstract not expected".
 *)
let insert_virtual_positions l =
  let width info = String.length (PI.str_of_info info) in

  (* Phase 2: we have an anchor, so non-original tokens can be relocated. *)
  let rec after_origin anchor delta = function
    | [] -> []
    | tok :: rest ->
        let info = TH.info_of_tok tok in
        (* rebuild tok with its position info replaced by pinfo *)
        let relocate pinfo =
          TH.visitor_info_of_tok (fun i -> Cst_cpp.rewrap_pinfo pinfo i) tok
        in
        (match info.PI.token with
         | PI.OriginTok _ ->
             (* a real token: it becomes the new anchor *)
             let anchor = PI.token_location_of_info info in
             tok :: (after_origin anchor (width info) rest)
         | PI.ExpandedTok (pi, _, _) ->
             relocate (PI.ExpandedTok (pi, anchor, delta))
             :: (after_origin anchor (delta + (width info)) rest)
         | PI.FakeTokStr (s, _) ->
             relocate (PI.FakeTokStr (s, (Some (anchor, delta))))
             :: (after_origin anchor (delta + (width info)) rest)
         | PI.Ab -> failwith "abstract not expected")
  in

  (* Phase 1: no anchor yet, pass tokens through until the first OriginTok. *)
  let rec before_origin = function
    | [] -> []
    | tok :: rest ->
        let info = TH.info_of_tok tok in
        (match info.PI.token with
         | PI.OriginTok _ ->
             let anchor = PI.token_location_of_info info in
             tok :: (after_origin anchor (width info) rest)
         | _ -> tok :: before_origin rest)
  in
  before_origin l

(*****************************************************************************)
(* C vs C++ *)
(*****************************************************************************)

(* When lang is C, every token for which TH.is_cpp_keyword holds is turned
 * into a plain T.TIdent carrying the same string and the same info.
 * For any other language the tokens are returned as they are.
 *)
let fix_tokens_for_language lang xs =
  let as_ident tok =
    let info = TH.info_of_tok tok in
    T.TIdent (PI.str_of_info info, info)
  in
  List.map
    (fun tok ->
       if lang = Flag_parsing_cpp.C && TH.is_cpp_keyword tok
       then as_ident tok
       else tok)
    xs

(*****************************************************************************)
(* Fix tokens *)
(*****************************************************************************)
(*
* Main entry point for the token reclassifier which generates "fresh" tokens.
*
* The order of the rules is important. For instance, if you put the
* action heuristic first then, because of ifdefs, some parentheses may
* look unclosed; the heuristic may then believe it found a higher-order
* macro and eat too many tokens. So it is important to run
* the ifdef heuristic first.
*
* Note that the functions below work on a list of token_extended
* or on views on top of a list of token_extended. The token_extended record
* contains mutable fields which explains the (ugly but working) imperative
* style of the code below.
*
* I recompute the filtered 'cleaner' view multiple times because the
* mutable fields may have been changed, and so we may have more comments
* in the original token list.
*)(* we could factorize with fix_tokens_cpp, but for debugging purpose it
* might be good to have two different functions and do far less in
* fix_tokens_c (even though the extra steps in fix_tokens_cpp should
* have no effect on regular C code).
*)

(* Token reclassifier for C.
 *
 * Pipeline, in this exact order: define fixing, C++ keywords turned back
 * into identifiers, ifdef heuristics, macro expansion, macro heuristics,
 * context tagging, typedef inference, and finally virtual positions.
 * The filtered views are recomputed before each phase because the
 * heuristics work by mutating the extended tokens.
 *)
let fix_tokens_c ~macro_defs tokens =
  let c_tokens =
    fix_tokens_for_language Flag.C
      (Parsing_hacks_define.fix_tokens_define tokens)
  in
  let extended = Common2.acc_map TV.mk_token_extended c_tokens in

  (* ifdef *)
  let ifdef_view = TV.mk_ifdef (filter_comment_stuff extended) in
  Parsing_hacks_pp.find_ifdef_funheaders ifdef_view;
  Parsing_hacks_pp.find_ifdef_bool ifdef_view;
  Parsing_hacks_pp.find_ifdef_mid ifdef_view;

  (* macro part 1 *)
  let expand_view =
    TV.mk_parenthised (Parsing_hacks_pp.filter_pp_or_comment_stuff extended)
  in
  Pp_token.apply_macro_defs macro_defs expand_view;
  (* because the before field is used by apply_macro_defs *)
  let rebuilt = TV.rebuild_tokens_extented extended in

  let paren_view =
    TV.mk_parenthised (Parsing_hacks_pp.filter_pp_or_comment_stuff rebuilt)
  in
  Parsing_hacks_pp.find_define_init_brace_paren paren_view;
  Parsing_hacks_pp.find_string_macro_paren paren_view;
  Parsing_hacks_pp.find_macro_paren paren_view;

  (* tagging contextual info (InFunc, InStruct, etc) *)
  let multi_view =
    TV.mk_multi (Parsing_hacks_pp.filter_pp_or_comment_stuff rebuilt)
  in
  Token_views_context.set_context_tag_multi multi_view;

  Parsing_hacks_typedef.find_typedefs
    (Parsing_hacks_typedef.filter_for_typedef multi_view);

  insert_virtual_positions (Common2.acc_map (fun ext -> ext.TV.t) rebuilt)

(* Token reclassifier for C++.
 *
 * Same skeleton as fix_tokens_c, plus the C++ specific heuristics
 * (templates, qualifiers, constructors). The order of the calls matters
 * and the filtered views are recomputed before each phase because the
 * heuristics work by mutating the extended tokens.
 *)
let fix_tokens_cpp ~macro_defs tokens =
  (* fix_tokens_for_language is deliberately not applied for C++ *)
  let extended =
    Common2.acc_map TV.mk_token_extended
      (Parsing_hacks_define.fix_tokens_define tokens)
  in

  (* ifdef *)
  let ifdef_view = TV.mk_ifdef (filter_comment_stuff extended) in
  Parsing_hacks_pp.find_ifdef_funheaders ifdef_view;
  Parsing_hacks_pp.find_ifdef_bool ifdef_view;
  Parsing_hacks_pp.find_ifdef_mid ifdef_view;

  (* macro part 1 *)
  let template_view = Parsing_hacks_pp.filter_pp_or_comment_stuff extended in
  (* Find the '<' '>' template symbols. We need that for the typedef
   * heuristics. We actually need that even for the paren view,
   * which is wrong without it.
   *
   * todo? expand macros first? some expand to lexical_cast ...
   * but we need a correct parenthesized view to expand macros
   * => mutually recursive :(
   *)
  Parsing_hacks_cpp.find_template_inf_sup template_view;
  let expand_view = TV.mk_parenthised template_view in
  Pp_token.apply_macro_defs macro_defs expand_view;
  (* because the before field is used by apply_macro_defs *)
  let rebuilt = TV.rebuild_tokens_extented extended in

  (* could filter also #define/#include *)
  (* Tagging contextual info (InFunc, InStruct, etc). Better to do
   * that after the "ifdef-simplification" phase.
   *)
  let context_view = TV.mk_multi (filter_comment_stuff rebuilt) in
  Token_views_context.set_context_tag_multi context_view;

  (* macro part 2 *)
  let paren_view =
    TV.mk_parenthised (Parsing_hacks_pp.filter_pp_or_comment_stuff rebuilt)
  in
  let line_paren_view = TV.mk_line_parenthised paren_view in
  Parsing_hacks_pp.find_define_init_brace_paren paren_view;
  Parsing_hacks_pp.find_string_macro_paren paren_view;
  Parsing_hacks_pp.find_macro_lineparen line_paren_view;
  Parsing_hacks_pp.find_macro_paren paren_view;

  (* todo: at some point we need to remove that and use
   * a better filter_for_typedef that also
   * works on the nested template arguments.
   *)
  Parsing_hacks_cpp.find_template_commentize context_view;

  (* must be done before the qualifier filtering *)
  let qualifier_view = Parsing_hacks_pp.filter_pp_or_comment_stuff rebuilt in
  Parsing_hacks_cpp.find_constructor_outside_class qualifier_view;
  Parsing_hacks_cpp.find_qualifier_commentize qualifier_view;

  let final_view = Parsing_hacks_pp.filter_pp_or_comment_stuff rebuilt in
  let final_multi = TV.mk_multi final_view in
  Token_views_context.set_context_tag_cplus final_multi;
  Parsing_hacks_cpp.find_constructor final_view;

  Parsing_hacks_typedef.find_typedefs
    (Parsing_hacks_typedef.filter_for_typedef final_multi);

  (* must be done after the typedef inference *)
  Parsing_hacks_cpp.find_constructed_object_and_more final_view;
  (* the pending of find_qualifier_commentize *)
  Parsing_hacks_cpp.reclassify_tokens_before_idents_or_typedefs final_multi;

  insert_virtual_positions (Common2.acc_map (fun ext -> ext.TV.t) rebuilt)

(* Main entry point: run the reclassifier matching lang on the token list a,
 * under the "C++ parsing.fix_tokens" profiling key.
 *)
let fix_tokens ~macro_defs lang a =
  let run () =
    let fixer =
      match lang with
      | Flag_parsing_cpp.C -> fix_tokens_c
      | Flag_parsing_cpp.Cplusplus -> fix_tokens_cpp
    in
    fixer ~macro_defs a
  in
  Common.profile_code "C++ parsing.fix_tokens" run