(* Yoann Padioleau
*
* Copyright (C) 2002-2008 Yoann Padioleau
* Copyright (C) 2011 Facebook
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License (GPL)
* version 2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* file license.txt for more details.
*)openCommonopenParser_cppmoduleFlag=Flag_parsingmodulePI=Parse_infomoduleTH=Token_helpers_cppmoduleHack=Parsing_hacks_lib(*****************************************************************************)(* Prelude *)(*****************************************************************************)(*
* To parse macro definitions I need to do some tricks
* as some information can be computed only at the lexing level. For instance
* the space after the name of the macro in '#define foo (x)' is meaningful
* but the grammar does not have this information. So define_ident() below
* looks at such space and generates a special TOPar_Define token.
*
* In a similar way, macro definitions can contain some backslashes and
* newlines, and the grammar needs to know where the macro ends, which is
* line-level and so low token-level information. Hence the functions
* define_line_1() and define_line_2() below and the
* TCommentNewline_DefineEndOfMacro token.
*
* update: TCommentNewline_DefineEndOfMacro is handled in a special way
* at different places, a little bit like EOF, especially for error recovery,
* so this is an important token that should not be retagged!
*
* We also change the kind of TIdent to TIdent_Define to avoid bad interactions
* with other parsing_hack tricks. For instance, if we keep TIdent then
* the stringification heuristics can believe the TIdent is a string-macro.
* So it is simpler to change the kind of the TIdent in a macro too.
*
* ugly: maybe a better solution perhaps would be to erase
* TCommentNewline_DefineEndOfMacro from the Ast and list of tokens in parse_c.
*
* note: I do a +1 somewhere, it's for the unparsing to correctly sync.
*
* note: can't replace mark_end_define by simply a fakeInfo(). The reason
* is where is the \n TCommentSpace. Normally there is always a last token
* to synchronize on, either EOF or the token of the next toplevel.
* In the case of the #define we got in list of token
* [TCommentSpace "\n"; TDefEOL] but if TDefEOL is a fakeinfo then we will
* not synchronize on it and so we will not print the "\n".
* A solution would be to put the TDefEOL before the "\n".
*
* todo?: could put a ExpandedTok for that ?
*)

(*****************************************************************************)
(* Wrappers *)
(*****************************************************************************)

(* Logging wrappers built by Common2.mk_pr2_wrappers from the
 * Flag.verbose_lexing setting. The second wrapper is bound to an
 * underscore-prefixed name because nothing below uses it.
 *)
let (pr2, _pr2_once) = Common2.mk_pr2_wrappers Flag.verbose_lexing

(*****************************************************************************)
(* Helpers *)
(*****************************************************************************)

(* Build the TCommentNewline_DefineEndOfMacro token used to mark the end of
 * a macro definition. Its info is an OriginTok whose location is copied
 * from [info], except that the string is emptied and the char position is
 * that of [info] plus one (the +1 the prelude says is needed for the
 * unparsing to sync correctly). No transformation is attached.
 *)
let mark_end_define info =
  let origin_loc = PI.token_location_of_info info in
  let end_loc =
    { origin_loc with
      PI.str = "";
      PI.charpos = PI.pos_of_info info + 1;
    }
  in
  let end_info =
    { PI.token = PI.OriginTok end_loc;
      transfo = PI.NoTransfo;
    }
  in
  (* fresh_tok *)
  TCommentNewline_DefineEndOfMacro end_info

(* String rendering of the position of [info], used in the warnings below. *)
let pos info =
  PI.string_of_info info

(*****************************************************************************)
(* Parsing hacks for #define *)
(*****************************************************************************)

(* simple automata:
* state1 --'#define'--> state2 --change_of_line--> state1
*)(* put the TCommentNewline_DefineEndOfMacro at the good place
* and replace \ with TCommentSpace
*)

(* State "outside a #define": tokens are copied through unchanged until a
 * TDefine is met; define_line_2 then takes over, starting from the line and
 * info of that TDefine. A TCppEscapedNewline met in this state is reported
 * through pr2 and replaced by a TCommentSpace carrying the same info.
 *)
let rec define_line_1 = function
  | [] -> []
  | (TDefine define_info as define_tok) :: rest ->
      define_tok
      :: define_line_2 (PI.line_of_info define_info) define_info rest
  | TCppEscapedNewline esc_info :: rest ->
      pr2 (spf "WEIRD: a \\ outside a #define at %s" (pos esc_info));
      (* fresh_tok *)
      TCommentSpace esc_info :: define_line_1 rest
  | tok :: rest ->
      tok :: define_line_1 rest

(* State "inside a #define". [line] is the line the macro is currently
 * expected to be on and [lastinfo] is the info of the previous token of the
 * macro (initially the TDefine itself), which mark_end_define uses to build
 * the end-of-macro token.
 *)
and define_line_2 line lastinfo = function
  | [] ->
      (* should not happen: EOF should be met before the end of the list *)
      pr2 "PB: WEIRD in Parsing_hack_define.define_line_2";
      [ mark_end_define lastinfo ]
  | tok :: rest ->
      (* both are computed up front, for every kind of token *)
      let tok_line = TH.line_of_tok tok in
      let tok_info = TH.info_of_tok tok in
      begin match tok with
      | EOF eof_info ->
          (* the macro ends at end of file: close it just before the EOF *)
          mark_end_define lastinfo :: EOF eof_info :: define_line_1 rest
      | TCppEscapedNewline esc_info ->
          if tok_line <> line then pr2 "PB: WEIRD: not same line number";
          (* the macro continues on the next line; the escaped newline
           * itself becomes a TCommentSpace *)
          (* fresh_tok *)
          TCommentSpace esc_info :: define_line_2 (line + 1) tok_info rest
      | _ ->
          if tok_line <> line
          then
            (* first token on another line: the macro ended just before it,
             * and this token is processed again in the outside state *)
            mark_end_define lastinfo :: define_line_1 (tok :: rest)
          else tok :: define_line_2 line tok_info rest
      end

(* put the TIdent_Define and TOPar_Define *)
let rec define_ident = function
  | [] -> []
  | (TDefine define_info as define_tok) :: after_define ->
      let retagged =
        match after_define with
        | (TCommentSpace _ as space)
          :: TIdent (name, name_info)
          :: (* no space *) TOPar paren_info
          :: rest ->
            (* The opening parenthesis directly follows the identifier (no
             * space in between), so this is a macro-function. The tokens
             * are changed to avoid the ambiguity between '#define foo(x)'
             * and '#define foo (x)'.
             *)
            space
            :: Hack.fresh_tok (TIdent_Define (name, name_info))
            :: Hack.fresh_tok (TOPar_Define paren_info)
            :: define_ident rest
        | (TCommentSpace _ as space) :: TIdent (name, name_info) :: rest ->
            space
            :: Hack.fresh_tok (TIdent_Define (name, name_info))
            :: define_ident rest
        | _ ->
            pr2 (spf "WEIRD #define body, at %s" (pos define_info));
            define_ident after_define
      in
      define_tok :: retagged
  | tok :: rest ->
      tok :: define_ident rest

(*****************************************************************************)
(* Entry point *)
(*****************************************************************************)

(* First mark the end of every macro (define_line_1), then retag the
 * identifier and opening parenthesis that follow each #define
 * (define_ident).
 *)
let fix_tokens_define2 xs =
  let with_macro_ends = define_line_1 xs in
  define_ident with_macro_ends

(* Same as fix_tokens_define2, run under Common.profile_code with the
 * "Hack.fix_define" label.
 *)
let fix_tokens_define toks =
  Common.profile_code "Hack.fix_define" (fun () ->
    fix_tokens_define2 toks)