(* Yoann Padioleau
*
* Copyright (C) 2010 Facebook
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License (GPL)
* version 2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* file license.txt for more details.
*)
open Common

module Flag = Flag_parsing
module TH = Token_helpers_ml
module PI = Parse_info

(*****************************************************************************)
(* Prelude *)
(*****************************************************************************)
(* Lots of copy paste with my other parsers (e.g. C++, PHP, sql) but
 * copy paste is sometimes ok.
 *)

(*****************************************************************************)
(* Types *)
(*****************************************************************************)

(* The parsed AST (None when parsing failed) paired with the full token
 * stream of the file. *)
type program_and_tokens =
  Ast_ml.program option * Parser_ml.token list

(*****************************************************************************)
(* Error diagnostic *)
(*****************************************************************************)

(* Human-readable location message for a token, used in error reports. *)
let error_msg_tok tok =
  Parse_info.error_message_info (TH.info_of_tok tok)

(*****************************************************************************)
(* Lexing only *)
(*****************************************************************************)

(* Lex the whole of [file] into a token list, completing each token's
 * location with line/column information from a precomputed table.
 * Raises Failure on a lexical error. *)
let tokens2 file =
  let table = Parse_info.full_charpos_to_pos_large file in
  Common.with_open_infile file (fun chan ->
    let lexbuf = Lexing.from_channel chan in
    try
      let rec tokens_aux acc =
        let tok = Lexer_ml.token lexbuf in
        if !Flag.debug_lexer then Common.pr2_gen tok;
        (* enrich the token's position with precise line/column info *)
        let tok = tok +> TH.visitor_info_of_tok (fun ii ->
          { ii with PI.token =
            (match ii.PI.token with
            | PI.OriginTok pi ->
                PI.OriginTok (PI.complete_token_location_large file table pi)
            | _ -> raise Todo
            )
          })
        in
        if TH.is_eof tok
        then List.rev (tok :: acc)
        else tokens_aux (tok :: acc)
      in
      tokens_aux []
    with
    | Lexer_ml.Lexical s ->
        failwith ("lexical error " ^ s ^ "\n =" ^
                  (PI.error_message file (PI.lexbuf_to_strpos lexbuf)))
    | e -> raise e
  )

let tokens a =
  Common.profile_code "Parse_ml.tokens" (fun () -> tokens2 a)

(*****************************************************************************)
(* Helper for main entry point *)
(*****************************************************************************)

(* Hacked lex. Ocamlyacc expects a function returning one token at a time
 * but we actually lex all the file so we need a wrapper to turn that
 * into a stream.
 * This function use refs passed by parse. 'tr' means 'token refs'.
*)
(* Feed ocamlyacc one token at a time from the pre-lexed stream held in
 * [tr], skipping comment tokens; mutates [tr]'s rest/current/passed refs. *)
let rec lexer_function tr = fun lexbuf ->
  match tr.PI.rest with
  | [] -> (pr2 "LEXER: ALREADY AT END"; tr.PI.current)
  | v :: xs ->
      tr.PI.rest <- xs;
      tr.PI.current <- v;
      tr.PI.passed <- v :: tr.PI.passed;
      if TH.is_comment v
      (* || other condition to pass tokens ? *)
      then lexer_function (*~pass*) tr lexbuf
      else v

(*****************************************************************************)
(* Main entry point *)
(*****************************************************************************)

exception Parse_error of Parse_info.info

(* Parse [filename] (as an interface when it ends in .mli, otherwise as an
 * implementation), returning the optional AST with the tokens, plus
 * parsing statistics. On error: raises Parse_error unless
 * !Flag.error_recovery, in which case (None, toks) is returned. *)
let parse2 filename =
  let stat = Parse_info.default_stat filename in
  let toks = tokens filename in
  let tr = Parse_info.mk_tokens_state toks in
  (* the real tokens come from [tr]; this lexbuf must never be read *)
  let lexbuf_fake =
    Lexing.from_function (fun _buf _n -> raise Impossible)
  in
  try
    (* -------------------------------------------------- *)
    (* Call parser *)
    (* -------------------------------------------------- *)
    let xs =
      Common.profile_code "Parser_ml.main" (fun () ->
        if filename =~ ".*\\.mli"
        then Parser_ml.interface (lexer_function tr) lexbuf_fake
        else Parser_ml.implementation (lexer_function tr) lexbuf_fake
      )
    in
    stat.PI.correct <- (Common.cat filename +> List.length);
    (Some xs, toks), stat
  (*| Semantic_c.Semantic _ *)
  with (Lexer_ml.Lexical _ | Parsing.Parse_error) as exn ->
    let cur = tr.PI.current in
    if not !Flag.error_recovery
    then raise (Parse_error (TH.info_of_tok cur));
    if !Flag.show_parsing_error then begin
      (match exn with
      (* Lexical is not anymore launched I think *)
      | Lexer_ml.Lexical s ->
          pr2 ("lexical error " ^ s ^ "\n =" ^ error_msg_tok cur)
      | Parsing.Parse_error ->
          pr2 ("parse error \n = " ^ error_msg_tok cur)
      (* | Semantic_java.Semantic (s, i) ->
           pr2 ("semantic error " ^s^ "\n ="^ error_msg_tok tr.current)
      *)
      | _e -> raise Impossible
      );
      let filelines = Common2.cat_array filename in
      let checkpoint2 = Common.cat filename +> List.length in
      let line_error = TH.line_of_tok cur in
      Parse_info.print_bad line_error (0, checkpoint2) filelines;
    end;
    stat.PI.bad <- Common.cat filename +> List.length;
    (None, toks), stat

let parse a =
  Common.profile_code "Parse_ml.parse" (fun () -> parse2 a)

(* Convenience wrapper: parse and return just the AST, failing if absent. *)
let parse_program file =
  let ((astopt, _toks), _stat) = parse file in
  Common2.some astopt

(*****************************************************************************)
(* Fuzzy parsing *)
(*****************************************************************************)

(* This is similar to what I did for OPA. This is also similar
 * to what I do for parsing hacks for C++, but this fuzzy AST can be useful
 * on its own, e.g. for a not too bad sgrep/spatch.
*)
(* Lex [file] and build fuzzy (parenthesized) trees from the token stream;
 * returns the trees together with the raw tokens. *)
let parse_fuzzy file =
  let toks = tokens file in
  let hooks = { Parse_fuzzy.
    tokf = TH.info_of_tok;
    kind = TH.token_kind_of_tok;
  } in
  let trees = Parse_fuzzy.mk_trees hooks toks in
  trees, toks