(* Yoann Padioleau
*
* Copyright (C) 2010 Facebook
* Copyright (C) 2019 r2c
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License (GPL)
* version 2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* file license.txt for more details.
*)openCommonmoduleFlag=Flag_parsingmoduleTH=Token_helpers_pythonmodulePI=Parse_info(*****************************************************************************)(* Prelude *)(*****************************************************************************)(* Lots of copy-paste with my other parsers (e.g. C++, PHP, SQL) but
* copy-paste is sometimes ok.
*)(*****************************************************************************)(* Types *)(*****************************************************************************)typeprogram_and_tokens=Ast_python.programoption*Parser_python.tokenlist(*****************************************************************************)(* Error diagnostic *)(*****************************************************************************)leterror_msg_toktok=Parse_info.error_message_info(TH.info_of_toktok)(*****************************************************************************)(* Lexing only *)(*****************************************************************************)lettokens2file=lettable=Parse_info.full_charpos_to_pos_largefileinCommon.with_open_infilefile(funchan->letlexbuf=Lexing.from_channelchaninletstate=Lexer_python.create()inlettokenlexbuf=matchstate.Lexer_python.modewith|Lexer_python.STATE_TOKEN->Lexer_python.tokenstatelexbuf|Lexer_python.STATE_OFFSET->raise(Impossible)|Lexer_python.STATE_UNDERSCORE_TOKEN->lettok=Lexer_python._tokenstatelexbufin(matchtokwith|Parser_python.TCommentSpace_->()|_->state.Lexer_python.mode<-Lexer_python.STATE_TOKEN);tokinletrectokens_auxacc=lettok=tokenlexbufinif!Flag.debug_lexerthenCommon.pr2_gentok;lettok=tok|>TH.visitor_info_of_tok(funii->{iiwithPI.token=(* could assert pinfo.filename = file ? *)matchii.PI.tokenwith|PI.OriginTokpi->PI.OriginTok(PI.complete_token_location_largefiletablepi)|_->raiseTodo})inifTH.is_eoftokthenList.rev(tok::acc)elsetokens_aux(tok::acc)intokens_aux[])lettokensa=Common.profile_code"Parse_python.tokens"(fun()->tokens2a)(*****************************************************************************)(* Helper for main entry point *)(*****************************************************************************)(* Hacked lex. Ocamlyacc expects a function returning one token at a time
* but we actually lex all the file so we need a wrapper to turn that
* into a stream.
* This function use refs passed by parse. 'tr' means 'token refs'.
*)letreclexer_functiontr=funlexbuf->matchtr.PI.restwith|[]->(pr2"LEXER: ALREADY AT END";tr.PI.current)|v::xs->tr.PI.rest<-xs;tr.PI.current<-v;tr.PI.passed<-v::tr.PI.passed;ifTH.is_commentv(* || other condition to pass tokens ? *)thenlexer_function(*~pass*)trlexbufelsev(*****************************************************************************)(* Main entry point *)(*****************************************************************************)letparse2filename=letstat=Parse_info.default_statfilenamein(* this can throw Parse_info.Lexical_error *)lettoks=tokensfilenameinlettoks_final=toks|>Common.excludeTH.is_specialinlettr=Parse_info.mk_tokens_statetoksinletlexbuf_fake=Lexing.from_function(fun_buf_n->raiseImpossible)intry(* -------------------------------------------------- *)(* Call parser *)(* -------------------------------------------------- *)letxs=Common.profile_code"Parser_python.main"(fun()->Parser_python.main(lexer_functiontr)lexbuf_fake)instat.PI.correct<-(Common.catfilename|>List.length);(Somexs,toks_final),statwithParsing.Parse_error->letcur=tr.PI.currentinifnot!Flag.error_recoverythenraise(PI.Parsing_error(TH.info_of_tokcur));if!Flag.show_parsing_errorthenbeginpr2("parse error \n = "^error_msg_tokcur);letfilelines=Common2.cat_arrayfilenameinletcheckpoint2=Common.catfilename|>List.lengthinletline_error=PI.line_of_info(TH.info_of_tokcur)inParse_info.print_badline_error(0,checkpoint2)filelines;end;stat.PI.bad<-Common.catfilename|>List.length;(None,toks_final),statletparsea=Common.profile_code"Parse_python.parse"(fun()->parse2a)letparse_programfile=let((astopt,_toks),_stat)=parsefileinCommon2.someastopt(*****************************************************************************)(* Sub parsers *)(*****************************************************************************)let(program_of_string:string->Ast_python.program)=funs->Common2.with_tmp_file~str:s~ext:"py"(funfile->parse_programfile)(* for sgrep/spatch 
*)letany_of_strings=Common2.with_tmp_file~str:s~ext:"py"(funfile->lettoks=tokensfileinlettr=PI.mk_tokens_statetoksinletlexbuf_fake=Lexing.from_function(fun_buf_n->raiseImpossible)in(* -------------------------------------------------- *)(* Call parser *)(* -------------------------------------------------- *)Parser_python.sgrep_spatch_pattern(lexer_functiontr)lexbuf_fake)(*****************************************************************************)(* Fuzzy parsing *)(*****************************************************************************)(*
let parse_fuzzy file =
let toks = tokens file in
let trees = Parse_fuzzy.mk_trees { Parse_fuzzy.
tokf = TH.info_of_tok;
kind = TH.token_kind_of_tok;
} toks
in
trees, toks
*)