(*s: unit_parsing_php.ml *)
open Common
open OUnit

open Cst_php
module Ast = Cst_php
module Flag = Flag_parsing

(*****************************************************************************)
(* Helpers *)
(*****************************************************************************)
(* old:
 * Back when the PHP parser was quite fragile we used to do some error
 * recovery in case of a parse error, and instead of failing hard we
 * were returning a NotParsedCorrectly toplevel element. Now
 * we fail hard because the PHP parser is better. So the function below
 * is not useful anymore:
 *
 * let assert_no_parser_error ast =
 *  assert_bool "bad: have a NotParsedCorrectly"
 *  (List.for_all (function NotParsedCorrectly _ -> false | _ -> true) ast);
 *  ()
 *)

(*****************************************************************************)
(* Unit tests *)
(*****************************************************************************)

(* OUnit suite exercising the PHP lexer, the PHP parser (including XHP and
 * type-annotation extensions), the AST visitor, token positions and the
 * sgrep pattern parser.
 *)
let unittest =

  (* [assert_rejected code] checks that parsing [code] as a PHP program
   * raises Parse_php.Parse_error.
   *
   * Flag.show_parsing_error is switched off through Common.save_excursion,
   * the same way the sgrep test below does it, so that the global flag is
   * not left modified for the tests that run afterwards.
   *)
  let assert_rejected code =
    Common.save_excursion Flag.show_parsing_error false (fun () ->
      try
        let _ = Parse_php.program_of_string code in
        assert_failure "it should have thrown a Parse_error exception"
      with Parse_php.Parse_error _ -> ()
    )
  in

  "parsing_php" >::: [

    (*-----------------------------------------------------------------------*)
    (* Lexing *)
    (*-----------------------------------------------------------------------*)

    "lexing regular code" >:: (fun () ->
      let toks = Parse_php.tokens_of_string "echo 1+2;" in
      assert_bool "it should have a Echo token"
        (toks |> List.exists (function
          | Parser_php.T_ECHO _ -> true
          | _ -> false))
    );

    (* The magic constant is written in lowercase in the input on purpose. *)
    "lexing and case sensitivity" >:: (fun () ->
      let toks =
        Parse_php.tokens_of_string "function foo() { echo __function__; }" in
      assert_bool "it should have a __FUNCTION__ token"
        (toks |> List.exists (function
          | Parser_php.T_FUNC_C _ -> true
          | _ -> false))
    );

    (*-----------------------------------------------------------------------*)
    (* Parsing *)
    (*-----------------------------------------------------------------------*)

    "parsing regular code" >:: (fun () ->
      let _ast = Parse_php.program_of_string "echo 1+2;" in
      ()
    );

    (* had such a bug one day ... *)
    "parsing empty comments" >:: (fun () ->
      let _ast = Parse_php.program_of_string "$a/**/ =1;" in
      ()
    );

    "rejecting bad code" >:: (fun () ->
      assert_rejected "echo 1+"
      (* old:
       * The PHP parser does not return an exception when a PHP file contains
       * an error, to allow some form of error recovery by not stopping
       * at the first mistake. Instead it returns a NotParsedCorrectly
       * AST toplevel element for parts of the code that were not parsed.
       * Here we check that correctly formed code do not contain such
       * NotParsedCorrectly element.
       *
       *  assert_bool "bad: should have a NotParsedCorrectly"
       *  (List.exists (function NotParsedCorrectly _ -> true | _ -> false) ast)
       *)
    );

    "rejecting variadic param with default" >:: (fun () ->
      assert_rejected "function foo($x, ...$rest=123) {}"
    );

    "rejecting multiple variadic params" >:: (fun () ->
      assert_rejected "function foo($x, ...$rest, ...$another) {}"
    );

    "rejecting non-tail variadic param without variable name" >:: (fun () ->
      assert_rejected "function foo($x, ..., ...$rest) {}"
    );

    "rejecting ellipsis with optional constructs" >:: (fun () ->
      assert_rejected "function foo(int ...) {}"
    );

    (* Every .php file found in the regression directory must parse. *)
    "regression files" >:: (fun () ->
      let dir = Filename.concat Config_pfff.path "/tests/php/parsing" in
      let files = Common2.glob (spf "%s/*.php" dir) in
      files |> List.iter (fun file ->
        try
          let _ = Parse_php.parse_program file in
          ()
        with Parse_php.Parse_error _ ->
          assert_failure (spf "it should correctly parse %s" file)
      )
    );

    (*-----------------------------------------------------------------------*)
    (* XHP *)
    (*-----------------------------------------------------------------------*)

    "parsing xhp code" >:: (fun () ->
      (* old:
       * The PHP parser now understands PHP code containing XHP elements.
       * In the past, pfff would call a preprocessor before parsing a file. By
       * setting the preprocessor to "xhpize", the XHP command line
       * preprocessor, we could then parse the regular preprocessed code.
       * Now pfff can directly parse XHP code.
       *
       * Flag_parsing_php.pp_default := Some "xhpize";
       *)
      let _ast = Parse_php.program_of_string "return <x:frag />;" in
      let _ast = Parse_php.program_of_string "return $this->foo()[2];" in
      ()
    );

    (* XHP was mainly a preprocessor to allow embedding HTML-like tags in
     * PHP. It also fixes some limitations of the original PHP grammar
     * regarding array access. You can do foo()['fld'] in XHP, which is
     * not allowed in PHP (for stupid reasons IMHO).
     * The pfff PHP parser must handle this syntactic sugar too.
     *)
    "parsing xhp fn_idx sugar code" >:: (fun () ->
      let _ast = Parse_php.program_of_string "return foo()[2];" in
      (* If the rule is added in the wrong place in the grammar, then
       * the previous test will work but not this one.
       *)
      let _ast = Parse_php.program_of_string "return $this->foo()[2];" in
      ()
    );

    (*-----------------------------------------------------------------------*)
    (* Types *)
    (*-----------------------------------------------------------------------*)

    "sphp" >:: (fun () ->
      (* [t x] fails the test, naming the snippet, if [x] does not parse. *)
      let t x =
        try
          let _ = Parse_php.program_of_string x in
          ()
        with Parse_php.Parse_error _ ->
          assert_failure (spf "it should correctly parse %s" x)
      in

      t "class A<T> { }";
      t "class A<T1, T2> { }";
      t "trait A<T1, T2> { }";
      t "interface A<T1, T2> { }";
      t "class A<T> extends B<int> { }";
      t "interface A extends B<int>, C {}";
      t "class A { use B<int>; }";
      t "function foo(): int { }";
      t "class A { public function foo(): int { }}";
      t "function foo(mixed $x): int { }";
      t "function foo(): void { }";
      t "function id<T>(T $x): T { return $x; }";
      t "function id((A, B) $x): T { return $x; }";
      t "function id(?(A, B) $x): ?int { return $x; }";
      t "function id( (function(?A) : int) $x): int { return $x; }";
      t "function id( (function() : int) $x): int { }";
      t "function test(int $x) { return 0; }";
      t "class A { private ?(int, int) $x; }";
      t "class A { const ?A<T1, T2> X = 0; }";
      t "$x = function(): ?int { return null; };";
      t "function foo(A<A<int>> $x): ?int { return null; };";
      t "class A { public static function foo<T>(): ?int { } }";
    );

    (*-----------------------------------------------------------------------*)
    (* Misc *)
    (*-----------------------------------------------------------------------*)

    (* Check that the visitor implementation correctly visits all AST
     * subelements, even when they are deep inside the AST tree (e.g.
     * sub-sub expressions inside parenthesis).
     *)
    "visitor" >:: (fun () ->
      let ast = Parse_php.program_of_string "echo 1+2+(3+4);" in

      let cnt = ref 0 in
      (* This is very tricky. See docs/manual/Parsing_php.pdf section
       * 2.1.2 for a tutorial on visitors in OCaml. *)
      let hooks = { Visitor_php.default_visitor with
        Visitor_php.kexpr = (fun (k, _) e ->
          match e with
          (* count each Sc node; any other expression is handed to the
           * continuation k *)
          | Sc _ -> incr cnt
          | _ -> k e
        )
      }
      in
      let visitor = Visitor_php.mk_visitor hooks in
      visitor (Program ast);
      (* one Sc node per literal in the snippet: 1, 2, 3 and 4 *)
      assert_equal 4 !cnt;
    );

    "checking column numbers" >:: (fun () ->
      (* See bug reported by dreiss, because the lexer had a few todos
       * regarding objects. *)
      let e = Parse_php.expr_of_string "$o->foo" in
      match e with
      | ObjGet (_v, _tok, Id name) ->
          let info = Ast.info_of_name name in
          assert_equal 4 (Parse_info.col_of_info info)
      | _ ->
          assert_failure "not good AST"
    );

    (*-----------------------------------------------------------------------*)
    (* Sgrep *)
    (*-----------------------------------------------------------------------*)

    "parsing sgrep expressions" >:: (fun () ->

      let _e = Parse_php.any_of_string "debug_rlog(1)" in
      assert_bool "it should not generate an error" true;
      let _e = Parse_php.any_of_string "debug_rlog(X)" in
      assert_bool "it should not generate an error" true;
      let _e = Parse_php.any_of_string "debug_rlog(X, 0)" in
      assert_bool "it should not generate an error" true;

      (* The assertion must stay outside of the try: assert_failure reports
       * a failure by raising an exception, so a catch-all handler wrapped
       * around it would swallow the failure and the check could never fail.
       * Any exception raised by the parser still counts as a rejection.
       *)
      let raised =
        Common.save_excursion Flag.show_parsing_error false (fun () ->
          try
            let _e = Parse_php.any_of_string "debug_rlog(X, 0" in
            false
          with _exn -> true
        )
      in
      assert_bool "it should generate an error" raised;
    );

    "parsing sgrep patterns" >:: (fun () ->
      let any = Parse_php.any_of_string "foo();" in
      let ok = match any with Stmt2 (ExprStmt (_)) -> true | _ -> false in
      assert_bool "it should be the AST of a statement" ok;

      let any = Parse_php.any_of_string "foo()" in
      let ok = match any with Expr (_) -> true | _ -> false in
      assert_bool "it should be the AST of an expression" ok;

      let any = Parse_php.any_of_string "<x:frag>x</x:frag>" in
      let ok = match any with Expr (_) -> true | _ -> false in
      assert_bool "it should be the AST of an expression" ok;
    );

    (* todo:
     *  - ? sexp and json output
     *  - ? correctness of Ast (too many cases)
     *)
  ]
(*e: unit_parsing_php.ml *)