(* Joust: a Java lexer, parser, and pretty-printer written in OCaml.
 * Copyright (C) 2001 Eric C. Cooper <ecc@cmu.edu>
 * Released under the GNU General Public License
 *
 * Yoann Padioleau:
 * 2010 port to the pfff infrastructure.
 * 2012 heavily modified to support annotations, generics, enum, foreach, etc
 *)

(*****************************************************************************)
(* Prelude *)
(*****************************************************************************)
(*
 * An AST for Java.
 *
 * For Java we directly do an AST, as opposed to a CST (Concrete
 * Syntax Tree) as in lang_php/. This should be enough for highlight_java.ml
 * I think (we just need the full list of tokens + the AST with position
 * for the identifiers).
 *
 * todo:
 * - support generic methods (there is support for generic classes though)
 * - Look for featherweight Java
 * - look for middleweight Java (mentioned in Coccinelle4J paper)
 *)

(*****************************************************************************)
(* The AST java related types *)
(*****************************************************************************)

(* ------------------------------------------------------------------------- *)
(* Token/info *)
(* ------------------------------------------------------------------------- *)

(* A token is the position/info record defined in Parse_info. *)
type tok = Parse_info.t
 (* with tarzan *)

(* A value paired with the token it comes from. *)
type 'a wrap = 'a * tok
 (* with tarzan *)

type 'a list1 = 'a list (* really should be 'a * 'a list *)
 (* with tarzan *)

(* ------------------------------------------------------------------------- *)
(* Ident, qualifier *)
(* ------------------------------------------------------------------------- *)

(* for class/interface/enum names, method/field names, type parameter, ... *)
type ident = string wrap
 (* with tarzan *)

(* for package, import, throw specification *)
type qualified_ident = ident list
 (* with tarzan *)

(* ------------------------------------------------------------------------- *)
(* Types *)
(* ------------------------------------------------------------------------- *)

type typ =
  (* 'void', 'int', and other primitive types; could be merged with TClass *)
  | TBasic of string wrap
  | TClass of class_type
  | TArray of typ

  (* class or interface or enum type actually *)
  and class_type =
    (ident * type_argument list) list1

  and type_argument =
    | TArgument of ref_type
    | TQuestion of (bool (* extends|super, true = super *) * ref_type) option

   (* A ref type should be a class type or an array of whatever, but not a
    * primitive type. We don't enforce this invariant in the AST to simplify
    * things.
    *)
    and ref_type = typ
 (* with tarzan *)

type type_parameter =
  | TParam of ident * ref_type list (* extends *)
 (* with tarzan *)

(* ------------------------------------------------------------------------- *)
(* Modifiers *)
(* ------------------------------------------------------------------------- *)

type modifier =
  | Public
  | Protected
  | Private
  | Abstract
  | Static
  | Final
  | StrictFP
  | Transient
  | Volatile
  | Synchronized
  | Native
  | Annotation of annotation

 and annotation = name_or_class_type * (annotation_element option)

 and modifiers = modifier wrap list

 and annotation_element =
   | AnnotArgValue of element_value
   | AnnotArgPairInit of annotation_pair list
   | EmptyAnnotArg

 and element_value =
   | AnnotExprInit of expr
   | AnnotNestedAnnot of annotation
   | AnnotArrayInit of element_value list

 and annotation_pair = (ident * element_value)

 and name_or_class_type = identifier_ list

 and identifier_ =
   | Id of ident
   | Id_then_TypeArgs of ident * type_argument list
   | TypeArgs_then_Id of type_argument list * identifier_

(* ------------------------------------------------------------------------- *)
(* Expressions *)
(* ------------------------------------------------------------------------- *)

(* When do we need to have a name with actual type_argument?
 * For certain calls like List.<Int>of(), which are rare.
 * less: do a NameGeneric instead? the type_argument could then be
 * only at the end?
 *)
and name = (type_argument list * ident) list1

(* Can have nested anon class (=~ closures) in expressions hence
 * the use of type ... and ... below
 *)
and expr =
  (* Name is used for local variable, 'this' and 'super' special names,
   * and statically computable entities such as Package1.subpackage.Class.
   * Field or method accesses should use Dot (see below). Unfortunately
   * the Java grammar is ambiguous and without contextual information,
   * there is no way to know whether x.y.z is an access to the field z
   * of field y of local variable x or the static field z of class y
   * in package x. See the note on Dot below.
   *)
  | Name of name

  (* This is used only in the context of annotations *)
  | NameOrClassType of name_or_class_type

  | Literal of literal

  (* Xxx.class *)
  | ClassLiteral of typ

  (* the 'decls option' is for anon classes *)
  | NewClass of typ * arguments * decls option
  (* the int counts the number of [], new Foo[][] => 2 *)
  | NewArray of typ * arguments * int * init option
  (* see tests/java/parsing/NewQualified.java *)
  | NewQualifiedClass of expr * ident * arguments * decls option

  | Call of expr * arguments

  (* How is parsed X.y ? Could be a Name [X;y] or Dot (Name [X], y)?
   * The static part should be a Name and the more dynamic part a Dot.
   * So variable.field and variable.method should be parsed as
   * Dot (Name [variable], field|method). Unfortunately
   * variable.field1.field2 is currently parsed as
   * Dot (Name [variable;field1], field2). You need semantic information
   * about variable to disambiguate.
   *
   * Why the ambiguity? Names and packages are not
   * first class citizens, so one cannot pass a class/package name as an
   * argument to a function, so when we have X.Y.z in an expression, the
   * last element has to be a field or a method (if it's a class,
   * people should use X.Y.class), so it's safe to transform such
   * a Name at parsing time in a Dot.
   * The problem is that more things in x.y.z can be a Dot, but to know
   * that requires semantic information about the type of x and y.
   *)
  | Dot of expr * ident

  | ArrayAccess of expr * expr

  | Unary of Ast_generic.arithmetic_operator (* +/-/~/! *) wrap * expr
  | Postfix of expr * Ast_generic.incr_decr wrap
  | Prefix of Ast_generic.incr_decr wrap * expr
  | Infix of expr * Ast_generic.arithmetic_operator wrap * expr

  | Cast of typ * expr

  | InstanceOf of expr * ref_type

  | Conditional of expr * expr * expr
  (* ugly java, like in C assignment is an expression not a statement :( *)
  | Assign of expr * expr
  | AssignOp of expr * Ast_generic.arithmetic_operator wrap * expr

  (* sgrep-ext: *)
  | Ellipses of tok

and literal =
  | Int of string wrap
  | Float of string wrap
  | String of string wrap
  | Char of string wrap
  | Bool of bool wrap
  | Null of tok

and arguments = expr list

(* todo: split in more precise *)
and op = string

(* ------------------------------------------------------------------------- *)
(* Statements *)
(* ------------------------------------------------------------------------- *)

and stmt =
  | Empty (* could be Block [] *)
  | Block of stmts
  | Expr of expr

  | If of expr * stmt * stmt
  | Switch of expr * (cases * stmts) list

  | While of expr * stmt
  | Do of stmt * expr
  | For of for_control * stmt

  | Break of ident option
  | Continue of ident option
  | Return of expr option
  | Label of ident * stmt

  | Sync of expr * stmt

  | Try of stmt * catches * stmt option
  | Throw of expr

  (* decl as statement *)
  | LocalVar of var_with_init
  | LocalClass of class_decl

  (* javaext: http://java.sun.com/j2se/1.4.2/docs/guide/lang/assert.html *)
  | Assert of expr * expr option (* assert e or assert e : e2 *)

and stmts = stmt list

and case =
  | Case of expr
  | Default

and cases = case list

and for_control =
  | ForClassic of for_init * expr list * expr list
  | Foreach of var * expr

  and for_init =
    | ForInitVars of var_with_init list
    | ForInitExprs of expr list

and catch = var * stmt

and catches = catch list

(* ------------------------------------------------------------------------- *)
(* variable (local var, parameter) declaration *)
(* ------------------------------------------------------------------------- *)

and var = {
  v_name: ident;
  v_mods: modifiers;
  v_type: typ;
}

and vars = var list

(* less: could be merged with var *)
and var_with_init = {
  f_var: var;
  f_init: init option
}

  (* less: could merge with expr *)
  and init =
    | ExprInit of expr
    | ArrayInit of init list

(* ------------------------------------------------------------------------- *)
(* Methods, fields *)
(* ------------------------------------------------------------------------- *)

(* method or constructor *)
and method_decl = {
  (* m_var.v_type is a (TBasic void) for a constructor *)
  m_var: var;
  (* the var.v_mod in params can only be Final or Annotation *)
  m_formals: vars;
  m_throws: qualified_ident list;

  (* todo: m_tparams *)

  (* Empty for methods in interfaces.
   * For constructor the first stmts can contain
   * explicit_constructor_invocations.
   *)
  m_body: stmt
}

and field = var_with_init

(* ------------------------------------------------------------------------- *)
(* Enum *)
(* ------------------------------------------------------------------------- *)

and enum_decl = {
  en_name: ident;
  en_mods: modifiers;
  en_impls: ref_type list;
  en_body: enum_constant list * decls;
}

 and enum_constant =
   | EnumSimple of ident
   (* http://docs.oracle.com/javase/1.5.0/docs/guide/language/enums.html *)
   | EnumConstructor of ident * arguments
   | EnumWithMethods of ident * method_decl list

(* ------------------------------------------------------------------------- *)
(* Class/Interface *)
(* ------------------------------------------------------------------------- *)

and class_decl = {
  cl_name: ident;
  cl_kind: class_kind;

  cl_tparams: type_parameter list;

  cl_mods: modifiers;

  (* always at None for interface *)
  cl_extends: typ option;
  (* for interface this is actually the extends *)
  cl_impls: ref_type list;

  (* the methods body are always empty for interface *)
  cl_body: decls
}

  and class_kind = ClassRegular | Interface

(* ------------------------------------------------------------------------- *)
(* Decls *)
(* ------------------------------------------------------------------------- *)

and decl =
  | Class of class_decl
  | Method of method_decl
  | Field of field
  | Enum of enum_decl
  | Init of bool (* static *) * stmt

and decls = decl list
 (* with tarzan *)

(* ------------------------------------------------------------------------- *)
(* The toplevel elements *)
(* ------------------------------------------------------------------------- *)

type compilation_unit = {
  package: qualified_ident option;
  (* The qualified ident can also contain "*" at the very end.
   * The bool is for static import (javaext:)
   *)
  imports: (bool * qualified_ident) list;
  (* todo? necessarily a (unique) class first? *)
  decls: decls;
}
 (* with tarzan *)

type program = compilation_unit
 (* with tarzan *)

(*****************************************************************************)
(* Any *)
(*****************************************************************************)

(* Sum type wrapping the main AST node kinds under a single type. *)
type any =
  | AIdent of ident
  | AExpr of expr
  | AStmt of stmt
  | ATyp of typ
  | AVar of var
  | AInit of init
  | AMethod of method_decl
  | AField of field
  | AClass of class_decl
  | ADecl of decl
  | AProgram of program
 (* with tarzan *)

(*****************************************************************************)
(* Helpers *)
(*****************************************************************************)

(* Drop the token of a wrapped value: returns the first component. *)
let unwrap = fst

(* Build a Parse_info record carrying a FakeTokStr made of [str] and the
 * optional [next_to] argument (None by default), with no transformation.
 *)
let fakeInfo ?(next_to=None) str = {
  Parse_info.token = Parse_info.FakeTokStr (str, next_to);
  transfo = Parse_info.NoTransfo;
}

(* Placeholder values: the empty list and unit. *)
let ast_todo = []
let ast_todo2 = ()

(* Token of an identifier: returns the second component. *)
let info_of_ident ident =
  snd ident

(* True when Final is the first component of at least one element of [xs]. *)
let is_final xs =
  List.exists (function (Final, _) -> true | _ -> false) xs

(* True when both Final and Static occur as first components in [xs]. *)
let is_final_static xs =
  List.exists (function (Final, _) -> true | _ -> false) xs
  && List.exists (function (Static, _) -> true | _ -> false) xs

(* Token of the ident held by an identifier_; a TypeArgs_then_Id is
 * unwrapped recursively until an Id or Id_then_TypeArgs is reached.
 *)
let rec info_of_identifier_ (id : identifier_) : tok =
  match id with
  | Id id
  | Id_then_TypeArgs (id, _) -> snd id
  | TypeArgs_then_Id (_, id_) -> info_of_identifier_ id_