123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687(*
Monomorphization of type expressions.
The goal is to inline each parametrized type definition as much as possible,
allowing code generators to create more efficient code directly:
type ('a, 'b) t = [ Foo of 'a | Bar of 'b ]
type int_t = (int, int) t
becomes:
type int_t = _1
type _1 = [ Foo of int | Bar of int ]
A secondary goal is to factor out type subexpressions in order for
the code generators to produce less code:
type x = { x : int list }
type y = { y : int list option }
becomes:
type x = { x : _1 }
type y = { y : _2 }
type _1 = int list (* `int list' now occurs only once *)
type _2 = _1 option
By default, only parameterless type definitions are returned.
The [keep_poly] option allows to return parametrized type definitions as
well.
Input:
type 'a abs = abstract
type int_abs = int abs
type 'a tree = [ Leaf of 'a | Node of ('a tree * 'a tree) ]
type t = int tree
type x = [ Foo | Bar ] tree
Output (pseudo-syntax where quoted strings indicate unique type identifiers):
type "int abs" = int abs
type int_abs = "int abs"
type 'a tree = [ Leaf of 'a | Node of ('a tree * 'a tree) ]
(* only if keep_poly = true *)
type "int tree" = [ Leaf of int | Node of ("int tree" * "int tree") ]
type t = "int tree"
type "[ Foo | Bar ] tree" =
[ Leaf of [ Foo | Bar ]
| Node of ("[ Foo | Bar ] tree" * "[ Foo | Bar ] tree") ]
type x = "[ Foo | Bar ] tree"
*)openImportopenAstmoduleS=Stdlib.Set.Make(String)moduleM=Stdlib.Map.Make(String)(*
To support -o-name-overlap, we need to generate a few type annotations.
But types generated by expansion like _1, _2, etc. are not actually
written out in the interface or implementation, so they must be mapped
back to the original polymorphic types for annotation purposes.
This table contains the mappings. Its format is:
key = generated type name
value = (original type name,
original number of parameters)
For example, if we have the generated output:
type 'a t = ...
type _1 = int t
Then the idea is, in the reader and writer functions, instead of using
_1 in the annotation, we use _ t. The entry in original_types would be:
("_1", ("t", 1))
(The alternate strategy of actually producing a definition for type _1
aliasing int t in the implementation doesn't work, because the annotations
will disagree with the interface in the case of recursive types.)
*)typeoriginal_types=(string,string*int)Hashtbl.t(*
Format of the table:
key = type name (without arguments)
value = (order in the file,
number of parameters,
original annotations of the right-hand type expression,
original type definition,
rewritten type definition)
Every entry has an original type definition except the predefined
atoms (int, string, etc.) and newly-created type definitions
(type _1 = ...).
*)letinit_table()=letseqnum=ref0inlettbl=Hashtbl.create20inList.iter(fun(k,n,opt_td)->incrseqnum;Hashtbl.addtblk(!seqnum,n,opt_td,None))Predef.list;seqnum,tblletrecmapvar_expr(f:string->string)(x:Ast.type_expr):Ast.type_expr=matchxwithSum(loc,vl,a)->Sum(loc,List.map(mapvar_variantf)vl,a)|Record(loc,fl,a)->Record(loc,List.map(mapvar_fieldf)fl,a)|Tuple(loc,tl,a)->Tuple(loc,List.map(fun(loc,x,a)->(loc,mapvar_exprfx,a))tl,a)|List(loc,t,a)->List(loc,mapvar_exprft,a)|Name(loc,(loc2,"list",[t]),a)->Name(loc,(loc2,"list",[mapvar_exprft]),a)|Option(loc,t,a)->Option(loc,mapvar_exprft,a)|Name(loc,(loc2,"option",[t]),a)->Name(loc,(loc2,"option",[mapvar_exprft]),a)|Nullable(loc,t,a)->Nullable(loc,mapvar_exprft,a)|Name(loc,(loc2,"nullable",[t]),a)->Name(loc,(loc2,"nullable",[mapvar_exprft]),a)|Shared(loc,t,a)->Shared(loc,mapvar_exprft,a)|Name(loc,(loc2,"shared",[t]),a)->Name(loc,(loc2,"shared",[mapvar_exprft]),a)|Wrap(loc,t,a)->Wrap(loc,mapvar_exprft,a)|Name(loc,(loc2,"wrap",[t]),a)->Name(loc,(loc2,"wrap",[mapvar_exprft]),a)|Tvar(loc,s)->Tvar(loc,fs)|Name(loc,(loc2,k,args),a)->Name(loc,(loc2,k,List.map(mapvar_exprf)args),a)andmapvar_fieldf=function`Field(loc,k,t)->`Field(loc,k,mapvar_exprft)|`Inherit(loc,t)->`Inherit(loc,mapvar_exprft)andmapvar_variantf=function|Variant(loc,k,opt_t)->Variant(loc,k,(Option.map(mapvar_exprf)opt_t))|Inherit(loc,t)->Inherit(loc,mapvar_exprft)letvar_of_inti=letletter=imod26inletnumber=i/26inletprefix=String.make1(Char.chr(letter+Char.code'a'))inifnumber=0thenprefixelseprefix^string_of_intnumberletvars_of_intn=List.initnvar_of_intletis_specials=String.lengths>0&&s.[0]='@'(*
Standardize a type expression by numbering the type variables
using the order in which they are encountered.
input:
(int, 'b, 'z) foo
output:
- new_name: "@(int, 'a, 'b) foo"
- new_args: [ 'b; 'z ]
- new_env: [ ('b, 'a); ('z, 'b) ]
new_name and new_args constitute the type expression that replaces the
original one:
(int, 'b, 'z) foo --> ('b, 'z) "@(int, 'a, 'b) foo"
new_env allows the substitution of the type variables of the original
type expression into the type variables defined by the new type definition.
*)letmake_type_namelocorig_nameargsan=lettbl=Hashtbl.create10inletn=ref0inletmapping=ref[]inletassign_names=tryHashtbl.findtblswithNot_found->letname=var_of_int!ninmapping:=(s,name)::!mapping;incrn;nameinletnormalized_args=List.map(mapvar_exprassign_name)argsinletnew_name=sprintf"@(%s)"(Print.string_of_type_nameorig_namenormalized_argsan)inletmapping=List.rev!mappinginletnew_args=List.map(fun(old_s,_)->Tvar(loc,old_s))mappinginletnew_env=List.map(fun(old_s,new_s)->old_s,Tvar(loc,new_s))mappinginnew_name,new_args,new_envletis_abstract(x:type_expr)=matchxwithName(_,(_,"abstract",_),_)->true|_->falseletexpr_of_lvaluelocnameparamannot=Name(loc,(loc,name,List.map(funs->Tvar(loc,s))param),annot)letis_cycliclnamet=matchtwithName(_,(_,rname,_),_)->lname=rname|_->falseletis_tvar=functionTvar_->true|_->falseletadd_annot(x:type_expr)a:type_expr=Ast.map_annot(funa0->Annot.merge(a@a0))xletexpand?(keep_builtins=false)?(keep_poly=false)(l:type_deflist):type_deflist*original_types=letseqnum,tbl=init_table()inletoriginal_types=Hashtbl.create16inletrecsubstenv(t:type_expr):type_expr=matchtwithSum(loc,vl,a)->Sum(loc,List.map(subst_variantenv)vl,a)|Record(loc,fl,a)->Record(loc,List.map(subst_fieldenv)fl,a)|Tuple(loc,tl,a)->Tuple(loc,List.map(fun(loc,x,a)->(loc,substenvx,a))tl,a)|List(locasloc2,t,a)|Name(loc,(loc2,"list",[t]),a)->lett'=substenvtinifkeep_builtinsthenName(loc,(loc2,"list",[t']),a)elsesubst_type_namelocloc2"list"[t']a|Option(locasloc2,t,a)|Name(loc,(loc2,"option",[t]),a)->lett'=substenvtinifkeep_builtinsthenName(loc,(loc2,"option",[t']),a)elsesubst_type_namelocloc2"option"[t']a|Nullable(locasloc2,t,a)|Name(loc,(loc2,"nullable",[t]),a)->lett'=substenvtinifkeep_builtinsthenName(loc,(loc2,"nullable",[t']),a)elsesubst_type_namelocloc2"nullable"[t']a|Shared(locasloc2,t,a)|Name(loc,(loc2,"shared",[t]),a)->lett'=substenvtinifkeep_builtinsthenName(loc,(loc2,"shared",[t']),a)elsesubst_type_namelocloc2"shared"[t']a|Wrap(locasloc2,t,a)|Name(loc,(loc2,"wrap",[t]),a)->lett'=substenvtinifkeep_builtinsthenName(loc,(loc2,"wrap",[t']),a)elsesubst_type_namelocloc2"wrap"[t']a|Tvar(_,s)asx->Option.value(List.assocsenv)~default:x|Name(loc,(loc2,name,args),a)->letargs'=List.map(substenv)argsinifList.for_allis_tvarargs'thenName(loc,(loc2,name,args'),a)elsesubst_type_namelocloc2nameargs'aandsubst_type_namelocloc2nameargsan=(*
Reduce the number of arguments of the type by creating
an intermediate type, e.g.:
('x, int) t becomes 'x "('a, int) t"
and the following type is created:
type 'a "('a, int) t" = ...
input:
- type name with arguments expressed in the environment where the
type expression was extracted
- annotations for that type expression
output:
- equivalent type expression valid in the same environment
side-effects:
- creation of a type definition for the output type expression.
*)letnew_name,new_args,new_env=make_type_nameloc2nameargsaninletn_param=List.lengthnew_envinifnot(Hashtbl.memtblnew_name)thencreate_type_deflocnameargsnew_envnew_namen_paraman;(*
Return new type name with new arguments.
The annotation has been transferred to the right-hand
expression of the new type definition.
*)Name(loc,(loc2,new_name,new_args),[])andcreate_type_deflocorig_nameorig_argsenvnamen_paraman0=(*
Create the type definition needed to support the new type name
[name] expecting [n_param] parameters.
The right-hand side of the definition is obtained by looking up the
definition for type [orig_name]:
type ('a, 'b) t = [ Foo of 'a | Bar of 'b ]
type 'c it = (int, 'c) t
output:
type ('a, 'b) t = [ Foo of 'a | Bar of 'b ]
type 'a _1 = [ Foo of int | Bar of 'a ] (* new name = _1, n_param = 1 *)
type 'c it = 'c _1
*)incrseqnum;leti=!seqnumin(* Create entry in the table, indicating that we are working on it *)Hashtbl.addtblname(i,n_param,None,None);Hashtbl.addoriginal_typesname(orig_name,List.lengthorig_args);(* Get the original type definition *)let(_,_,orig_opt_td,_)=tryHashtbl.findtblorig_namewithNot_found->assertfalse(* All original type definitions must
have been put in the table initially *)inlet((_,_,_)astd')=matchorig_opt_tdwithNone->assertfalse(* Original type definitions must all exist,
even for predefined types and abstract types. *)|Some(_,(k,pl,def_an),t)->assert(k=orig_name);letnew_params=vars_of_intn_paraminlett=add_annottan0inlett=set_type_expr_locloctin(*
First replace the type expression being specialized
(orig_name, orig_args) by the equivalent expression
in the new environment (variables 'a, 'b, ...)
(int, 'b) foo --> (int, 'a) foo
*)letargs=List.map(substenv)orig_argsin(*
Then expand the expression into its definition,
replacing each variable by the actual argument:
original definition:
type ('x, 'y) foo = [ Foo of 'x | Bar of 'y ]
new definition:
type 'a _1 = ...
right-hand expression becomes:
[ Foo of int | Bar of 'a ]
using the following environment:
'x -> int
'y -> 'a
*)letenv=List.map2(funvarvalue->(var,value))plargsinlett'=ifis_abstracttthen(*
e.g.: type 'a t = abstract
use 'a t and preserve "t"
*)lett=expr_of_lvaluelocorig_namepl(Ast.annot_of_type_exprt)insubst_only_argsenvtelselett'=substenvtinifis_cyclicnamet'thensubst_only_argsenvtelset'in(loc,(name,new_params,def_an),t')inHashtbl.replacetblname(i,n_param,None,Sometd')andsubst_fieldenv=function|`Field(loc,k,t)->`Field(loc,k,substenvt)|`Inherit(loc,t)->`Inherit(loc,substenvt)andsubst_variantenv=functionVariant(loc,k,opt_t)asx->(matchopt_twithNone->x|Somet->Variant(loc,k,Some(substenvt)))|Inherit(loc,t)->Inherit(loc,substenvt)andsubst_only_argsenv=functionList(loc,t,a)|Name(loc,(_,"list",[t]),a)->List(loc,substenvt,a)|Option(loc,t,a)|Name(loc,(_,"option",[t]),a)->Option(loc,substenvt,a)|Nullable(loc,t,a)|Name(loc,(_,"nullable",[t]),a)->Nullable(loc,substenvt,a)|Shared(loc,t,a)|Name(loc,(_,"shared",[t]),a)->Shared(loc,substenvt,a)|Wrap(loc,t,a)|Name(loc,(_,"wrap",[t]),a)->Wrap(loc,substenvt,a)|Name(loc,(loc2,name,args),an)->Name(loc,(loc2,name,List.map(substenv)args),an)|_->assertfalsein(* first pass: add all original definitions to the table *)List.iter(fun((_,(k,pl,_),_)astd)->incrseqnum;leti=!seqnuminletn=List.lengthplinHashtbl.addtblk(i,n,Sometd,None))l;(* second pass: perform substitutions and insert new definitions *)List.iter(fun((loc,(k,pl,a),t)astd)->ifpl=[]||keep_polythen(let(i,n,_,_)=tryHashtbl.findtblkwithNot_found->assertfalseinlett'=subst[]tinlettd'=(loc,(k,pl,a),t')inHashtbl.replacetblk(i,n,Sometd,Sometd')))l;(* third pass: collect all parameterless definitions *)letl=Hashtbl.fold(fun_(i,n,_,opt_td')l->matchopt_td'withNone->l|Sometd'->ifn=0||keep_polythen(i,td')::lelsel)tbl[]inletl=List.sort(fun(i,_)(j,_)->compareij)lin(List.mapsndl,original_types)letreplace_type_names(subst:string->string)(t:type_expr):type_expr=letrecreplace(t:type_expr):type_expr=matchtwithSum(loc,vl,a)->Sum(loc,List.mapreplace_variantvl,a)|Record(loc,fl,a)->Record(loc,List.mapreplace_fieldfl,a)|Tuple(loc,tl,a)->Tuple(loc,List.map(fun(loc,x,a)->loc,replacex,a)tl,a)|List(loc,t,a)->List(loc,replacet,a)|Option(loc,t,a)->Option(loc,replacet,a)|Nullable(loc,t,a)->Nullable(loc,replacet,a)|Shared(loc,t,a)->Shared(loc,replacet,a)|Wrap(loc,t,a)->Wrap(loc,replacet,a)|Tvar(_,_)ast->t|Name(loc,(loc2,k,l),a)->Name(loc,(loc2,substk,List.mapreplacel),a)andreplace_field=function`Field(loc,k,t)->`Field(loc,k,replacet)|`Inherit(loc,t)->`Inherit(loc,replacet)andreplace_variant=functionVariant(loc,k,opt_t)asx->(matchopt_twithNone->x|Somet->Variant(loc,k,Some(replacet)))|Inherit(loc,t)->Inherit(loc,replacet)inreplacet(* Prefer MD5 over Hashtbl.hash because it won't change. *)lethex_hash_strings=Digest.strings|>Digest.to_hex|>funs->String.subs07(*
Remove punctuation and non-ascii symbols from a name and replace them
with underscores. The original case is preserved.
The result is of the form [A-Za-z][A-Za-z0-9_]+.
Example:
"@((@(bool wrap_) * type_) option)" -> "bool_wrap_type_option"
The original name can contain ATD annotations. It would be nice to
ignore them but it's not clear how. Ideally we want this:
"@(string list <ocaml valid='fun l -> true'>)"
-> "string_list"
But we get this:
"true_6a9832c"
Since it's misleading, when we see a suspected annotation, we
use just "x" followed by a hash of the original contents.
The hash has the property of making the name stable i.e. it is unlikely
to change when unrelated type definitions change.
"x_6a9832c"
*)letsuggest_good_name=letrex=Re.Pcre.regexp"([^a-zA-Z0-9])+"infunname_with_punct->letcomponents=Re.Pcre.split~rexname_with_punct|>List.filter((<>)"")inletfull_name=String.concat"_"componentsinlethash=hex_hash_stringfull_nameinletname=ifString.containsname_with_punct'<'then(* Avoid misleading names to due ATD annotations embedded in the
type name. See earlier comments. *)"x_"^hashelseifList.lengthcomponents>5then(* Avoid insanely long type names *)matchList.revcomponentswith|[]->assertfalse|[_]->assertfalse|main::rev_details->(* Place the hash after the main name rather than before because
it often starts with a digit which would have to be prefixed
by an extra letter so it can be a valid name. *)main^"_"^hashelse(* A full name that's not too long and makes sense such as
'int_bracket' for the type 'int bracket'. *)String.concat"_"componentsin(* Ensure the name starts with a letter. *)ifname=""then"x"elsematchname.[0]with|'a'..'z'|'A'..'Z'->name|_(* digit *)->"x"^nameletstandardize_type_names~prefix~original_types(defs:type_deflist):type_deflist=letreserved_identifiers=List.map(fun(k,_,_)->k)Predef.list@List.filter_map(fun(_,(k,_,_),_)->ifis_specialkthenNoneelseSomek)defsinletname_registry=Unique_name.init~reserved_identifiers~reserved_prefixes:[]~safe_prefix:""in(* The value v of the type is for extracting a good, short fallback name *)letnew_idid=(* The leading underscore is used to identify generated type names
in other places. *)Unique_name.translatename_registry~preferred_translation:(prefix^suggest_good_nameid)idinletreplace_namek=ifis_specialkthenletk'=new_idkinbegintryletorig_info=Hashtbl.findoriginal_typeskinHashtbl.removeoriginal_typesk;Hashtbl.addoriginal_typesk'orig_infowithNot_found->assertfalse(* Must have been added during expand *)end;k'elsekinletdefs=List.map(fun(loc,(k,pl,a),t)->letk'=replace_namekin(loc,(k',pl,a),t))defsinletsubstid=matchUnique_name.translate_onlyname_registryidwith|Somex->x|None->(* must have been defined as abstract *)idinList.map(fun(loc,x,t)->(loc,x,replace_type_namessubstt))defsletexpand_module_body?(prefix="_")?keep_builtins?keep_poly?(debug=false)l=lettd_list=List.map(function(Typetd)->td)linlet(td_list,original_types)=expand?keep_builtins?keep_polytd_listinlettd_list=ifdebugthentd_listelsestandardize_type_names~prefix~original_typestd_listin(List.map(funtd->(Typetd))td_list,original_types)