123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627(*
Copyright 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2017 Anton Lavrik
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*)moduleC=Piqi_commonopenCmoduleL=Piq_lexer(* tokenize '[.:]'-separated string; return separator character as string
* between separated tokens;
*
* Also, treat '.' character inside domain name part of typenames as a normal
* name character
*)lettokenize_name?starts=letl=U.string_splits'.'?startinmatchlwith|h::t->h::(U.flatmap(funx->[".";x])t)|_->assertfalselettokenize_typenames=letstart=(* tokenize only the rightmost part of the pathname *)tryString.rindexs'/'+1withNot_found->0inletnames=tokenize_names~startin":"::nameslettokenize_namefirst_cs=letparts=U.string_splits':'inmatchfirst_cwith|":"->U.flatmaptokenize_typenameparts|"."->(matchpartswith|h::t->"."::(tokenize_nameh)@(U.flatmaptokenize_typenamet)|_->assertfalse)|_->assertfalseletcheck_namen=(* XXX: this should refer to piq rather than piqi name *)ifPiqi_name.is_valid_namen~allow:"."then()elseerrorn("invalid name: "^U.quoten)letcheck_typenamen=ifPiqi_name.is_valid_typenamen~allow:"."then()elseerrorn("invalid type name: "^U.quoten)letpiq_addrefretdst(src:piq_ast)=letfiner_src=Piqloc.addrefdstiner_src;Piqloc.addrefretdstsrc;inmatchsrcwith|`intx->fx|`uintx->fx|`floatx->fx|`boolx->fx|`stringx->fx|`binaryx->fx|`wordx->fx|`textx->fx|`namex->fx|`typenamex->fx|`namedx->fx|`typedx->fx|`listx->fx|`formx->fx|`raw_stringx->fx|`any_->assertfalseletpiq_referencefx=letres=fxinifObj.reprres==Obj.reprx(* the object is unchanged -- nothing to do *)thenreselsepiq_addrefretxresletmake_namednv:piq_ast=check_namen;matchvwith|None->`namen|Somev->letres=Piq_ast.Named.({name=n;value=v})inPiqloc.addrefnres;`namedresletmake_typednv:piq_ast=check_typenamen;matchvwith|None->`typenamen|Somev->letres=Piq_ast.Typed.({typename=n;value=v})inPiqloc.addrefnres;`typedresletmake_named_or_typedcnamenv=Piqloc.addrefnamen;letres=matchcwith|"."->make_namednv|":"->make_typednv|_->assertfalseinPiqloc.addrefretnameresletexpand_nameobjcnamevalue=ifnot(String.containsname'.')&¬(String.containsname':')thenobjelseletrecaux=function|[c;n]->make_named_or_typedcnamenvalue|c::n::t->letv=auxtinmake_named_or_typedcnamen(Somev)|_->assertfalseinaux(tokenize_namecname)letexpand_obj_names(obj:piq_ast):piq_ast=letexp=expand_nameobjinmatchobjwith|`namex->exp"."xNone|`typenamex->exp":"xNone|`namedx->exp"."x.Piq_ast.Named.name(Somex.Piq_ast.Named.value)|`typedx->exp":"x.Piq_ast.Typed.typename(Somex.Piq_ast.Typed.value)|x->xletexpand_obj_names=piq_referenceexpand_obj_names(* XXX: don't create new objects if included objects are not modified *)letexpand_names(x:piq_ast):piq_ast=letrecaux0obj=matchobjwith|`named({Piq_ast.Named.name=n;Piq_ast.Named.value=v}asnamed)->letv'=auxvinifv'!=vthennamed.Piq_ast.Named.value<-v';expand_obj_namesobj|`typed({Piq_ast.Typed.typename=n;Piq_ast.Typed.value=ast}astyped)->letast'=auxastinifast'!=ast(* changed? *)thentyped.Piq_ast.Typed.value<-ast';expand_obj_namesobj|`listl->`list(List.mapauxl)|`form(name,args)->(* at this stage, after we've run expand_splices, this can not be a
* named or typed form, so leaving name without a transformation *)`form(name,List.mapauxargs)|_->expand_obj_namesobjandauxobj=piq_referenceaux0objinauxx(* rewrite the ast to expand some of forms (...) such as
forms affecting parsing associativity, e.g.:
.foo (.bar)
.foo (:bar baz)
abbreviations for repeated fields, e.g.
(.foo a b c) -> .foo a .foo b .foo c
NOTE: we perform expansion here rather than during original parsing since we
want to preserve the original formatting of forms to be able to pretty-print
without altering symbolic ast representation.
*)letcons_namednv=letres=`named{Piq_ast.Named.name=n;Piq_ast.Named.value=v}inpiq_addrefretvresletcons_typednv=letres=`typed{Piq_ast.Typed.typename=n;Piq_ast.Typed.value=v}inpiq_addrefretvresletcons_named_or_typednamev=matchnamewith|`namen->cons_namednv|`typenamen->cons_typednv(* expand named and typed splices *)letexpand_splices(x:piq_ast):piq_ast=letrecaux0obj=matchobjwith|`form(name,args)whenargs=[]->(* a single name, typename or other ast element enclosed in
* parenthesis to control associativity -- removing parenthesis *)(matchnamewith|`word_->(* we can't remove parenthesis around words, because it can be a
* 0-aritity function or macro application *)obj|_->aux0name)|`form((`name_)asname,args)|`form((`typename_)asname,args)whenargs<>[]->(matchargswith|[v]->(* this is, in fact, typed or named -- it is time to convert
* whatever we have to them *)cons_named_or_typedname(auxv)|_->errorobj"named and typed forms are allowed only inside lists")|`named({Piq_ast.Named.name=n;Piq_ast.Named.value=v}asnamed)->letv'=auxvinifv'!=vthennamed.Piq_ast.Named.value<-v';(* return the original object taking advantage of object being mutable
*)obj|`typed({Piq_ast.Typed.typename=n;Piq_ast.Typed.value=ast}astyped)->letast'=auxastinifast'!=ast(* changed? *)thentyped.Piq_ast.Typed.value<-ast';(* return the original object taking advantage of object being mutable
*)obj|`listl->`list(expand_listl)|_->objandexpand_listl=(* small optimization *)ifList.exists(function`form(`name_,_)|`form(`typename_,_)->true|_->false)lthen(* expand and splice the results of named and typed form expansion *)U.flatmapexpand_list_elemlelse(* process inner elements *)List.mapauxlandexpand_list_elem=function|`form((`name_)asname,args)|`form((`typename_)asname,args)whenargs<>[]->letargs=expand_listargsinList.map(cons_named_or_typedname)args|x->[auxx]andauxobj=piq_referenceaux0objinauxx(* expand built-in syntax abbreviations *)letexpandx=(* expand (.foo ...) or .foo* [ ... ] when possible *)letx=expand_splicesxin(* expand multi-component names *)letx=expand_namesxin(*
(* check if expansion produces correct location bindings *)
let x = expand_splices x in
let x = expand_names x in
*)xletmake_stringlocstr_typesraw_s=letvalue=(s,raw_s)inletres=matchstr_typewith|L.String_a|L.String_u->`stringvalue|L.String_b->`binaryvalueinPiqloc.addloclocvalue;Piqloc.addloclocs;Piqloc.addretresletretry_parse_uints=Piqi_c.piqi_strtoullsletparse_uints=(* NOTE:
* OCaml doesn't support large unsingned decimal integer literals. For
* instance, this call failes with exception (Failure "int_of_string"):
*
* Int64.of_string (Printf.sprintf "%Lu" 0xffff_ffff_ffff_ffffL)
*
* However it works with hex representations:
*
* Int64.of_string (Printf.sprintf "%Lu" 0xffff_ffff_ffff_ffffL)
*
* We provide custom implementation based on C strtoull() function
* -- we're using if OCaml's conversion function fails on decimal integer.
*)tryInt64.of_stringswithFailure_->retry_parse_uintsletparse_ints=trymatchs.[0]with|'-'->(* negative integer *)leti=Int64.of_stringsinletres=(i,s)inPiqloc.addrefsi;Piqloc.addrefsres;`intres|_->leti=parse_uintsinletres=(i,s)inPiqloc.addrefsi;Piqloc.addrefsres;`uintreswithFailure_->failwith("invalid integer literal: "^U.quotes)letparse_floats=(* TODO: be more specific in defining floating point syntax, e.g. disallow
* omission of trailing '0' after '.' *)tryletf=matchswith|"0.nan"->Pervasives.nan|"0.inf"->Pervasives.infinity|"-0.inf"->Pervasives.neg_infinity|_->Pervasives.float_of_stringsinletres=(f,s)inPiqloc.addrefsres;`floatreswithFailure_->failwith("invalid floating point literal: "^U.quotes)letparse_numbers=ifString.containss'.'||String.containss'e'thenparse_floatselseparse_ints(*
* a simple piq parser
*)letread_next?(skip_trailing_comma=false)(fname,lexstream)=letlocation=ref(0,0)inletloc()=letline,col=!locationin(fname,line,col)inletnext_token()=lettok,loc=Stream.nextlexstreaminlocation:=loc;tokinletpeek_token()=matchStream.peeklexstreamwith|None->assertfalse|Some(tok,loc)->location:=loc;tokinletjunk_token()=Stream.junklexstreaminleterrors=error_at(loc())sinletrecparse_common?(chain=true)=function|L.Lbr->parse_list()|L.Rbr->error"unexpected `]'"|L.Lpar->parse_form()|L.Rpar->error"unexpected `)'"|L.String(t,s,raw_s)->letloc=loc()inmake_stringloctsraw_s|L.Names->parse_named_or_typeds~chain|L.Words->letword_loc=loc()inPiqloc.addlocword_locs;letres=parse_wordsinPiqloc.addlocretword_locres|L.Raw_strings->(* Used in pretty-printing mode and in some other cases, similar to
* String, but we don't parse it -- just pass it through. *)Piqloc.addloc(loc())s;Piqloc.addret(`raw_strings)|L.Texttext->lettext_loc=loc()inlet(fname,line,col)=text_locinlettext=parse_textlinetextin(* XXX: make on off by one correction in the line number *)lettext_loc=(fname,line-1,col)inPiqloc.addloctext_loctext;Piqloc.addret(`texttext)|L.Star->error"unexpected *"|L.Comma->error"unexpected ,"|L.EOF->error"unexpected end of input"(* TODO, XXX: move this functionality to the lexer *)(* join adjacent text lines *)andparse_textprev_lineaccu=lettok=peek_token()inlet_,line,_=loc()inmatchtokwith|L.Texttextwhenprev_line+1=line->(* add next line to the text unless there's a line between them *)junk_token();parse_textline(accu^"\n"^text)|_->(* something else -- end of text block *)accuandparse_form()=letstartloc=loc()in(* parse form name *)lett=next_token()inletname=matchtwith|L.EOF->error"unexpected end of input while reading a form"|_->(* specify that we don't want to chain `typename and `name and turn
* them into `typed and `named *)parse_commont~chain:falsein(* parse form args unil ) *)letargs=parse_elementsL.Rparin(* check that this is one of `typename, `name or `word and for example not a
* number *)(matchnamewith|#Piq_ast.form_nameasform_name->(* this is a valid form *)ifPiq_ast.is_infix_formform_nameargsthenC.warningform_name"this style of named expansion form is deprecated, use <name>* [...] instead"|objwhenargs<>[]->C.errorobj"invalid form name: only words, names and typenames are allowed"|_->(* this is an ast element in parenthesis -- passing it through; we
* allow use of parenthesis for explicit control of associativity
* and to be consisten with "identity" forms like (.foo) *)());(* construct a resulting form from name and args *)letpair=(name,args)inletres=`formpairinPiqloc.addlocstartlocpair;Piqloc.addretresandparse_words=letlen=String.lengthsinletparse_numbers=tryparse_numberswithFailuree->(* allowing a string which prefix looks like a number to be parsed as
* word in relaxed parsing mode
*
* TODO: need a more robust implementation of integer parsing; for
* instance, the current implementation doesn't distingwish between
* integer overlows and invalid integer literals *)if!Config.piq_relaxed_parsingthen`wordselseerroreinmatchswith|"true"->`booltrue|"false"->`boolfalse|_whens.[0]>='0'&&s.[0]<='9'->parse_numbers|_whenlen>1&&s.[0]='-'&&s.[1]>='0'&&s.[1]<='9'->parse_numbers|_->`words(* just a word *)andparse_named_or_typeds~chain=letloc=loc()in(* cut the first character which is '.' or ':' *)letn=String.subs1(String.lengths-1)inPiqloc.addloclocn;letmake_named_or_typednvalue=ifs.[0]='.'thenmake_namednvalueelsemake_typednvalue(* s.[0] = ':' *)inletres=ifnotchain(* inside of parsing a ( ... ) form *)then(make_named_or_typednNone)elseifpeek_token()=L.Starthen((* parsing infix form: .name * [ ... ] *)junk_token();ifnext_token()<>L.Lbrthenerror"[ expected after .<name> *"else(letname=make_named_or_typednNonein(* parse list elements until ] *)letargs=parse_elementsL.Rbrin(* construct a resulting form from name and args *)letpair=(name,args)inletres=`formpairinPiqloc.addloclocpair;res))else((* regular named or typed *)letvalue=parse_named_part()inmake_named_or_typednvalue)in(*
let res = expand_obj_names res in
*)Piqloc.addlocretlocresandparse_named_part()=lett=peek_token()inmatchtwith(* name delimiters *)|L.Names->(* other name or type *)None|L.Rbr|L.Rpar(* closing parenthesis or bracket *)|L.Comma|L.EOF->(* end of input *)None(* something else *)|_->junk_token();Some(parse_commont)(* parse named object *)andparse_list()=letstartloc=loc()in(* parse list elements until ] *)letl=parse_elementsL.Rbrinletres=`listlinPiqloc.addlocstartlocl;Piqloc.addretresandparse_elementsclosing_token=letparse_elementt=letnode=parse_commontin(* skip an optional comma *)ifpeek_token()=L.Commathenjunk_token();nodeinletrecauxaccu=lett=next_token()inift=closing_tokenthenList.revaccuelseaux((parse_elementt)::accu)inaux[]inletparse_top()=lett=next_token()inmatchtwith|L.EOF->None|_->letast=parse_commontin(* skip an optional trailing comma *)ifskip_trailing_comma&&peek_token()=L.Commathenjunk_token();Someastintryparse_top()withL.Error(s,(line,col))->(* convert lexer's errors *)error_at(fname,line,col)sletread_allpiq_parser=letrecauxaccu=matchread_nextpiq_parser~skip_trailing_comma:truewith|None->List.revaccu|Somex->aux(x::accu)inaux[]letmake_lexstreamlexbuf=letf_counter=lettok=L.tokenlexbufinletloc=L.locationlexbufinSome(tok,loc)inStream.fromfletinit_from_channelfnamech=letlexbuf=L.init_from_channelchin(fname,make_lexstreamlexbuf)letinit_from_stringfnames=letlexbuf=L.init_from_stringsin(fname,make_lexstreamlexbuf)letinit_from_token_listfnamel=(fname,Stream.of_listl)