123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529(* The package sedlex is released under the terms of an MIT-like license. *)(* See the attached LICENSE file. *)(* Copyright 2005, 2013 by Alain Frisch and LexiFi. *)openPpxlibopenAst_builder.DefaultopenAst_helper(* let ocaml_version = Versions.ocaml_408 *)moduleCset=Sedlex_cset(* Decision tree for partitions *)letdefault_loc=Location.noneletlident_loc~locs={loc;txt=lidents}typedecision_tree=|Lteofint*decision_tree*decision_tree|Tableofint*intarray|Returnofintletrecsimplify_decision_tree(x:decision_tree)=matchxwith|Table_|Return_->x|Lte(_,(Returnaasl),Returnb)whena=b->l|Lte(i,l,r)->(letl=simplify_decision_treelinletr=simplify_decision_treerinmatch(l,r)with|Returna,Returnbwhena=b->l|_->Lte(i,l,r))letdecisionl=letl=List.map(fun(a,b,i)->(a,b,Returni))linletrecmerge2=function|(a1,b1,d1)::(a2,b2,d2)::rest->letx=ifb1+1=a2thend2elseLte(a2-1,Return(-1),d2)in(a1,b2,Lte(b1,d1,x))::merge2rest|rest->restinletrecaux=function|[(a,b,d)]->Lte(a-1,Return(-1),Lte(b,d,Return(-1)))|[]->Return(-1)|l->aux(merge2l)inauxlletlimit=8192letdecision_tablel=letrecauxmaccu=function|((a,b,i)asx)::remwhenb<limit&&i<255->aux(minam)(x::accu)rem|rem->(m,accu,rem)inletmin,table,rest=auxmax_int[]linmatchtablewith|[]->decisionl|[(min,max,i)]->Lte(min-1,Return(-1),Lte(max,Returni,decisionrest))|(_,max,_)::_->letarr=Array.make(max-min+1)0inletset(a,b,i)=forj=atobdoarr.(j-min)<-i+1doneinList.itersettable;Lte(min-1,Return(-1),Lte(max,Table(min,arr),decisionrest))letrecsimplifyminmax=function|Lte(i,yes,no)->ifi>=maxthensimplifyminmaxyeselseifi<minthensimplifyminmaxnoelseLte(i,simplifyminiyes,simplify(i+1)maxno)|x->xletsegments_of_partitionp=letseg=ref[]inArray.iteri(funic->List.iter(fun(a,b)->seg:=(a,b,i)::!seg)(c:Sedlex_cset.t:>(int*int)list))p;List.sort(fun(a1,_,_)(a2,_,_)->comparea1a2)!segletdecision_tablep=simplify(-1)Cset.max_code(decision_table(segments_of_partitionp))(* Helpers to build AST *)letappfunsl=letloc=default_locineapply~loc(evar~locs)lletglb_valuenamedef=letloc=default_locinpstr_value~locNonrecursive[value_binding~loc~pat:(pvar~locname)~expr:def](* Named regexps *)moduleStringMap=Map.Make(structtypet=stringletcompare=compareend)letbuiltin_regexps=List.fold_left(funacc(n,c)->StringMap.addn(Sedlex.charsc)acc)StringMap.empty([("any",Cset.any);("eof",Cset.eof);("xml_letter",Xml.letter);("xml_digit",Xml.digit);("xml_extender",Xml.extender);("xml_base_char",Xml.base_char);("xml_ideographic",Xml.ideographic);("xml_combining_char",Xml.combining_char);("xml_blank",Xml.blank);("tr8876_ident_char",Iso.tr8876_ident_char);]@Unicode.Categories.list@Unicode.Properties.list)(* Tables (indexed mapping: codepoint -> next state) *)lettables=Hashtbl.create31lettable_counter=ref0letget_tables()=Hashtbl.fold(funkeyxaccu->(x,key)::accu)tables[]lettable_namex=tryHashtbl.findtablesxwithNot_found->incrtable_counter;lets=Printf.sprintf"__sedlex_table_%i"!table_counterinHashtbl.addtablesxs;slettable(name,v)=letn=Array.lengthvinlets=Bytes.createninfori=0ton-1doBytes.setsi(Char.chrv.(i))done;glb_valuename(estring~loc:default_loc(Bytes.to_strings))(* Partition (function: codepoint -> next state) *)letpartitions=Hashtbl.create31letpartition_counter=ref0letget_partitions()=Hashtbl.fold(funkeyxaccu->(x,key)::accu)partitions[]letpartition_namex=tryHashtbl.findpartitionsxwithNot_found->incrpartition_counter;lets=Printf.sprintf"__sedlex_partition_%i"!partition_counterinHashtbl.addpartitionsxs;s(* We duplicate the body for the EOF (-1) case rather than creating
an interior utility function. *)letpartition(name,p)=letloc=default_locinletrecgen_tree=function|Lte(i,yes,no)->[%exprifc<=[%eeint~loci]then[%egen_treeyes]else[%egen_treeno]]|Returni->eint~loc:default_loci|Table(offset,t)->letc=ifoffset=0then[%exprc]else[%exprc-[%eeint~locoffset]]in[%exprChar.code(String.unsafe_get[%eevar~loc(table_namet)][%ec])-1]inletbody=gen_tree(simplify_decision_tree(decision_tablep))inglb_valuename[%exprfunc->letopen!Stdlibin[%ebody]](* Code generation for the automata *)letbest_finalfinal=letfin=refNoneinfori=Array.lengthfinal-1downto0doiffinal.(i)thenfin:=Someidone;!finletstate_funstate=Printf.sprintf"__sedlex_state_%i"stateletcall_statelexbufautostate=lettrans,final=auto.(state)inifArray.lengthtrans=0then(matchbest_finalfinalwith|Somei->eint~loc:default_loci|None->assertfalse)elseappfun(state_funstate)[lexbuf]letgen_state(lexbuf_name,lexbuf)autoi(trans,final)=letloc=default_locinletpartition=Array.mapfsttransinletcases=Array.mapi(funi(_,j)->case~lhs:(pint~loci)~guard:None~rhs:(call_statelexbufautoj))transinletcases=Array.to_listcasesinletbody()=pexp_match~loc(appfun(partition_namepartition)[[%exprSedlexing.__private__next_int[%elexbuf]]])(cases@[case~lhs:[%pat?_]~guard:None~rhs:[%exprSedlexing.backtrack[%elexbuf]];])inletretbody=letlhs=pvar~loc:lexbuf.pexp_loclexbuf_namein[value_binding~loc~pat:(pvar~loc(state_funi))~expr:(Exp.fun_~locNolabelNonelhsbody);]inmatchbest_finalfinalwith|None->ret(body())|Some_whenArray.lengthtrans=0->[]|Somei->ret[%exprSedlexing.mark[%elexbuf][%eeint~loci];[%ebody()]]letgen_recflagauto=(* The generated function is not recursive if the transitions end
in states with no further transitions. *)tryArray.iter(fun(trans_i,_)->Array.iter(fun(_,j)->lettrans_j,_=auto.(j)inifArray.lengthtrans_j>0thenraiseExit)trans_i)auto;NonrecursivewithExit->Recursiveletgen_definition((_,lexbuf)aslexbuf_with_name)lerror=letloc=default_locinletbrs=Array.of_listlinletauto=Sedlex.compile(Array.mapfstbrs)inletcases=Array.to_list(Array.mapi(funi(_,e)->case~lhs:(pint~loci)~guard:None~rhs:e)brs)inletstates=Array.mapi(gen_statelexbuf_with_nameauto)autoinletstates=List.flatten(Array.to_liststates)inpexp_let~loc(gen_recflagauto)states(pexp_sequence~loc[%exprSedlexing.start[%elexbuf]](pexp_match~loc(appfun(state_fun0)[lexbuf])(cases@[case~lhs:(ppat_any~loc)~guard:None~rhs:error])))(* Lexer specification parser *)letcodepointi=ifi<0||i>Cset.max_codethenfailwith(Printf.sprintf"Invalid Unicode code point: %i"i);iletregexp_for_charc=Sedlex.chars(Cset.singleton(Char.codec))letregexp_for_strings=letrecauxn=ifn=String.lengthsthenSedlex.epselseSedlex.seq(regexp_for_chars.[n])(aux(succn))inaux0leterrlocs=raise(Location.Error(Location.Error.createf~loc"Sedlex: %s"s))letrecrepeatr=function|0,0->Sedlex.eps|0,m->Sedlex.altSedlex.eps(Sedlex.seqr(repeatr(0,m-1)))|n,m->Sedlex.seqr(repeatr(n-1,m-1))letregexp_of_patternenv=letrecchar_pair_opfuncnameptuple=(* Construct something like Sub(a,b) *)matchtuplewith|Some{ppat_desc=Ppat_tuple[p0;p1]}->beginmatchfunc(auxp0)(auxp1)with|Somer->r|None->errp.ppat_loc@@"the "^name^" operator can only applied to single-character length \
regexps"end|_->errp.ppat_loc@@"the "^name^" operator requires two arguments, like "^name^"(a,b)"andauxp=(* interpret one pattern node *)matchp.ppat_descwith|Ppat_or(p1,p2)->Sedlex.alt(auxp1)(auxp2)|Ppat_tuple(p::pl)->List.fold_left(funrp->Sedlex.seqr(auxp))(auxp)pl|Ppat_construct({txt=Lident"Star"},Some(_,p))->Sedlex.rep(auxp)|Ppat_construct({txt=Lident"Plus"},Some(_,p))->Sedlex.plus(auxp)|Ppat_construct({txt=Lident"Rep"},Some(_,{ppat_desc=Ppat_tuple[p0;{ppat_desc=Ppat_constant(i1asi2)|Ppat_interval(i1,i2);};];}))->beginmatch(i1,i2)with|Pconst_integer(i1,_),Pconst_integer(i2,_)->leti1=int_of_stringi1inleti2=int_of_stringi2inif0<=i1&&i1<=i2thenrepeat(auxp0)(i1,i2)elseerrp.ppat_loc"Invalid range for Rep operator"|_->errp.ppat_loc"Rep must take an integer constant or interval"end|Ppat_construct({txt=Lident"Rep"},_)->errp.ppat_loc"the Rep operator takes 2 arguments"|Ppat_construct({txt=Lident"Opt"},Some(_,p))->Sedlex.altSedlex.eps(auxp)|Ppat_construct({txt=Lident"Compl"},arg)->beginmatchargwith|Some(_,p0)->beginmatchSedlex.compl(auxp0)with|Somer->r|None->errp.ppat_loc"the Compl operator can only applied to a \
single-character length regexp"end|_->errp.ppat_loc"the Compl operator requires an argument"end|Ppat_construct({txt=Lident"Sub"},arg)->char_pair_opSedlex.subtract"Sub"p(Option.map(fun(_,arg)->arg)arg)|Ppat_construct({txt=Lident"Intersect"},arg)->char_pair_opSedlex.intersection"Intersect"p(Option.map(fun(_,arg)->arg)arg)|Ppat_construct({txt=Lident"Chars"},arg)->(letconst=matchargwith|Some(_,{ppat_desc=Ppat_constantconst})->Someconst|_->Noneinmatchconstwith|Some(Pconst_string(s,_,_))->letc=refCset.emptyinfori=0toString.lengths-1doc:=Cset.union!c(Cset.singleton(Char.codes.[i]))done;Sedlex.chars!c|_->errp.ppat_loc"the Chars operator requires a string argument")|Ppat_interval(i_start,i_end)->beginmatch(i_start,i_end)with|Pconst_charc1,Pconst_charc2->Sedlex.chars(Cset.interval(Char.codec1)(Char.codec2))|Pconst_integer(i1,_),Pconst_integer(i2,_)->Sedlex.chars(Cset.interval(codepoint(int_of_stringi1))(codepoint(int_of_stringi2)))|_->errp.ppat_loc"this pattern is not a valid interval regexp"end|Ppat_constantconst->beginmatchconstwith|Pconst_string(s,_,_)->regexp_for_strings|Pconst_charc->regexp_for_charc|Pconst_integer(i,_)->Sedlex.chars(Cset.singleton(codepoint(int_of_stringi)))|_->errp.ppat_loc"this pattern is not a valid regexp"end|Ppat_var{txt=x}->begintryStringMap.findxenvwithNot_found->errp.ppat_loc(Printf.sprintf"unbound regexp %s"x)end|_->errp.ppat_loc"this pattern is not a valid regexp"inauxletprevious=ref[]letregexps=ref[]letshould_set_cookies=reffalseletmapper=object(this)inheritAst_traverse.mapassupervalenv=builtin_regexpsmethoddefine_regexpnamep={<env=StringMap.addname(regexp_of_patternenvp)env>}method!expressione=matchewith|[%expr[%sedlex[%e?{pexp_desc=Pexp_match(lexbuf,cases)}]]]->letlexbuf=matchlexbufwith|{pexp_desc=Pexp_ident{txt=Lidenttxt}}->(txt,lexbuf)|_->errlexbuf.pexp_loc"the matched expression must be a single identifier"inletcases=List.revcasesinleterror=matchList.hdcaseswith|{pc_lhs=[%pat?_];pc_rhs=e;pc_guard=None}->this#expressione|{pc_lhs=p}->errp.ppat_loc"the last branch must be a catch-all error case"inletcases=List.rev(List.tlcases)inletcases=List.map(function|{pc_lhs=p;pc_rhs=e;pc_guard=None}->(regexp_of_patternenvp,this#expressione)|{pc_guard=Somee}->erre.pexp_loc"'when' guards are not supported")casesingen_definitionlexbufcaseserror|[%exprlet[%p?{ppat_desc=Ppat_var{txt=name}}]=[%sedlex.regexp?[%p?p]]in[%e?body]]->(this#define_regexpnamep)#expressionbody|[%expr[%sedlex[%e?_]]]->erre.pexp_loc"the %sedlex extension is only recognized on match expressions"|_->super#expressionevaltoplevel=truemethodstructure_with_regexpsl=letmapper=refthisinletregexps=ref[]inletl=List.concat(List.map(function|[%strilet[%p?{ppat_desc=Ppat_var{txt=name}}]=[%sedlex.regexp?[%p?p]]]asi->regexps:=i::!regexps;mapper:=!mapper#define_regexpnamep;[]|i->[!mapper#structure_itemi])l)in(l,List.rev!regexps)method!structurel=iftoplevelthen(letsub={<toplevel=false>}inletl,regexps'=sub#structure_with_regexps(!previous@l)inletparts=List.mappartition(get_partitions())inlettables=List.maptable(get_tables())inregexps:=regexps';should_set_cookies:=true;tables@parts@l)elsefst(this#structure_with_regexpsl)endletpre_handlercookies=previous:=matchDriver.Cookies.getcookies"sedlex.regexps"Ast_pattern.__with|Some{pexp_desc=Pexp_extension(_,PStrl)}->l|Some_->assertfalse|None->[]letpost_handlercookies=if!should_set_cookiesthen(letloc=default_locinDriver.Cookies.setcookies"sedlex.regexps"(pexp_extension~loc({loc;txt="regexps"},PStr!regexps)))letextensions=[Extension.declare"sedlex"Extension.Context.expressionAst_pattern.(single_expr_payload__)(fun~loc:_~path:_expr->mapper#expressionexpr);]let()=Driver.Cookies.add_handlerpre_handler;Driver.Cookies.add_post_handlerpost_handler;Driver.register_transformation"sedlex"~impl:mapper#structure