123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486openMigrate_parsetree(* Define the rewriter on OCaml 4.05 AST *)openAst_405letocaml_version=Versions.ocaml_405typeatom=stringmoduleCSet=Set.Make(Char)(** Regular expressions *)moduleRe=structtypet=|Epsilon|CharsofCSet.t|Atomofatom|Concatoftlist|Altoft*t|Interoft*t|Repofint*intoption*tletcompare=Pervasives.compareletequalxy=comparexy=0letepsilon=Epsilonletvoid=CharsCSet.emptyletatoms=Atomsletcharc=atom(String.make1c)letcharsetcs=Chars(CSet.of_listcs)letenumeratec1c2=ifc1>c2thenNoneelseletrecauxim=ifi>mthen[]elseChar.chri::aux(i+1)minSome(aux(Char.codec1)(Char.codec2))letconcatl=letrecaux=function|[]->[]|Concatl::l'->aux(l@l')|Epsilon::l->auxl|Charscs::_whenCSet.is_emptycs->[void]|x::l->x::auxlinmatchauxlwith|[]->Epsilon|[x]->x|l->Concatlletaltrere'=matchre,re'with|x,x'whenequalxx'->x|Charscs,xwhenCSet.is_emptycs->x|x,CharscswhenCSet.is_emptycs->x|Charscs1,Charscs2->Chars(CSet.unioncs1cs2)|a,b->leti=compareabinifi>=0thenAlt(a,b)elseAlt(b,a)letinterrere'=matchre,re'with|x,x'whenequalxx'->x|_,CharscswhenCSet.is_emptycs->void|Charscs,_whenCSet.is_emptycs->void|a,b->leti=compareabinifi>=0thenInter(a,b)elseInter(b,a)letrecrepijx=matchi,j,xwith|0,Some0,_->Epsilon|1,Some1,x->x|_,_,Epsilon->epsilon|_,_,CharscswhenCSet.is_emptycs->epsilon|_,_,Rep(i',None,x)|_,None,Rep(i',Some_,x)->rep(i*i')Nonex|_,Somej,Rep(i',Somej',x)->rep(i*i')(Some(j*j'))x|i,j,x->Rep(i,j,x)letstarx=rep0Nonexletplusx=rep1Nonexletoptx=rep0(Some1)xmoduleInfix=structlet(+)=altlet(*)xy=concat[x;y]let(!)=atomendendmoduleReMap=Map.Make(Re)moduleSMap=Map.Make(String)moduleVar:sigtypetvalpp:tFmt.tvalgen:unit->tvalname:t->stringend=structtypet=stringletppfmtx=Fmt.pffmt"'%s"xletgen=letr=ref0infun()->incrr;"v"^string_of_int!rletnamex=xend(** Type grammar *)typety=|Altoftransitionlist|Asofty*Var.t|AliasofVar.tandtransition=|Aofatom*ty|Eletrecpp_transitionppf=function|A(a,ty)->Fmt.pfppf"`%s of %a"app_tyty|E->Fmt.pfppf"`End"andpp_typpf=function|Aliasv->Var.ppppfv|As(ty,v)->Fmt.pfppf"(%a as %a)"pp_tytyVar.ppv|Altl->Fmt.pfppf"@[<hv2>[@ %a@ ]@]"Fmt.(list~sep:(unit"@ | ")pp_transition)l(** Derivatives *)letrechas_epsilon=function|Re.Epsilon->true|Atom_->false|Concatel->List.for_allhas_epsilonel|Alt(e1,e2)->has_epsilone1||has_epsilone2|Rep(0,_,_)->true|Rep(_,_,_)->false|Inter(e1,e2)->has_epsilone1&&has_epsilone2|Chars_->falseletprefixlre=letfre_c=Re.concat[re_c;re]inSMap.mapflletunion=SMap.union(fun_cre1re2->Some(Re.altre1re2))letinter=SMap.merge@@fun_cre1re2->matchre1,re2with|Somere1,Somere2->Some(Re.interre1re2)|_,_->Noneletcharsetcs=CSet.fold(funcm->SMap.add(String.make1c)Re.epsilonm)csSMap.emptyletrecheads=function|Re.Epsilon->SMap.empty|Atoma->SMap.singletonaRe.epsilon|Concatel->letrecaux=function|[]->SMap.empty|e::t->leth=prefix(headse)(Re.concatt)inifhas_epsilonethenunionh(auxt)elsehinauxel|Alt(e1,e2)->union(headse1)(headse2)|Rep(i,None,e)->prefix(headse)(Re.rep(max0(i-1))Nonee)|Rep(i,Somej,e)->prefix(headse)(Re.rep(max0(i-1))(Some(max0(j-1)))e)|Inter(e1,e2)->inter(headse1)(headse2)|Charscs->charsetcsletadd_new_tyrem=letv=Var.gen()inletm=ReMap.addre(Aliasv)minm,vletrecgotocre(map,l)=ifReMap.memremapthen(map,A(c,ReMap.findremap)::l)elseletmap,var=add_new_tyremapinletmap,ty=exploremaprein(map,A(c,As(ty,var))::l)andexploremapre=letl=headsreinletinit=ifhas_epsilonrethen[E]else[]inletmap,alts=SMap.foldgotol(map,init)inmap,Altaltsletmake_typere=letmap,var=add_new_tyreReMap.emptyinlet_,ty=exploremapreinAs(ty,var)(** Posix parser, borrowed from Re *)modulePosix=structexceptionParse_errorexceptionNot_supportedletparses=leti=ref0inletl=String.lengthsinleteos()=!i=linlettestc=not(eos())&&s.[!i]=cinletacceptc=letr=testcinifrthenincri;rinletget()=letr=s.[!i]inincri;rinletunget()=decriinletrecregexp()=regexp'(branch())andregexp'left=ifaccept'|'thenregexp'(Re.altleft(branch()))elseifaccept'&'thenregexp'(Re.interleft(branch()))elseleftandbranch()=branch'[]andbranch'left=ifeos()||test'|'||test'&'||test')'thenRe.concat(List.revleft)elsebranch'(piece()::left)andpiece()=letr=atom()inifaccept'*'thenRe.starrelseifaccept'+'thenRe.plusrelseifaccept'?'thenRe.optrelseifaccept'{'thenmatchinteger()withSomei->letj=ifaccept','theninteger()elseSomeiinifnot(accept'}')thenraiseParse_error;beginmatchjwithSomejwhenj<i->raiseParse_error|_->()end;Re.repijr|None->unget();relserandatom()=ifaccept'.'thenbeginraiseNot_supported(* if newline then Re.notnl else Re.any *)endelseifaccept'('thenbeginletr=regexp()inifnot(accept')')thenraiseParse_error;rendelseifaccept'^'thenbeginraiseNot_supported(* if newline then Re.bol else Re.bos *)endelseifaccept'$'thenbeginraiseNot_supported(* if newline then Re.eol else Re.eos *)endelseifaccept'['thenbeginifaccept'^'thenraiseNot_supported(* Re.diff (Re.compl (bracket [])) (Re.char '\n') *)elseRe.charset(bracket[])endelseifaccept'\\'thenbeginifeos()thenraiseParse_error;matchget()with'|'|'&'|'('|')'|'*'|'+'|'?'|'['|'.'|'^'|'$'|'{'|'\\'asc->Re.charc|_->raiseParse_errorendelsebeginifeos()thenraiseParse_error;matchget()with'*'|'+'|'?'|'{'|'\\'->raiseParse_error|c->Re.charcendandinteger()=ifeos()thenNoneelsematchget()with'0'..'9'asd->integer'(Char.coded-Char.code'0')|_->unget();Noneandinteger'i=ifeos()thenSomeielsematchget()with'0'..'9'asd->leti'=10*i+(Char.coded-Char.code'0')inifi'<ithenraiseParse_error;integer'i'|_->unget();Someiandbrackets=ifs<>[]&&accept']'thenselsebeginletc=char()inifaccept'-'thenbeginifaccept']'thenc::'-'::selsebeginletc'=char()inmatchRe.enumeratecc'with|None->raiseParse_error|Somel->bracket(l@s)endendelsebracket(c::s)endandchar()=ifeos()thenraiseParse_error;letc=get()inifc='['thenbeginifaccept'='thenraiseNot_supportedelseifaccept':'thenbeginraiseNot_supported(*XXX*)endelseifaccept'.'thenbeginifeos()thenraiseParse_error;letc=get()inifnot(accept'.')thenraiseNot_supported;ifnot(accept']')thenraiseParse_error;cendelsecendelsecinletres=regexp()inifnot(eos())thenraiseParse_error;resletrecsimplify(re:Re.t)=matchrewith|Re.Concatl->letrecaux=function|[]->[]|Re.Atoms::Re.Atoms'::l->aux(Re.atom(s^s')::l)|x::l->simplifyx::auxlinRe.concat(auxl)|Re.Rep(i,j,re)->Re.repij(simplifyre)|Re.Alt(re1,re2)->Re.alt(simplifyre1)(simplifyre2)|Re.Inter(re1,re2)->Re.inter(simplifyre1)(simplifyre2)|Re.Atom_|Re.Epsilon|Re.Chars_asre->reletmakelocx=trysimplify@@parsexwith|Parse_error->Location.raise_errorf~loc"This posix regular expression is invalid."|Not_supported->Location.raise_errorf~loc"This posix regular expression uses unsuported features."end(** Syntax *)openParsetreemoduleA=Ast_helperletrecre_of_parsetreepat=letloc=pat.ppat_locinmatchpat.ppat_descwith|Ppat_tuplel->Re.concat@@List.mapre_of_parsetreel|Ppat_or(p1,p2)->Re.alt(re_of_parsetreep1)(re_of_parsetreep2)|Ppat_construct({txt=Longident.Lident"Star"},None)->Location.raise_errorf~loc:pat.ppat_loc"Star constructors take an argument."|Ppat_construct({txt=Longident.Lident"Star"},Somep)->Re.star(re_of_parsetreep)|Ppat_construct({txt=Longident.Lident("Eps"|"Epsilon")},None)->Re.epsilon|Ppat_construct({txt=Longident.Lident("Eps"|"Epsilon")},Some_)->Location.raise_errorf~loc:pat.ppat_loc"Epsilon constructors take no arguments."|Ppat_construct({txt=Longident.Lident("()")},None)->Re.void|Ppat_construct({txt=Longident.Lident("()")},Some_)->Location.raise_errorf~loc:pat.ppat_loc"Void constructors take no arguments."|Ppat_construct({txt=Longident.Lident("Inter"|"::")},Some{ppat_desc=Ppat_tuple[p1;p2]})->Re.inter(re_of_parsetreep1)(re_of_parsetreep2)|Ppat_construct({txt=Longident.Lident("Inter"|"::")},_)->Location.raise_errorf~loc:pat.ppat_loc"Inter constructors take exactly two arguments."|Ppat_variant(s,None)->Re.atoms|Ppat_variant(_,Some_)->Location.raise_errorf~loc:pat.ppat_loc"Symbols can not have arguments."|Ppat_interval(Pconst_charc1,Pconst_charc2)->beginmatchRe.enumeratec1c2with|Somel->Re.charsetl|None->Location.raise_errorf~loc"This character range is ill-formed."end|_->Location.raise_errorf~loc:pat.ppat_loc"This is not a valid regular expression constructor."letmake_variant~locdirl=matchdirwith|`Eq->A.Typ.variant~loclClosedNone|`GEq->A.Typ.variant~loclOpenNone|`LEq->A.Typ.variant~loclClosed(Some[])letrecparsetree_of_tylocdirty=matchtywith|Altl->letaux=function|E->Rtag("End",[],true,[])|A(c,ty)->Rtag(c,[],false,[parsetree_of_tylocdirty])inmake_variant~locdir(List.mapauxl)|As(ty,v)->A.Typ.alias~loc(parsetree_of_tylocdirty)(Var.namev)|Aliasv->A.Typ.var~loc(Var.namev)letget_extensions=matchStr.split(Str.regexp_string".")swith|["pumping";"re"]|["re"]->Some`Eq|["pumping";"re";("le"|"less")]|["re";("le"|"less")]->Some`LEq|["pumping";"re";("ge"|"greater")]|["re";("ge"|"greater")]->Some`GEq|_->Noneletmapper=letmoduleAM=Ast_mapperinlettypmapperx=letloc=x.ptyp_locinmatchx.ptyp_descwith|Ptyp_extension({txt},payload)whenget_extensiontxt<>None->letdir=matchget_extensiontxtwithSomex->x|None->assertfalseinletp=matchpayloadwith|PStr[{pstr_desc=Pstr_eval({pexp_loc=loc;pexp_desc=Pexp_constant(Pconst_string(s,_))},_)}]|PPat({ppat_loc=loc;ppat_desc=Ppat_constant(Pconst_string(s,_))},None)->Posix.makelocs|PPat(p,None)->re_of_parsetreep|PStr_|PSig_|PTyp_|PPat(_,Some_)->Location.raise_errorf~loc"The payload of this extension should be either a pattern or a string."inparsetree_of_tylocdir@@make_typep|_->AM.default_mapper.typmapperxin{AM.default_mapperwithtyp}(* Register the rewriter in the driver *)let()=Driver.register~name:"pumping"ocaml_version(fun_config_cookies->mapper)(*
Pumping - Regular languages in types
Copyright (C) 2017 Gabriel Radanne <drupyog@zoho.com>
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*)