123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444openImporttype('a,_)ast=|Alternative:'alist->('a,[>`Uncased])ast|No_case:'a->('a,[>`Cased])ast|Case:'a->('a,[>`Cased])astletdyn_of_astf=letopenDyninfunction|Alternativexs->variant"Alternative"(List.mapxs~f)|No_casea->variant"No_case"[fa]|Casea->variant"Case"[fa];;letempty_alternative:('a,'b)ast=Alternative[]letequal_ast(typea)eq(x:(a,[`Uncased])ast)(y:(a,[`Uncased])ast)=matchx,ywith|Alternativea,Alternativeb->List.equal~eqab;;letpp_ast(typeab)ffmt(ast:(a,b)ast)=letopenFmtinletvarsre=sexpfmtsfreinmatchastwith|Alternativealt->sexpfmt"Alternative"(listf)alt|Casec->var"Case"c|No_casec->var"No_case"c;;typecset=|CsetofCset.t|Intersectionofcsetlist|Complementofcsetlist|Differenceofcset*cset|Castof(cset,[`Cased|`Uncased])astletrecdyn_of_cset=letopenDyninfunction|Csetcset->variant"Cset"[Cset.to_dyncset]|Intersectionxs->variant"Intersection"(List.mapxs~f:dyn_of_cset)|Complementxs->variant"Complement"(List.mapxs~f:dyn_of_cset)|Difference(x,y)->variant"Difference"[dyn_of_csetx;dyn_of_csety]|Castc->variant"Cast"[dyn_of_astdyn_of_csetc];;type('a,'case)gen=|Setof'a|Astof(('a,'case)gen,'case)ast|Sequenceof('a,'case)genlist|Repeatof('a,'case)gen*int*intoption|Beg_of_line|End_of_line|Beg_of_word|End_of_word|Not_bound|Beg_of_str|End_of_str|Last_end_of_line|Start|Stop|Groupofstringoption*('a,'case)gen|No_groupof('a,'case)gen|Nestof('a,'case)gen|PmarkofPmark.t*('a,'case)gen|SemofAutomata.Sem.t*('a,'case)gen|Sem_greedyofAutomata.Rep_kind.t*('a,'case)genletrecdyn_of_genf=letopenDyninfunction|Seta->variant"Set"[fa]|Astast->variant"Ast"[dyn_of_ast(dyn_of_genf)ast]|Sequencexs->variant"Sequence"(List.mapxs~f:(dyn_of_genf))|Repeat(gen,min,max)->letbase=matchmaxwith|None->[]|Somex->[intx]invariant"Repeat"(dyn_of_genfgen::intmin::base)|Beg_of_line->enum"Beg_of_line"|End_of_line->enum"End_of_line"|Beg_of_word->enum"Beg_of_word"|End_of_word->enum"End_of_word"|Not_bound->enum"Not_bound"|Beg_of_str->enum"Beg_of_str"|End_of_str->enum"End_of_str"|Last_end_of_line->enum"Last_end_of_line"|Start->enum"Start"|Stop->enum"Stop"|Group(name,t)->letargs=letargs=[dyn_of_genft]inmatchnamewith|None->args|Somename->stringname::argsinvariant"Group"args|No_groupx->variant"No_group"[dyn_of_genfx]|Nestx->variant"Nest"[dyn_of_genfx]|Pmark(pmark,t)->variant"Pmark"[Pmark.to_dynpmark;dyn_of_genft]|Sem(sem,t)->variant"Sem"[Automata.Sem.to_dynsem;dyn_of_genft]|Sem_greedy(rep,t)->variant"Sem_greedy"[Automata.Rep_kind.to_dynrep;dyn_of_genft];;letrecpp_genpp_csetfmtt=letopenFormatinletopenFmtinletpp=pp_genpp_csetinletvarsre=sexpfmtsppreinletseqsrel=sexpfmts(listpp)relinmatchtwith|Setcset->pp_csetfmtcset|Sequencesq->seq"Sequence"sq|Repeat(re,start,stop)->letpp'fmt()=fprintffmt"%a@ %d%a"pprestartoptintstopinsexpfmt"Repeat"pp'()|Beg_of_line->strfmt"Beg_of_line"|End_of_line->strfmt"End_of_line"|Beg_of_word->strfmt"Beg_of_word"|End_of_word->strfmt"End_of_word"|Not_bound->strfmt"Not_bound"|Beg_of_str->strfmt"Beg_of_str"|End_of_str->strfmt"End_of_str"|Last_end_of_line->strfmt"Last_end_of_line"|Start->strfmt"Start"|Stop->strfmt"Stop"|Group(None,c)->var"Group"c|Group(Somen,c)->sexpfmt"Named_group"(pairstrpp)(n,c)|Nestc->var"Nest"c|Pmark(m,r)->sexpfmt"Pmark"(pairPmark.pppp)(m,r)|Asta->pp_astppfmta|Sem(sem,a)->sexpfmt"Sem"(pairAutomata.Sem.pppp)(sem,a)|Sem_greedy(k,re)->sexpfmt"Sem_greedy"(pairAutomata.Rep_kind.pppp)(k,re)|No_groupc->var"No_group"c;;letrecpp_csetfmtcset=letopenFmtinletseqsrel=sexpfmts(listpp_cset)relinmatchcsetwith|Casts->pp_astpp_csetfmts|Csets->sexpfmt"Set"Cset.pps|Intersectionc->seq"Intersection"c|Complementc->seq"Complement"c|Difference(a,b)->sexpfmt"Difference"(pairpp_csetpp_cset)(a,b);;letrecequalcsetx1x2=matchx1,x2with|Sets1,Sets2->csets1s2|Sequencel1,Sequencel2->List.equal~eq:(equalcset)l1l2|Repeat(x1',i1,j1),Repeat(x2',i2,j2)->Int.equali1i2&&Option.equalInt.equalj1j2&&equalcsetx1'x2'|Beg_of_line,Beg_of_line|End_of_line,End_of_line|Beg_of_word,Beg_of_word|End_of_word,End_of_word|Not_bound,Not_bound|Beg_of_str,Beg_of_str|End_of_str,End_of_str|Last_end_of_line,Last_end_of_line|Start,Start|Stop,Stop->true|Group_,Group_->(* Do not merge groups! *)false|Pmark(m1,r1),Pmark(m2,r2)->Pmark.equalm1m2&&equalcsetr1r2|Nestx,Nesty->equalcsetxy|Astx,Asty->equal_ast(equalcset)xy|Sem(sem,a),Sem(sem',a')->Poly.equalsemsem'&&equalcsetaa'|Sem_greedy(rep,a),Sem_greedy(rep',a')->Poly.equalreprep'&&equalcsetaa'|_->false;;typet=(cset,[`Cased|`Uncased])gentypeno_case=(Cset.t,[`Uncased])genletto_dyn=dyn_of_gendyn_of_csetletpp=pp_genpp_csetletcsetcset=Set(Csetcset)letrechandle_case_csetign_case=function|Csets->ifign_casethenCset.case_insensselses|Cast(Alternativel)->List.map~f:(handle_case_csetign_case)l|>Cset.union_all|Complementl->List.map~f:(handle_case_csetign_case)l|>Cset.union_all|>Cset.diffCset.cany|Difference(r,r')->Cset.inter(handle_case_csetign_caser)(Cset.diffCset.cany(handle_case_csetign_caser'))|Intersectionl->List.map~f:(handle_case_csetign_case)l|>Cset.intersect_all|Cast(No_casea)->handle_case_csettruea|Cast(Casea)->handle_case_csetfalsea;;letrechandle_caseign_case:t->(Cset.t,[`Uncased])gen=function|Sets->Set(handle_case_csetign_cases)|Sequencel->Sequence(List.map~f:(handle_caseign_case)l)|Ast(Alternativel)->letl=List.map~f:(handle_caseign_case)linAst(Alternativel)|Repeat(r,i,j)->Repeat(handle_caseign_caser,i,j)|(Beg_of_line|End_of_line|Beg_of_word|End_of_word|Not_bound|Beg_of_str|End_of_str|Last_end_of_line|Start|Stop)asr->r|Sem(k,r)->Sem(k,handle_caseign_caser)|Sem_greedy(k,r)->Sem_greedy(k,handle_caseign_caser)|Group(n,r)->Group(n,handle_caseign_caser)|No_groupr->No_group(handle_caseign_caser)|Nestr->Nest(handle_caseign_caser)|Ast(Caser)->handle_casefalser|Ast(No_caser)->handle_casetruer|Pmark(i,r)->Pmark(i,handle_caseign_caser);;moduleExport=structtypenonrect=tletpp=ppletseq=function|[r]->r|l->Sequencel;;letchar=letf=Dense_map.make~size:256~f:(funi->cset(Cset.csingle(Char.chri)))infunc->f(Char.codec);;letany=csetCset.canyletstrs:t=letl=ref[]infori=String.lengths-1downto0dol:=chars.[i]::!ldone;seq!l;;letas_set_elemselems=matchList.mapelems~f:(function|Sete->e|_->raise_notraceExit)with|exceptionExit->None|e->Somee;;letempty:t=Astempty_alternativeletalt(elems:tlist):t=matchelemswith|[]->empty|[x]->x|_->(matchas_set_elemselemswith|None->Ast(Alternativeelems)|Someelems->Set(Cast(Alternativeelems)));;letepsilon=seq[]letrepnrij=ifi<0theninvalid_arg"Re.repn";matchj,iwith|Somej,_whenj<i->invalid_arg"Re.repn"|Some0,0->epsilon|Some1,1->r|_->Repeat(r,i,j);;letrepr=repnr0Noneletrep1r=repnr1Noneletoptr=repnr0(Some1)letbol=Beg_of_lineleteol=End_of_lineletbow=Beg_of_wordleteow=End_of_wordletwordr=seq[bow;r;eow]letnot_boundary=Not_boundletbos=Beg_of_strleteos=End_of_strletwhole_stringr=seq[bos;r;eos]letleol=Last_end_of_lineletstart=Startletstop=Stoptype'bf={f:'a.'a->('a,'b)ast}letmake_setft=matchtwith|Setx->Set(Cast(f.fx))|_->Ast(f.ft);;letpreserve_setft=matchtwith|Set_->t|_->ft;;letlongest=preserve_set(funt->Sem(`Longest,t))letshortest=preserve_set(funt->Sem(`Shortest,t))letfirst=preserve_set(funt->Sem(`First,t))letgreedy=preserve_set(funt->Sem_greedy(`Greedy,t))letnon_greedy=preserve_set(funt->Sem_greedy(`Non_greedy,t))letgroup?namer=Group(name,r)letno_group=preserve_set(funt->No_groupt)letnestr=Nestrletsetstr=cset(Cset.setstr)letmarkr=leti=Pmark.gen()ini,Pmark(i,r);;(**** Character sets ****)letas_set_or_errornameelems=matchas_set_elemselemswith|None->invalid_argname|Somes->s;;letinterelems=Set(Intersection(as_set_or_error"Re.inter"elems))letcomplelems=Set(Complement(as_set_or_error"Re.compl"elems))letdiffrr'=matchr,r'with|Setr,Setr'->Set(Difference(r,r'))|_,_->invalid_arg"Re.diff";;letcase=letf={f=(funr->Caser)}infunt->make_setft;;letno_case=letf={f=(funr->No_caser)}infunt->make_setft;;letwitnesst=letrecwitness(t:no_case)=matchtwith|Setc->String.make1(Cset.to_char(Cset.pickc))|Sequencexs->String.concat""(List.map~f:witnessxs)|Ast(Alternative(x::_))->witnessx|Ast(Alternative[])->assertfalse|Repeat(r,from,_to)->letw=witnessrinletb=Buffer.create(String.lengthw*from)infor_i=1tofromdoBuffer.add_stringbwdone;Buffer.contentsb|No_groupr->witnessr|Sem_greedy(_,r)|Sem(_,r)|Nestr|Pmark(_,r)|Group(_,r)->witnessr|Beg_of_line|End_of_line|Beg_of_word|End_of_word|Not_bound|Beg_of_str|Last_end_of_line|Start|Stop|End_of_str->""inwitness(handle_casefalset);;endopenExportletrecmerge_sequences=function|[]->[]|Ast(Alternativel')::r->merge_sequences(l'@r)|Sequence(x::y)::r->(matchmerge_sequencesrwith|Sequence(x'::y')::r'whenequalCset.equalxx'->Sequence[x;Ast(Alternative[seqy;seqy'])]::r'|r'->Sequence(x::y)::r')|x::r->x::merge_sequencesr;;(*XXX Use a better algorithm allowing non-contiguous regions? *)letcolorizecolor_map(regexp:no_case)=letlnl=reffalseinletreccolorizeregexp=match(regexp:no_case)with|Sets->Color_map.splitcolor_maps|Sequencel->List.iter~f:colorizel|Ast(Alternativel)->List.iter~f:colorizel|Repeat(r,_,_)->colorizer|Beg_of_line|End_of_line->Color_map.splitcolor_mapCset.nl|Beg_of_word|End_of_word|Not_bound->Color_map.splitcolor_mapCset.cword|Beg_of_str|End_of_str|Start|Stop->()|Last_end_of_line->lnl:=true|No_groupr|Group(_,r)|Nestr|Pmark(_,r)->colorizer|Sem(_,r)|Sem_greedy(_,r)->colorizerincolorizeregexp;!lnl;;letrecanchored_ast:(t,_)ast->bool=function|Alternativeals->List.for_all~f:anchoredals|No_caser|Caser->anchoredrandanchored:t->bool=function|Asta->anchored_asta|Sequencel->List.exists~f:anchoredl|Repeat(r,i,_)->i>0&&anchoredr|No_groupr|Sem(_,r)|Sem_greedy(_,r)|Group(_,r)|Nestr|Pmark(_,r)->anchoredr|Set_|Beg_of_line|End_of_line|Beg_of_word|End_of_word|Not_bound|End_of_str|Last_end_of_line|Stop->false|Beg_of_str|Start->true;;lett_of_csetx=Setx