123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053(*
PCRE-OCAML - Perl Compatibility Regular Expressions for OCaml
Copyright (C) 1999- Markus Mottl
email: markus.mottl@gmail.com
WWW: http://www.ocaml.info
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*)(* Public exceptions and their registration with the C runtime *)typeerror=|Partial|BadPartial|BadPatternofstring*int|BadUTF8|BadUTF8Offset|MatchLimit|RecursionLimit|InternalErrorofstringexceptionErroroferrorexceptionBacktrackexceptionRegexp_orofstring*error(* Puts exceptions into global C-variables for fast retrieval *)externalpcre_ocaml_init:unit->unit="pcre_ocaml_init"(* Registers exceptions with the C runtime and caches polymorphic variants *)let()=Callback.register_exception"Pcre.Error"(Error(InternalError""));Callback.register_exception"Pcre.Backtrack"Backtrack;pcre_ocaml_init()(* Compilation and runtime flags and their conversion functions *)typeicflag=inttypeirflag=int(* Compilation flags *)typecflag=[|`CASELESS|`MULTILINE|`DOTALL|`EXTENDED|`ANCHORED|`DOLLAR_ENDONLY|`EXTRA|`UNGREEDY|`UTF8|`NO_UTF8_CHECK|`NO_AUTO_CAPTURE|`AUTO_CALLOUT|`FIRSTLINE]letint_of_cflag=function|`CASELESS->0x0001|`MULTILINE->0x0002|`DOTALL->0x0004|`EXTENDED->0x0008|`ANCHORED->0x0010|`DOLLAR_ENDONLY->0x0020|`EXTRA->0x0040|`UNGREEDY->0x0200|`UTF8->0x0800|`NO_AUTO_CAPTURE->0x1000|`NO_UTF8_CHECK->0x2000|`AUTO_CALLOUT->0x4000|`FIRSTLINE->0x40000letcoll_icflagicflagflag=int_of_cflagflagloricflagletcflagsflags=List.fold_leftcoll_icflag0flagsletcflag_of_int=function|0x0001->`CASELESS|0x0002->`MULTILINE|0x0004->`DOTALL|0x0008->`EXTENDED|0x0010->`ANCHORED|0x0020->`DOLLAR_ENDONLY|0x0040->`EXTRA|0x0200->`UNGREEDY|0x0800->`UTF8|0x1000->`NO_AUTO_CAPTURE|0x2000->`NO_UTF8_CHECK|0x4000->`AUTO_CALLOUT|0x40000->`FIRSTLINE|_->failwith"Pcre.cflag_list: unknown compilation flag"letall_cflags=[0x0001;0x0002;0x0004;0x0008;0x0010;0x0020;0x0040;0x0200;0x0800;0x1000;0x2000;0x4000;0x40000;]letcflag_listicflags=letcollflag_listflag=ificflagslandflag<>0thencflag_of_intflag::flag_listelseflag_listinList.fold_leftcoll[]all_cflags(* Runtime flags *)typerflag=[|`ANCHORED|`NOTBOL|`NOTEOL|`NOTEMPTY|`PARTIAL]letint_of_rflag=function|`ANCHORED->0x0010|`NOTBOL->0x0080|`NOTEOL->0x0100|`NOTEMPTY->0x0400|`PARTIAL->0x8000letcoll_irflagirflagflag=int_of_rflagflaglorirflagletrflagsflags=List.fold_leftcoll_irflag0flagsletrflag_of_int=function|0x0010->`ANCHORED|0x0080->`NOTBOL|0x0100->`NOTEOL|0x0400->`NOTEMPTY|0x8000->`PARTIAL|_->failwith"Pcre.rflag_list: unknown runtime flag"letall_rflags=[0x0010;0x0080;0x0100;0x0400;0x8000]letrflag_listirflags=letcollflag_listflag=ifirflagslandflag<>0thenrflag_of_intflag::flag_listelseflag_listinList.fold_leftcoll[]all_rflags(* Information on the PCRE-configuration (build-time options) *)externalpcre_version:unit->string="pcre_version_stub"externalpcre_config_utf8:unit->bool="pcre_config_utf8_stub"[@@noalloc]externalpcre_config_newline:unit->char="pcre_config_newline_stub"[@@noalloc]externalpcre_config_link_size:unit->(int[@untagged])="pcre_config_link_size_stub_bc""pcre_config_link_size_stub"[@@noalloc]externalpcre_config_match_limit:unit->(int[@untagged])="pcre_config_match_limit_stub_bc""pcre_config_match_limit_stub"[@@noalloc]externalpcre_config_match_limit_recursion:unit->(int[@untagged])="pcre_config_match_limit_recursion_stub_bc""pcre_config_match_limit_recursion_stub"[@@noalloc]externalpcre_config_stackrecurse:unit->bool="pcre_config_stackrecurse_stub"[@@noalloc]letversion=pcre_version()letconfig_utf8=pcre_config_utf8()letconfig_newline=pcre_config_newline()letconfig_link_size=pcre_config_link_size()letconfig_match_limit=pcre_config_match_limit()letconfig_match_limit_recursion=pcre_config_match_limit_recursion()letconfig_stackrecurse=pcre_config_stackrecurse()(* Information on patterns *)typefirstbyte_info=[`Charofchar|`Start_only|`ANCHORED]typestudy_stat=[`Not_studied|`Studied|`Optimal]typeregexpexternaloptions:regexp->(icflag[@untagged])="pcre_options_stub_bc""pcre_options_stub"externalsize:regexp->(int[@untagged])="pcre_size_stub_bc""pcre_size_stub"externalstudysize:regexp->(int[@untagged])="pcre_studysize_stub_bc""pcre_studysize_stub"externalcapturecount:regexp->(int[@untagged])="pcre_capturecount_stub_bc""pcre_capturecount_stub"externalbackrefmax:regexp->(int[@untagged])="pcre_backrefmax_stub_bc""pcre_backrefmax_stub"externalnamecount:regexp->(int[@untagged])="pcre_namecount_stub_bc""pcre_namecount_stub"externalnameentrysize:regexp->(int[@untagged])="pcre_nameentrysize_stub_bc""pcre_nameentrysize_stub"externalnames:regexp->stringarray="pcre_names_stub"externalfirstbyte:regexp->firstbyte_info="pcre_firstbyte_stub"externalfirsttable:regexp->stringoption="pcre_firsttable_stub"externallastliteral:regexp->charoption="pcre_lastliteral_stub"externalstudy_stat:regexp->study_stat="pcre_study_stat_stub"[@@noalloc](* Compilation of patterns *)typechtablesexternalmaketables:unit->chtables="pcre_maketables_stub"(* Internal use only! *)externalpcre_study:regexp->unit="pcre_study_stub"externalcompile:(icflag[@untagged])->chtablesoption->string->regexp="pcre_compile_stub_bc""pcre_compile_stub"externalget_match_limit:regexp->intoption="pcre_get_match_limit_stub"externalget_match_limit_recursion:regexp->intoption="pcre_get_match_limit_recursion_stub"(* Internal use only! *)externalset_imp_match_limit:regexp->(int[@untagged])->regexp="pcre_set_imp_match_limit_stub_bc""pcre_set_imp_match_limit_stub"[@@noalloc](* Internal use only! *)externalset_imp_match_limit_recursion:regexp->(int[@untagged])->regexp="pcre_set_imp_match_limit_recursion_stub_bc""pcre_set_imp_match_limit_recursion_stub"[@@noalloc]letregexp?(study=true)?limit?limit_recursion?(iflags=0)?flags?chtablespat=letrex=matchflagswith|Someflag_list->compile(cflagsflag_list)chtablespat|_->compileiflagschtablespatinifstudythenpcre_studyrex;letrex=matchlimitwith|None->rex|Somelim->set_imp_match_limitrexliminmatchlimit_recursionwith|None->rex|Somelim->set_imp_match_limit_recursionrexlimletregexp_or?study?limit?limit_recursion?(iflags=0)?flags?chtablespats=letcheckpat=tryignore(regexp~study:false~iflags?flags?chtablespat)withErrorerror->raise(Regexp_or(pat,error))inList.itercheckpats;letbig_pat=letcnvpat="(?:"^pat^")"inString.concat"|"(List.rev(List.rev_mapcnvpats))inregexp?study?limit?limit_recursion~iflags?flags?chtablesbig_patletbytes_unsafe_blit_stringstrstr_ofsbtsbts_ofslen=letstr_bts=Bytes.unsafe_of_stringstrinBytes.unsafe_blitstr_btsstr_ofsbtsbts_ofslenletstring_unsafe_substrofslen=letres=Bytes.createleninbytes_unsafe_blit_stringstrofsres0len;Bytes.unsafe_to_stringresletquotes=letlen=String.lengthsinletbuf=Bytes.create(lenlsl1)inletpos=ref0infori=0tolen-1domatchString.unsafe_getsiwith|'\\'|'^'|'$'|'.'|'['|'|'|'('|')'|'?'|'*'|'+'|'{'asc->Bytes.unsafe_setbuf!pos'\\';incrpos;Bytes.unsafe_setbuf!posc;incrpos|c->Bytes.unsafe_setbuf!posc;incrposdone;string_unsafe_sub(Bytes.unsafe_to_stringbuf)0!pos(* Matching of patterns and subpattern extraction *)(* Default regular expression when none is provided by the user *)letdef_rex=regexp"\\s+"typesubstrings=string*intarraytypecallout_data={callout_number:int;substrings:substrings;start_match:int;current_position:int;capture_top:int;capture_last:int;pattern_position:int;next_item_length:int;}typecallout=callout_data->unitletget_subject(subj,_)=subjletnum_of_subs(_,ovector)=Array.lengthovector/3letget_offset_startovectorstr_num=ifstr_num<0||str_num>=Array.lengthovector/3theninvalid_arg"Pcre.get_offset_start: illegal offset";letoffset=str_numlsl1inoffset,Array.unsafe_getovectoroffsetletget_substring_aux(subj,ovector)offsetstart=ifstart<0thenraiseNot_foundelsestring_unsafe_subsubjstart(Array.unsafe_getovector(offset+1)-start)letget_substring(_,ovectorassubstrings)str_num=letoffset,start=get_offset_startovectorstr_numinget_substring_auxsubstringsoffsetstartletget_substring_ofs(_subj,ovector)str_num=letoffset,start=get_offset_startovectorstr_numinifstart<0thenraiseNot_foundelsestart,Array.unsafe_getovector(offset+1)letunsafe_get_substring(_,ovectorassubstrings)str_num=letoffset=str_numlsl1intryget_substring_auxsubstringsoffset(Array.unsafe_getovectoroffset)withNot_found->""letget_substrings?(full_match=true)(_,ovectorassubstrings)=iffull_matchthenArray.init(Array.lengthovector/3)(unsafe_get_substringsubstrings)elseletlen=(Array.lengthovector/3)-1inArray.initlen(funn->unsafe_get_substringsubstrings(n+1))letunsafe_get_opt_substring(_,ovectorassubstrings)str_num=letoffset=str_numlsl1intryletstart=Array.unsafe_getovectoroffsetinletstr=get_substring_auxsubstringsoffsetstartinSomestrwithNot_found->Noneletget_opt_substrings?(full_match=true)(_,ovectorassubstrings)=iffull_matchthenArray.init(Array.lengthovector/3)(unsafe_get_opt_substringsubstrings)elseletlen=(Array.lengthovector/3)-1inArray.initlen(funn->unsafe_get_opt_substringsubstrings(n+1))externalget_stringnumber:regexp->string->(int[@untagged])="pcre_get_stringnumber_stub_bc""pcre_get_stringnumber_stub"letget_named_substringrexnamesubstrings=get_substringsubstrings(get_stringnumberrexname)letget_named_substring_ofsrexnamesubstrings=get_substring_ofssubstrings(get_stringnumberrexname)externalunsafe_pcre_exec:(irflag[@untagged])->regexp->pos:(int[@untagged])->subj_start:(int[@untagged])->subj:string->intarray->calloutoption->unit="pcre_exec_stub_bc""pcre_exec_stub"letmake_ovectorrex=letsubgroups1=capturecountrex+1inletsubgroups2=subgroups1lsl1insubgroups2,Array.make(subgroups1+subgroups2)0letpcre_exec?(iflags=0)?flags?(rex=def_rex)?pat?(pos=0)?calloutsubj=letrex=matchpatwithSomestr->regexpstr|_->rexinletiflags=matchflagswithSomeflags->rflagsflags|_->iflagsinlet_,ovector=make_ovectorrexinunsafe_pcre_execiflagsrex~pos~subj_start:0~subjovectorcallout;ovectorletexec?iflags?flags?rex?pat?pos?calloutsubj=subj,pcre_exec?iflags?flags?rex?pat?pos?calloutsubjletnext_match?iflags?flags?rex?pat?(pos=0)?callout(subj,ovector)=letpos=Array.unsafe_getovector1+posinletsubj_len=String.lengthsubjinifpos<0||pos>subj_lentheninvalid_arg"Pcre.next_match: illegal offset";subj,pcre_exec?iflags?flags?rex?pat~pos?calloutsubjletreccopy_lstarn=function|[]->ar|h::t->Array.unsafe_setarnh;copy_lstar(n-1)tletexec_all?(iflags=0)?flags?(rex=def_rex)?pat?pos?calloutsubj=letrex=matchpatwithSomestr->regexpstr|_->rexinletiflags=matchflagswithSomeflags->rflagsflags|_->iflagsinlet(_,ovectorassstrs)=exec~iflags~rex?pos?calloutsubjinletnull_flags=iflagslor0x0400inletsubj_len=String.lengthsubjinletreclooppos(subj,ovectorassstrs)nlst=letmaybe_ovector=tryletfirst=Array.unsafe_getovector0iniffirst=pos&&Array.unsafe_getovector1=posthenifpos=subj_lenthenNoneelseSome(pcre_exec~iflags:null_flags~rex~pos?calloutsubj)elseSome(pcre_exec~iflags~rex~pos?calloutsubj)withNot_found->Noneinmatchmaybe_ovectorwith|Someovector->letnew_pos=Array.unsafe_getovector1inloopnew_pos(subj,ovector)(n+1)(sstrs::lst)|None->copy_lst(Array.make(n+1)sstrs)(n-1)lstinloop(Array.unsafe_getovector1)sstrs0[]letextract?iflags?flags?rex?pat?pos?full_match?calloutsubj=get_substrings?full_match(exec?iflags?flags?rex?pat?pos?calloutsubj)letextract_opt?iflags?flags?rex?pat?pos?full_match?calloutsubj=get_opt_substrings?full_match(exec?iflags?flags?rex?pat?pos?calloutsubj)letextract_all?iflags?flags?rex?pat?pos?full_match?calloutsubj=letmany_sstrs=exec_all?iflags?flags?rex?pat?pos?calloutsubjinArray.map(get_substrings?full_match)many_sstrsletextract_all_opt?iflags?flags?rex?pat?pos?full_match?calloutsubj=letmany_sstrs=exec_all?iflags?flags?rex?pat?pos?calloutsubjinArray.map(get_opt_substrings?full_match)many_sstrsletpmatch?iflags?flags?rex?pat?pos?calloutsubj=tryignore(pcre_exec?iflags?flags?rex?pat?pos?calloutsubj);truewithNot_found->false(* String substitution *)(* Elements of a substitution pattern *)typesubst=|SubstStringofint*int(* Denotes a substring in the substitution *)|Backrefofint(* nth backreference ($0 is program name!) *)|Match(* The whole matched string *)|PreMatch(* The string before the match *)|PostMatch(* The string after the match *)|LastParenMatch(* The last matched group *)(* Information on substitution patterns *)typesubstitution=string(* The substitution string *)*int(* Highest group number of backreferences *)*bool(* Makes use of "LastParenMatch" *)*substlist(* The list of substitution elements *)(* Only used internally in "subst" *)exceptionFoundAtofintletzero=Char.code'0'letsubststr=letmax_br=ref0inletwith_lp=reffalseinletlix=String.lengthstr-1inletrecloopaccn=iflix<nthenaccelsetryfori=ntolixdoifString.unsafe_getstri='$'thenraise(FoundAti)done;SubstString(n,lix-n+1)::accwithFoundAti->ifi=lixthenSubstString(n,lix-n+1)::accelseleti1=i+1inletacc=ifn=ithenaccelseSubstString(n,i-n)::accinmatchString.unsafe_getstri1with|'0'..'9'asc->letsubpat_nr=ref(Char.codec-zero)in(tryforj=i1+1tolixdoletc=String.unsafe_getstrjinifc>='0'&&c<='9'thensubpat_nr:=10*!subpat_nr+Char.codec-zeroelseraise(FoundAtj)done;max_br:=max!subpat_nr!max_br;Backref!subpat_nr::accwithFoundAtj->max_br:=max!subpat_nr!max_br;loop(Backref!subpat_nr::acc)j)|'!'->loopacc(i1+1)|'$'->loop(SubstString(i1,1)::acc)(i1+1)|'&'->loop(Match::acc)(i1+1)|'`'->loop(PreMatch::acc)(i1+1)|'\''->loop(PostMatch::acc)(i1+1)|'+'->with_lp:=true;loop(LastParenMatch::acc)(i1+1)|_->loopacci1inletsubst_lst=loop[]0instr,!max_br,!with_lp,subst_lstletdef_subst=subst""(* Calculates a list of tuples (str, offset, len) which contain
substrings to be copied on substitutions. Internal use only! *)letcalc_trans_lstsubgroups2ovectorsubjtemplsubst_lst=letprefix_len=Array.unsafe_getovector0inletlast=Array.unsafe_getovector1inletcoll(res_len,trans_lstasaccu)=letreturn_lst(_str,_ix,lenasel)=iflen=0thenaccuelseres_len+len,el::trans_lstinfunction|SubstString(ix,len)->return_lst(templ,ix,len)|Backref0->letprog_name=Sys.argv.(0)inreturn_lst(prog_name,0,String.lengthprog_name)|Backrefn->letoffset=nlsl1inletstart=Array.unsafe_getovectoroffsetinletlen=Array.unsafe_getovector(offset+1)-startinreturn_lst(subj,start,len)|Match->return_lst(subj,prefix_len,last-prefix_len)|PreMatch->return_lst(subj,0,prefix_len)|PostMatch->return_lst(subj,last,String.lengthsubj-last)|LastParenMatch->letsubgroups2_2=subgroups2-2inletpos=refsubgroups2_2inletix=ref(Array.unsafe_getovectorsubgroups2_2)inwhile!ix<0doletpos_2=!pos-2inpos:=pos_2;ix:=Array.unsafe_getovectorpos_2done;return_lst(subj,!ix,Array.unsafe_getovector(!pos+1)-!ix)inList.fold_leftcoll(0,[])subst_lstletreplace?(iflags=0)?flags?(rex=def_rex)?pat?(pos=0)?(itempl=def_subst)?templ?calloutsubj=letrex=matchpatwithSomestr->regexpstr|_->rexinletiflags=matchflagswithSomeflags->rflagsflags|_->iflagsinlettempl,max_br,with_lp,subst_lst=matchtemplwith|Somestr->subststr|_->itemplinletsubj_len=String.lengthsubjinifpos<0||pos>subj_lentheninvalid_arg"Pcre.replace: illegal offset";letsubgroups2,ovector=make_ovectorrexinletnsubs=(subgroups2lsr1)-1inifmax_br>nsubsthenfailwith"Pcre.replace: backreference denotes nonexistent subpattern";ifwith_lp&&nsubs=0thenfailwith"Pcre.replace: no backreferences";letrecloopfull_lentrans_lstscur_pos=ifcur_pos>subj_len||tryunsafe_pcre_execiflagsrex~pos:cur_pos~subj_start:0~subjovectorcallout;falsewithNot_found->truethenletpostfix_len=max(subj_len-cur_pos)0inletleft=pos+full_leninletres=Bytes.create(left+postfix_len)inbytes_unsafe_blit_stringsubj0res0pos;bytes_unsafe_blit_stringsubjcur_posresleftpostfix_len;letinner_collofs(templ,ix,len)=bytes_unsafe_blit_stringtemplixresofslen;ofs+leninletcollofs(res_len,trans_lst)=letnew_ofs=ofs-res_leninlet_=List.fold_leftinner_collnew_ofstrans_lstinnew_ofsinlet_=List.fold_leftcolllefttrans_lstsinBytes.unsafe_to_stringreselseletfirst=Array.unsafe_getovector0inletlen=first-cur_posinletres_len,_astrans_lst_el=calc_trans_lstsubgroups2ovectorsubjtemplsubst_lstinlettrans_lsts=iflen>0thentrans_lst_el::(len,[(subj,cur_pos,len)])::trans_lstselsetrans_lst_el::trans_lstsinletfull_len=full_len+len+res_leninletnext=first+1inletlast=Array.unsafe_getovector1iniflast<nexttheniffirst<subj_lenthenletnew_trans_lsts=(1,[(subj,cur_pos+len,1)])::trans_lstsinloop(full_len+1)new_trans_lstsnextelseloopfull_lentrans_lstsnextelseloopfull_lentrans_lstslastinloop0[]posletqreplace?(iflags=0)?flags?(rex=def_rex)?pat?(pos=0)?(templ="")?calloutsubj=letrex=matchpatwithSomestr->regexpstr|_->rexinletiflags=matchflagswithSomeflags->rflagsflags|_->iflagsinletsubj_len=String.lengthsubjinifpos<0||pos>subj_lentheninvalid_arg"Pcre.qreplace: illegal offset";lettempl_len=String.lengthtemplinlet_,ovector=make_ovectorrexinletrecloopfull_lensubst_lstcur_pos=ifcur_pos>subj_len||tryunsafe_pcre_execiflagsrex~pos:cur_pos~subj_start:0~subjovectorcallout;falsewithNot_found->truethenletpostfix_len=max(subj_len-cur_pos)0inletleft=pos+full_leninletres=Bytes.create(left+postfix_len)inbytes_unsafe_blit_stringsubj0res0pos;bytes_unsafe_blit_stringsubjcur_posresleftpostfix_len;letcollofs=function|Some(substr,ix,len)->letnew_ofs=ofs-leninbytes_unsafe_blit_stringsubstrixresnew_ofslen;new_ofs|None->letnew_ofs=ofs-templ_leninbytes_unsafe_blit_stringtempl0resnew_ofstempl_len;new_ofsinlet_=List.fold_leftcollleftsubst_lstinBytes.unsafe_to_stringreselseletfirst=Array.unsafe_getovector0inletlen=first-cur_posinletsubst_lst=iflen>0thenNone::Some(subj,cur_pos,len)::subst_lstelseNone::subst_lstinletlast=Array.unsafe_getovector1inletfull_len=full_len+len+templ_leninletnext=first+1iniflast<nexttheniffirst<subj_lenthenloop(full_len+1)(Some(subj,cur_pos+len,1)::subst_lst)nextelseloopfull_lensubst_lstnextelseloopfull_lensubst_lstlastinloop0[]posletsubstitute_substrings?(iflags=0)?flags?(rex=def_rex)?pat?(pos=0)?callout~substsubj=letrex=matchpatwithSomestr->regexpstr|_->rexinletiflags=matchflagswithSomeflags->rflagsflags|_->iflagsinletsubj_len=String.lengthsubjinifpos<0||pos>subj_lentheninvalid_arg"Pcre.substitute: illegal offset";let_,ovector=make_ovectorrexinletrecloopfull_lensubst_lstcur_pos=ifcur_pos>subj_len||tryunsafe_pcre_execiflagsrex~pos:cur_pos~subj_start:0~subjovectorcallout;falsewithNot_found->truethenletpostfix_len=max(subj_len-cur_pos)0inletleft=pos+full_leninletres=Bytes.create(left+postfix_len)inbytes_unsafe_blit_stringsubj0res0pos;bytes_unsafe_blit_stringsubjcur_posresleftpostfix_len;letcollofs(templ,ix,len)=letnew_ofs=ofs-leninbytes_unsafe_blit_stringtemplixresnew_ofslen;new_ofsinlet_=List.fold_leftcollleftsubst_lstinBytes.unsafe_to_stringreselseletfirst=Array.unsafe_getovector0inletlen=first-cur_posinlettempl=subst(subj,ovector)inlettempl_len=String.lengthtemplinletsubst_lst=iflen>0then(templ,0,templ_len)::(subj,cur_pos,len)::subst_lstelse(templ,0,templ_len)::subst_lstinletlast=Array.unsafe_getovector1inletfull_len=full_len+len+templ_leninletnext=first+1iniflast<nexttheniffirst<subj_lenthenloop(full_len+1)((subj,cur_pos+len,1)::subst_lst)nextelseloopfull_lensubst_lstnextelseloopfull_lensubst_lstlastinloop0[]posletsubstitute?iflags?flags?rex?pat?pos?callout~subst:str_substsubj=letsubst(subj,ovector)=letfirst=Array.unsafe_getovector0inletlast=Array.unsafe_getovector1instr_subst(string_unsafe_subsubjfirst(last-first))insubstitute_substrings?iflags?flags?rex?pat?pos?callout~substsubjletreplace_first?(iflags=0)?flags?(rex=def_rex)?pat?(pos=0)?(itempl=def_subst)?templ?calloutsubj=letrex=matchpatwithSomestr->regexpstr|_->rexinletiflags=matchflagswithSomeflags->rflagsflags|_->iflagsinlettempl,max_br,with_lp,subst_lst=matchtemplwith|Somestr->subststr|_->itemplinletsubgroups2,ovector=make_ovectorrexinletnsubs=(subgroups2lsr1)-1inifmax_br>nsubsthenfailwith"Pcre.replace_first: backreference denotes nonexistent subpattern";ifwith_lp&&nsubs=0thenfailwith"Pcre.replace_first: no backreferences";tryunsafe_pcre_execiflagsrex~pos~subj_start:0~subjovectorcallout;letres_len,trans_lst=calc_trans_lstsubgroups2ovectorsubjtemplsubst_lstinletfirst=Array.unsafe_getovector0inletlast=Array.unsafe_getovector1inletrest=String.lengthsubj-lastinletres=Bytes.create(first+res_len+rest)inbytes_unsafe_blit_stringsubj0res0first;letcollofs(templ,ix,len)=bytes_unsafe_blit_stringtemplixresofslen;ofs+leninletofs=List.fold_leftcollfirsttrans_lstinbytes_unsafe_blit_stringsubjlastresofsrest;Bytes.unsafe_to_stringreswithNot_found->subjletqreplace_first?(iflags=0)?flags?(rex=def_rex)?pat?(pos=0)?(templ="")?calloutsubj=letrex=matchpatwithSomestr->regexpstr|_->rexinletiflags=matchflagswithSomeflags->rflagsflags|_->iflagsinlet_,ovector=make_ovectorrexintryunsafe_pcre_execiflagsrex~pos~subj_start:0~subjovectorcallout;letfirst=Array.unsafe_getovector0inletlast=Array.unsafe_getovector1inletlen=String.lengthtemplinletrest=String.lengthsubj-lastinletpostfix_start=first+leninletres=Bytes.create(postfix_start+rest)inbytes_unsafe_blit_stringsubj0res0first;bytes_unsafe_blit_stringtempl0resfirstlen;bytes_unsafe_blit_stringsubjlastrespostfix_startrest;Bytes.unsafe_to_stringreswithNot_found->subjletsubstitute_substrings_first?(iflags=0)?flags?(rex=def_rex)?pat?(pos=0)?callout~substsubj=letrex=matchpatwithSomestr->regexpstr|_->rexinletiflags=matchflagswithSomeflags->rflagsflags|_->iflagsinlet_,ovector=make_ovectorrexintryunsafe_pcre_execiflagsrex~pos~subj_start:0~subjovectorcallout;letsubj_len=String.lengthsubjinletprefix_len=Array.unsafe_getovector0inletlast=Array.unsafe_getovector1inlettempl=subst(subj,ovector)inletpostfix_len=subj_len-lastinlettempl_len=String.lengthtemplinletpostfix_start=prefix_len+templ_leninletres=Bytes.create(postfix_start+postfix_len)inbytes_unsafe_blit_stringsubj0res0prefix_len;bytes_unsafe_blit_stringtempl0resprefix_lentempl_len;bytes_unsafe_blit_stringsubjlastrespostfix_startpostfix_len;Bytes.unsafe_to_stringreswithNot_found->subjletsubstitute_first?iflags?flags?rex?pat?pos?callout~subst:str_substsubj=letsubst(subj,ovector)=letfirst=Array.unsafe_getovector0inletlast=Array.unsafe_getovector1instr_subst(string_unsafe_subsubjfirst(last-first))insubstitute_substrings_first?iflags?flags?rex?pat?pos?callout~substsubj(* Splitting *)letinternal_psplitflagsrexmaxposcalloutsubj=letsubj_len=String.lengthsubjinifsubj_len=0then[]elseifmax=1then[subj]elseletsubgroups2,ovector=make_ovectorrexin(* Adds contents of subgroups to the string accumulator *)lethandle_subgroupsstrs=letstrs=refstrsinleti=ref2inwhile!i<subgroups2doletfirst=Array.unsafe_getovector!iinincri;letlast=Array.unsafe_getovector!iinletstr=iffirst<0then""elsestring_unsafe_subsubjfirst(last-first)instrs:=str::!strs;incridone;!strsin(* Performs the recursive split *)letrecloopstrscntposprematch=letlen=subj_len-posiniflen<0thenstrselse(* Checks termination due to max restriction *)ifcnt=0thenifprematch&&tryunsafe_pcre_execflagsrex~pos~subj_start:pos~subjovectorcallout;truewithNot_found->falsethenletlast=Array.unsafe_getovector1inletstrs=handle_subgroupsstrsinstring_unsafe_subsubjlast(subj_len-last)::strselsestring_unsafe_subsubjposlen::strs(* Calculates next accumulator state for splitting *)elseiftryunsafe_pcre_execflagsrex~pos~subj_start:pos~subjovectorcallout;falsewithNot_found->truethenstring_unsafe_subsubjposlen::strselseletfirst=Array.unsafe_getovector0inletlast=Array.unsafe_getovector1iniffirst=postheniflast=posthenletstrs=ifprematchthenhandle_subgroupsstrselsestrsiniflen=0then""::strselseiftryunsafe_pcre_exec(flagslor0x0410)rex~pos~subj_start:pos~subjovectorcallout;truewithNot_found->falsethenletnew_strs=handle_subgroups(""::strs)inloopnew_strs(cnt-1)(Array.unsafe_getovector1)falseelseletnew_strs=string_unsafe_subsubjpos1::strsinloopnew_strs(cnt-1)(pos+1)trueelseifprematchthenloop(handle_subgroupsstrs)cntlastfalseelseloop(handle_subgroups(""::strs))(cnt-1)lastfalseelseletnew_strs=string_unsafe_subsubjpos(first-pos)::strsinloop(handle_subgroupsnew_strs)(cnt-1)lastfalseinloop[](max-1)posfalseletrecstrip_all_empty=function""::t->strip_all_emptyt|l->lexternalisspace:char->bool="pcre_isspace_stub"[@@noalloc]letrecfind_no_spaceixlenstr=ifix=len||not(isspace(String.unsafe_getstrix))thenixelsefind_no_space(ix+1)lenstrletsplit?(iflags=0)?flags?rex?pat?(pos=0)?(max=0)?calloutsubj=letiflags=matchflagswithSomeflags->rflagsflags|_->iflagsinletres=matchpat,rexwith|Somestr,_->internal_psplitiflags(regexpstr)maxposcalloutsubj|_,Somerex->internal_psplitiflagsrexmaxposcalloutsubj|_->(* special case for Perl-splitting semantics *)letlen=String.lengthsubjinifpos>len||pos<0thenfailwith"Pcre.split: illegal offset";letnew_pos=find_no_spaceposlensubjininternal_psplitiflagsdef_rexmaxnew_poscalloutsubjinList.rev(ifmax=0thenstrip_all_emptyreselseres)letasplit?iflags?flags?rex?pat?pos?max?calloutsubj=Array.of_list(split?iflags?flags?rex?pat?pos?max?calloutsubj)(* Full splitting *)typesplit_result=Textofstring|Delimofstring|Groupofint*string|NoGroupletrecstrip_all_empty_full=function|Delim_::rest->strip_all_empty_fullrest|l->lletfull_split?(iflags=0)?flags?(rex=def_rex)?pat?(pos=0)?(max=0)?calloutsubj=letrex=matchpatwithSomestr->regexpstr|_->rexinletiflags=matchflagswithSomeflags->rflagsflags|_->iflagsinletsubj_len=String.lengthsubjinifsubj_len=0then[]elseifmax=1then[Text(subj)]elseletsubgroups2,ovector=make_ovectorrexin(* Adds contents of subgroups to the string accumulator *)lethandle_subgroupsstrs=letstrs=refstrsinleti=ref2inwhile!i<subgroups2doletgroup_nr=!ilsr1inletfirst=Array.unsafe_getovector!iinincri;letlast=Array.unsafe_getovector!iinletstr=iffirst<0thenNoGroupelseletgroup_str=string_unsafe_subsubjfirst(last-first)inGroup(group_nr,group_str)instrs:=str::!strs;incridone;!strsin(* Performs the recursive split *)letrecloopstrscntposprematch=letlen=subj_len-posiniflen<0thenstrselse(* Checks termination due to max restriction *)ifcnt=0thenifprematch&&tryunsafe_pcre_execiflagsrex~pos~subj_start:pos~subjovectorcallout;truewithNot_found->falsethenletfirst=Array.unsafe_getovector0inletlast=Array.unsafe_getovector1inletdelim=Delim(string_unsafe_subsubjfirst(last-first))inText(string_unsafe_subsubjlast(subj_len-last))::handle_subgroups(delim::strs)elseiflen=0thenstrselseText(string_unsafe_subsubjposlen)::strs(* Calculates next accumulator state for splitting *)elseiftryunsafe_pcre_execiflagsrex~pos~subj_start:pos~subjovectorcallout;falsewithNot_found->truetheniflen=0thenstrselseText(string_unsafe_subsubjposlen)::strselseletfirst=Array.unsafe_getovector0inletlast=Array.unsafe_getovector1iniffirst=postheniflast=postheniflen=0thenhandle_subgroups(Delim""::strs)elseletempty_groups=handle_subgroups[]iniftryunsafe_pcre_exec(iflagslor0x0410)rex~pos~subj_start:pos~subjovectorcallout;truewithNot_found->falsethenletfirst=Array.unsafe_getovector0inletlast=Array.unsafe_getovector1inletdelim=Delim(string_unsafe_subsubjfirst(last-first))inletnew_strs=handle_subgroups(delim::(ifprematchthenstrselseempty_groups@(Delim""::strs)))inloopnew_strs(cnt-1)lastfalseelseletnew_strs=Text(string_unsafe_subsubjpos1)::empty_groups@Delim""::strsinloopnew_strs(cnt-1)(pos+1)trueelseletdelim=Delim(string_unsafe_subsubjfirst(last-first))inloop(handle_subgroups(delim::strs))cntlastfalseelseletdelim=Delim(string_unsafe_subsubjfirst(last-first))inletpre_strs=Text(string_unsafe_subsubjpos(first-pos))::strsinloop(handle_subgroups(delim::pre_strs))(cnt-1)lastfalseinletres=loop[](max-1)postrueinList.rev(ifmax=0thenstrip_all_empty_fullreselseres)(* Additional convenience functions useful in combination with this library *)letforeach_line?(ic=stdin)f=trywhiletruedof(input_lineic)donewithEnd_of_file->()letforeach_filefilenamesf=letdo_with_filefilename=letfile=open_infilenameintryffilenamefile;close_infilewithexn->close_infile;raiseexninList.iterdo_with_filefilenames