123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592(** N.B. when I say "[x] is a convenience function around [y]", that just means [x] can be
thought of in terms of [y]. In fact, [x] may not be implemented on top of [y], because
in many cases (e.g., find/find_all) the convenience functions assume certain defaults
that make it more efficient to drop down into C directly.
*)openCoretypettype'awithout_trailing_none='a[@@derivingsexp_of]letwithout_trailing_none=Fn.idmoduleOptions=Optionsexternalcre2__init:unit->unit="mlre2__init"externalcre2__create_re:Options.Private.C_repr.t->string->t="mlre2__create_re"externalcre2__num_submatches:t->int="mlre2__num_submatches"[@@noalloc]externalcre2__submatch_index:t->string->int="mlre2__submatch_index"[@@noalloc]externalcre2__pattern:t->string="mlre2__pattern"externalcre2__options:t->Options.Private.C_repr.t="mlre2__options"externalcre2__iter_next:t->int->int->string->int*(int*int)optionarrayoption="mlre2__iter_next"externalcre2__matches:t->string->bool="mlre2__matches"[@@noalloc]externalcre2__find_all:t->int->string->stringlist="mlre2__find_all"externalcre2__find_first:t->int->string->string="mlre2__find_first"externalcre2__rewrite_exn:t->string->string->string="mlre2__rewrite_exn"externalcre2__valid_rewrite_template:t->string->bool="mlre2__valid_rewrite_template"[@@noalloc]externalcre2__escape:string->string="mlre2__escape"typemultipleexternalcre2__multiple_create:Options.Private.C_repr.t->multiple="mlre2__multiple_create"externalcre2__multiple_add:multiple->string->int="mlre2__multiple_add"externalcre2__multiple_compile:multiple->unit="mlre2__multiple_compile"externalcre2__multiple_match:multiple->string->intarray="mlre2__multiple_match"typeregex=tmoduleExceptions=struct(** [Regex_no_such_subpattern (n, max)] means [n] was requested but only [max]
subpatterns are defined (so [max] - 1 is the highest valid index) *)exceptionRegex_no_such_subpatternofint*int(** [Regex_no_such_named_subpattern (name, pattern)] *)exceptionRegex_no_such_named_subpatternofstring*string(** [Match_failed pattern] *)exceptionRegex_match_failedofstring(** [Regex_submatch_did_not_capture (s, i)] means the [i]th subpattern in the
regex compiled from [s] did not capture a substring. *)exceptionRegex_submatch_did_not_captureofstring*int(** the string is the C library's error message, generally in the form of
"(human-readable error): (piece of pattern that did not compile)" *)exceptionRegex_compile_failedofstring(** [Regex_rewrite_template_invalid (template, error_msg)] *)exceptionRegex_rewrite_template_invalidofstring*stringlet()=(* register exceptions *)Callback.register_exception"mlre2__Regex_no_such_subpattern"(Regex_no_such_subpattern(-1,-1));Callback.register_exception"mlre2__Regex_no_such_named_subpattern"(Regex_no_such_named_subpattern("foo","bar"));Callback.register_exception"mlre2__Regex_match_failed"(Regex_match_failed"");Callback.register_exception"mlre2__Regex_submatch_did_not_capture"(Regex_submatch_did_not_capture("",0));Callback.register_exception"mlre2__Regex_compile_failed"(Regex_compile_failed"");Callback.register_exception"mlre2__Regex_rewrite_template_invalid"(Regex_rewrite_template_invalid("",""));;endincludeExceptionslet()=cre2__init()(* register custom operations *)letcreate_exn?(options=Options.default)pat=cre2__create_re(Options.Private.to_c_reproptions)pat;;letcreate?optionspat=Or_error.try_with(fun()->create_exn?optionspat)letnum_submatchest=cre2__num_submatchestletpatternt=cre2__patterntletoptionst=cre2__optionst|>Options.Private.of_c_reprletof_stringpat=create_exnpatletto_stringt=cre2__patterntmoduleStable=structopenCore.Core_stablemoduleV2=structmoduleRepr=structtypet={pattern:string;options:Options.Stable.V2.t}[@@derivingbin_io,compare,hash]let%expect_test_=print_endline[%bin_digest:t];[%expect{| 5081a6119bfacbe1515e8caf368f40e5 |}];;typet_long_sexp_serialization=string*Options.Stable.V2.t[@@derivingsexp]letsexp_of_t{pattern;options}=(* in the vast majority of cases, [t] is created with default options, therefore
we would like to treat that case with just a simple Sexp.Atom (more readable
in sexp representation) *)ifOptions.Stable.V2.is_defaultoptionsthenSexp.V1.Atompatternelse[%sexp_of:t_long_sexp_serialization](pattern,options);;lett_of_sexp=function|Sexp.V1.Atompattern->letoptions=Options.defaultin{pattern;options}|sexp->letpattern,options=[%of_sexp:t_long_sexp_serialization]sexpin{pattern;options};;endmoduleT=structtypenonrect=tletcaller_identity=Bin_prot.Shape.Uuid.of_string"1d372eb2-6c4e-11eb-bd12-aa000016704e";;letto_reprt={Repr.pattern=patternt;options=optionst}letof_repr{Repr.pattern;options}=create_exn~optionspatternletto_binable=to_reprletof_binable=of_reprletto_sexpable=to_reprletof_sexpable=of_reprendincludeTmoduleT_serializable_comparable=structincludeBinable.Of_binable.V2(Repr)(T)let%expect_test_=print_endline[%bin_digest:t];[%expect{| b22d9edbef943331b08f0d5df92d4b75 |}];;includeSexpable.Of_sexpable.V1(Repr)(T)letcomparet1t2=Repr.compare(T.to_reprt1)(T.to_reprt2)include(valComparator.V1.make~compare~sexp_of_t)endincludeT_serializable_comparablelethasht=Repr.hash(T.to_reprt)lethash_fold_tstatet=Repr.hash_fold_tstate(T.to_reprt)includeComparable.V1.Make(structincludeTincludeT_serializable_comparableend)endmoduleV1_no_options=structmoduleT=structtypenonrect=tletof_stringpat=create_exn~options:Options.defaultpatletto_stringt=patterntendincludeTmoduleTS=structincludeBinable.Of_stringable.V1[@alert"-legacy"](T)let%expect_test_=print_endline[%bin_digest:t];[%expect{| d9a8da25d5656b016fb4dbdc2e4197fb |}];;includeSexpable.Of_stringable.V1(T)letcomparet1t2=String.V1.compare(to_stringt1)(to_stringt2)lethasht=String.V1.hash(to_stringt)lethash_fold_tst=String.V1.hash_fold_ts(to_stringt)endincludeTSendendincludeStable.V2.T_serializable_comparabletypeid_t=[`Indexofint|`Nameofstring]letindex_of_id_exnt=function|`Indexi->letmax=num_submatchestinifi<0||i>maxthenraise(Regex_no_such_subpattern(i,max))elsei|`Namename->leti=cre2__submatch_indextnameinifi<0||i>num_submatchestthenraise(Regex_no_such_named_subpattern(name,patternt))elsei;;moduleMatch=structtypet={rex:(regex[@sexp.opaque]);input:string;captures:(int*int)optionarray}[@@derivingsexp_of]letget_pos_exn~subt=leti=index_of_id_exnt.rexsubinletlength=Array.lengtht.capturesinifi<0||i>=lengththenraise(Regex_no_such_subpattern(i,length))else(matcht.captures.(i)with|None->raise(Regex_submatch_did_not_capture(cre2__patternt.rex,i))|Someretval->retval);;letget_exn~subt=letpos,len=get_pos_exn~subtinString.subt.input~pos~len;;letget~subt=Option.map(tryt.captures.(index_of_id_exnt.rexsub)with|_->None)~f:(fun(pos,len)->String.subt.input~pos~len);;letget_all{captures;input;rex=_}=Array.mapcaptures~f:(Option.map~f:(fun(pos,len)->String.subinput~pos~len));;(* not exposed in mli *)letcreate~rexcaptures~input={rex;input;captures}endletto_sequence_exn?subtinput=letn=matchsubwith|None->-1|Some(`Indexn)->ifn>=0thennelse0|Some(`Name_asname)->index_of_id_exntnameinSequence.unfold~init:0~f:(funpos->ifpos<0thenNoneelse(letpos,matches=cre2__iter_nexttposninputinOption.mapmatches~f:(funm->Match.create~rex:t~inputm,pos)));;letfind_all_exn?(sub=`Index0)tinput=cre2__find_allt(index_of_id_exntsub)input;;letfind_all?subtinput=Or_error.try_with(fun()->find_all_exn?subtinput)letfind_first_exn?(sub=`Index0)tinput=cre2__find_firstt(index_of_id_exntsub)input;;letfind_first?subtinput=Or_error.try_with(fun()->find_first_exn?subtinput)letfind_submatches_exntinput=letn=num_submatchestinletseq=to_sequence_exn~sub:(`Indexn)tinputinletmatches=matchSequence.nextseqwith|None->raise(Regex_match_failed(cre2__patternt))|Some(m,_)->minArray.initn~f:(funi->Match.get~sub:(`Indexi)matches);;letfind_submatchestinput=Or_error.try_with(fun()->find_submatches_exntinput)letmatchestinput=cre2__matchestinputletget_matches_exn?sub?maxtinput=letseq=to_sequence_exn?subtinputinletseq=matchmaxwith|None->seq|Somelimit->Sequence.takeseqlimitinSequence.to_listseq;;letget_matches?sub?maxtinput=Or_error.try_with(fun()->get_matches_exn?sub?maxtinput);;letfirst_match_exntinput=List.hd_exn(get_matches_exntinput~max:1)letfirst_matchtinput=Or_error.try_with(fun()->first_match_exntinput)moduleSubstring=structtypet={src:string;src_pos:int;len:int}letcreate~pos~lensrc={src;src_pos=pos;len}letof_stringsrc={src;src_pos=0;len=String.lengthsrc}letconcat_string~lensubstrings:string=letdst=Bytes.createleninignore(List.fold_leftsubstrings~init:0~f:(fundst_pos{src;src_pos;len}->Bytes.From_string.blit~src~src_pos~dst~dst_pos~len;dst_pos+len):int);Bytes.unsafe_to_string~no_mutation_while_string_reachable:dst;;endmoduleReturn=structletsubstringsstr(pos,len)=Substring.create~pos~lenstrletstringssrc(src_pos,len)=String.subsrc~pos:src_pos~lenendletsplit_internal?(include_matches=false)returninput(matches:Match.tlist)=(* if additional speed is needed, maybe try optimizing away the closures *)letgaps~pos~acc~pos'~len':_=returninput(pos,pos'-pos)::accinletboth~pos~acc~pos'~len'=returninput(pos',len')::returninput(pos,pos'-pos)::accinletfg(pos,acc)m=letpos',len'=Match.get_pos_exn~sub:(`Index0)minpos'+len',g~pos~acc~pos'~len'inletlast_sep,acc=List.fold_left~init:(0,[])matches~f:(ifinclude_matchesthenfbothelsefgaps)inList.rev(returninput(last_sep,String.lengthinput-last_sep)::acc);;letsplit?max?(include_matches=false)tinput=letmatches=get_matches_exn?max~sub:(`Index1)tinputinsplit_internal~include_matchesReturn.stringsinputmatches;;letreplace_exn?sub?only~ftinput=letonly'=matchonlywith|Somei->Some(i+1)|None->Noneinletmatches=get_matches_exn?sub?max:only'tinputinletgaps=split_internalReturn.substringsinputmatchesinletreplacements=letwhole_matchm=Match.get_exn~sub:(`Index0)minletf'fm=Substring.of_string(fm)inmatchonlywith|None->List.rev_map~f:(f'f)matches|Someto_be_replaced->List.rev_mapimatches~f:(funi->f'(ifi=to_be_replacedthenfelsewhole_match))inletrecinterleave(len,acc)l=function|[]->letlen'=List.fold_left~init:0l~f:(funx{Substring.len;src=_;src_pos=_}->x+len)inlen+len',List.rev(List.rev_appendlacc)|h::tl->interleave(len+h.Substring.len,h::acc)tllinletlen,substrings=interleave(0,[])(List.revreplacements)gapsinSubstring.concat_string~lensubstrings;;letreplace?sub?only~ftinput=Or_error.try_with(fun()->replace_exn?sub?only~ftinput);;letrewrite_exnt~templateinput=cre2__rewrite_exntinputtemplateletrewritet~templateinput=Or_error.try_with(fun()->rewrite_exnt~templateinput);;letvalid_rewrite_templatet~template=cre2__valid_rewrite_templatettemplateletescapeinput=cre2__escapeinputmoduleMultiple=structtype'at={set:multiple;vals:'aarray}letcreate_exn?(options=Options.default)entries=lett={set=cre2__multiple_create(Options.Private.to_c_reproptions);vals=Array.of_list(List.map~f:sndentries)}inList.iterientries~f:(funexpected(pat,_)->letobserved=cre2__multiple_addt.setpatinifInt.(<>)expectedobservedthenraise_s[%message"cre2__multiple_add returned unexpected index."(expected:int)(observed:int)]);cre2__multiple_compilet.set;t;;letcreate?optionsentries=Or_error.try_with(fun()->create_exn?optionsentries)letvalues_of_indicestindices=Array.fold_rightindices~init:[]~f:(funiacc->t.vals.(i)::acc);;letmatches_no_orderts=values_of_indicest(cre2__multiple_matcht.sets)letmatchests=letindices=cre2__multiple_matcht.setsinArray.sortindices~compare:Int.compare;values_of_indicestindices;;endmoduleInfix=structlet(=~)inputt=matchestinputendlet%test_module_=(modulestructlet%test_=letre=create_exn"^(.*)\\\\"inletbuf=Bin_prot.Common.create_buf100inignore(Stable.V1_no_options.bin_write_tbuf~pos:0re:int);Int.(=)0(comparere(Stable.V1_no_options.bin_read_tbuf~pos_ref:(ref0)));;let%test_=letre=create_exn~options:{Options.defaultwithcase_sensitive=false}"^(.*)\\\\"inletbuf=Bin_prot.Common.create_buf100inignore(Stable.V2.bin_write_tbuf~pos:0re:int);Int.(=)0(Stable.V2.comparere(Stable.V2.bin_read_tbuf~pos_ref:(ref0)));;let%test_=letre=create_exn"^(.*)\\\\"inInt.(=)0(comparere(Stable.V2.t_of_sexp(sexp_of_tre)));;let%test_=letfoo,a_star,dot_capture=create_exn"foo",create_exn"a*",create_exn"(.)"inlet(<)ab=compareab<0ina_star<foo&&dot_capture<a_star&&dot_capture<foo;;let%test_unit_=letre=create_exn"^"inmatchget_matches_exnre"XYZ"with|[the_match]->[%test_eq:int*int](0,0)(Match.get_pos_exn~sub:(`Index0)the_match)|other->raise_s[%sexp"expected exactly one match",(other:Match.tlist)];;let%test_unit_=letre=create_exn"^"in[%test_eq:string]"aXYZ"(replace_exnre"XYZ"~f:(const"a"));;end);;let%bench_fun("find_submatches with many Nones"[@indexedn=[5;10;50;100;200]])=letregex="^"^String.concat~sep:"|"(List.initn~f:(funi->"("^Int.to_stringi^")"))^"$"|>create_exninfun()->let_r=find_submatchesregex(Int.to_stringn)in();;let%expect_test"roundtrip Stable.V2.t_of_sexp and Stable.V2.sexp_of_t"=[{|""|};"^sim?ple*";"(cAse ((case_insensitive)))";"(cAse ((case_insensitive) (encoding Latin1)))"]|>List.iter~f:(funs->Sexp.of_string_conv_exnsStable.V2.t_of_sexp|>Stable.V2.sexp_of_t|>print_s);[%expect{|
""
^sim?ple*
(cAse ((case_insensitive)))
(cAse ((case_insensitive) (encoding Latin1)))
|}];;let%expect_test"behavior of options wrt comparison/hashing"=lett1x=create_exn~options:{Options.defaultwithcase_sensitive=true}xinlett2x=create_exn~options:{Options.defaultwithcase_sensitive=false}xin(lett1=t1""andt2=t2""inassert(not([%compare.equal:t]t1t2));assert([%compare.equal:Stable.V1_no_options.t]t1t2);assert(not([%compare.equal:Stable.V2.t]t1t2)));letstable_v2_unequal=List.filter["";"1";"2";"3"]~f:(funstr->lethhash=hash(t1str)=hash(t2str)inassert(h[%hash:Stable.V1_no_options.t]);not(h[%hash:Stable.V2.t]))inassert(not(List.is_emptystable_v2_unequal));;let%test_unit"t preserved via Stable.V2.sexp_of_t and Stable.V2.t_of_sexp"=letf_pattern=patternandf_options=optionsinList.iter["",None;"^sim?ple*",None;"cAse",Some{Options.defaultwithcase_sensitive=false};"cAse",Some{Options.defaultwithcase_sensitive=false;encoding=Latin1}]~f:(fun(pattern,options)->lett=create_exn?optionspattern|>Stable.V2.sexp_of_t|>Stable.V2.t_of_sexpinletoptions=Option.valueoptions~default:Options.defaultin[%test_eq:string*Options.t](f_patternt,f_optionst)(pattern,options));;includeComparable.Make_plain_using_comparator(structtypenonrect=tincludeStable.V2.T_serializable_comparableend)includeHashable.Make_plain(structtypenonrect=tincludeStable.V2.T_serializable_comparablelethash=Stable.V2.hashlethash_fold_t=Stable.V2.hash_fold_tend)