123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488open!ImportmoduleArray=Array0moduleBytes=Bytes0includeString0letinvalid_argf=Printf.invalid_argfletraise_s=Error.raise_sletstage=Staged.stagemoduleT=structtypet=string[@@deriving_inlinehash,sexp,sexp_grammar]let(hash_fold_t:Ppx_hash_lib.Std.Hash.state->t->Ppx_hash_lib.Std.Hash.state)=hash_fold_stringand(hash:t->Ppx_hash_lib.Std.Hash.hash_value)=letfunc=hash_stringinfunx->funcx;;lett_of_sexp=(string_of_sexp:Ppx_sexp_conv_lib.Sexp.t->t)letsexp_of_t=(sexp_of_string:t->Ppx_sexp_conv_lib.Sexp.t)let(t_sexp_grammar:Ppx_sexp_conv_lib.Sexp.Private.Raw_grammar.t)=let(_the_generic_group:Ppx_sexp_conv_lib.Sexp.Private.Raw_grammar.generic_group)={implicit_vars=["string"];ggid="\146e\023\249\235eE\139c\132W\195\137\129\235\025";types=["t",Implicit_var0]}inlet(_the_group:Ppx_sexp_conv_lib.Sexp.Private.Raw_grammar.group)={gid=Ppx_sexp_conv_lib.Lazy_group_id.create();apply_implicit=[string_sexp_grammar];generic_group=_the_generic_group;origin="string.ml.T"}inlet(t_sexp_grammar:Ppx_sexp_conv_lib.Sexp.Private.Raw_grammar.t)=Ref("t",_the_group)int_sexp_grammar;;[@@@end]letcompare=compareendincludeTincludeComparator.Make(T)typeelt=charletinvariant(_:t)=()(* This is copied/adapted from 'blit.ml'.
[sub], [subo] could be implemented using [Blit.Make(Bytes)] plus unsafe casts to/from
string but were inlined here to avoid using [Bytes.unsafe_of_string] as much as possible.
*)letsubsrc~pos~len=ifpos=0&&len=String.lengthsrcthensrcelse(Ordered_collection_common.check_pos_len_exn~pos~len~total_length:(lengthsrc);letdst=Bytes.createleniniflen>0thenBytes.unsafe_blit_string~src~src_pos:pos~dst~dst_pos:0~len;Bytes.unsafe_to_string~no_mutation_while_string_reachable:dst);;letsubo?(pos=0)?lensrc=subsrc~pos~len:(matchlenwith|Somei->i|None->lengthsrc-pos);;letreccontains_unsafet~pos~end_char=pos<end_&&(Char.equal(unsafe_gettpos)char||contains_unsafet~pos:(pos+1)~end_char);;letcontains?(pos=0)?lentchar=lettotal_length=String.lengthtinletlen=Option.valuelen~default:(total_length-pos)inOrdered_collection_common.check_pos_len_exn~pos~len~total_length;contains_unsafet~pos~end_:(pos+len)char;;letis_emptyt=lengtht=0letrecindex_from_exn_internalstring~pos~len~not_foundchar=ifpos>=lenthenraisenot_foundelseifChar.equal(unsafe_getstringpos)charthenposelseindex_from_exn_internalstring~pos:(pos+1)~len~not_foundchar;;letindex_exn_internalt~not_foundchar=index_from_exn_internalt~pos:0~len:(lengtht)~not_foundchar;;letindex_exn=letnot_found=Not_found_s(Atom"String.index_exn: not found")inletindex_exntchar=index_exn_internalt~not_foundcharin(* named to preserve symbol in compiled binary *)index_exn;;letindex_from_exn=letnot_found=Not_found_s(Atom"String.index_from_exn: not found")inletindex_from_exntposchar=letlen=lengthtinifpos<0||pos>lentheninvalid_arg"String.index_from_exn"elseindex_from_exn_internalt~pos~len~not_foundcharin(* named to preserve symbol in compiled binary *)index_from_exn;;letrecrindex_from_exn_internalstring~pos~len~not_foundchar=ifpos<0thenraisenot_foundelseifChar.equal(unsafe_getstringpos)charthenposelserindex_from_exn_internalstring~pos:(pos-1)~len~not_foundchar;;letrindex_exn_internalt~not_foundchar=letlen=lengthtinrindex_from_exn_internalt~pos:(len-1)~len~not_foundchar;;letrindex_exn=letnot_found=Not_found_s(Atom"String.rindex_exn: not found")inletrindex_exntchar=rindex_exn_internalt~not_foundcharin(* named to preserve symbol in compiled binary *)rindex_exn;;letrindex_from_exn=letnot_found=Not_found_s(Atom"String.rindex_from_exn: not found")inletrindex_from_exntposchar=letlen=lengthtinifpos<-1||pos>=lentheninvalid_arg"String.rindex_from_exn"elserindex_from_exn_internalt~pos~len~not_foundcharin(* named to preserve symbol in compiled binary *)rindex_from_exn;;letindextchar=trySome(index_exntchar)with|Not_found_s_|Caml.Not_found->None;;letrindextchar=trySome(rindex_exntchar)with|Not_found_s_|Caml.Not_found->None;;letindex_fromtposchar=trySome(index_from_exntposchar)with|Not_found_s_|Caml.Not_found->None;;letrindex_fromtposchar=trySome(rindex_from_exntposchar)with|Not_found_s_|Caml.Not_found->None;;moduleSearch_pattern0=structtypet={pattern:string;case_sensitive:bool;kmp_array:intarray}letsexp_of_t{pattern;case_sensitive;kmp_array=_}:Sexp.t=List[List[Atom"pattern";sexp_of_stringpattern];List[Atom"case_sensitive";sexp_of_boolcase_sensitive]];;letpatternt=t.patternletcase_sensitivet=t.case_sensitive(* Find max number of matched characters at [next_text_char], given the current
[matched_chars]. Try to extend the current match, if chars don't match, try to match
fewer chars. If chars match then extend the match. *)letkmp_internal_loop~matched_chars~next_text_char~pattern~kmp_array~char_equal=letmatched_chars=refmatched_charsinwhile!matched_chars>0&¬(char_equalnext_text_char(unsafe_getpattern!matched_chars))domatched_chars:=Array.unsafe_getkmp_array(!matched_chars-1)done;ifchar_equalnext_text_char(unsafe_getpattern!matched_chars)thenmatched_chars:=!matched_chars+1;!matched_chars;;letget_char_equal~case_sensitive=matchcase_sensitivewith|true->Char.equal|false->Char.Caseless.equal;;(* Classic KMP pre-processing of the pattern: build the int array, which, for each i,
contains the length of the longest non-trivial prefix of s which is equal to a suffix
ending at s.[i] *)letcreatepattern~case_sensitive=letn=lengthpatterninletkmp_array=Array.create~len:n(-1)inifn>0then(letchar_equal=get_char_equal~case_sensitiveinArray.unsafe_setkmp_array00;letmatched_chars=ref0infori=1ton-1domatched_chars:=kmp_internal_loop~matched_chars:!matched_chars~next_text_char:(unsafe_getpatterni)~pattern~kmp_array~char_equal;Array.unsafe_setkmp_arrayi!matched_charsdone);{pattern;case_sensitive;kmp_array};;(* Classic KMP: use the pre-processed pattern to optimize look-behinds on non-matches.
We return int to avoid allocation in [index_exn]. -1 means no match. *)letindex_internal?(pos=0){pattern;case_sensitive;kmp_array}~in_:text=ifpos<0||pos>lengthtext-lengthpatternthen-1else(letchar_equal=get_char_equal~case_sensitiveinletj=refposinletmatched_chars=ref0inletk=lengthpatterninletn=lengthtextinwhile!j<n&&!matched_chars<kdoletnext_text_char=unsafe_gettext!jinmatched_chars:=kmp_internal_loop~matched_chars:!matched_chars~next_text_char~pattern~kmp_array~char_equal;j:=!j+1done;if!matched_chars=kthen!j-kelse-1);;letmatcheststr=index_internalt~in_:str>=0letindex?post~in_=letp=index_internal?post~in_inifp<0thenNoneelseSomep;;letindex_exn?post~in_=letp=index_internal?post~in_inifp>=0thenpelseraise_s(Sexp.message"Substring not found"["substring",sexp_of_stringt.pattern]);;letindex_all{pattern;case_sensitive;kmp_array}~may_overlap~in_:text=iflengthpattern=0thenList.init(1+lengthtext)~f:Fn.idelse(letchar_equal=get_char_equal~case_sensitiveinletmatched_chars=ref0inletk=lengthpatterninletn=lengthtextinletfound=ref[]inforj=0tondoif!matched_chars=kthen(found:=(j-k)::!found;(* we just found a match in the previous iteration *)matchmay_overlapwith|true->matched_chars:=Array.unsafe_getkmp_array(k-1)|false->matched_chars:=0);ifj<nthen(letnext_text_char=unsafe_gettextjinmatched_chars:=kmp_internal_loop~matched_chars:!matched_chars~next_text_char~pattern~kmp_array~char_equal)done;List.rev!found);;letreplace_first?post~in_:s~with_=matchindex?post~in_:swith|None->s|Somei->letlen_s=lengthsinletlen_t=lengtht.patterninletlen_with=lengthwith_inletdst=Bytes.create(len_s+len_with-len_t)inBytes.blit_string~src:s~src_pos:0~dst~dst_pos:0~len:i;Bytes.blit_string~src:with_~src_pos:0~dst~dst_pos:i~len:len_with;Bytes.blit_string~src:s~src_pos:(i+len_t)~dst~dst_pos:(i+len_with)~len:(len_s-i-len_t);Bytes.unsafe_to_string~no_mutation_while_string_reachable:dst;;letreplace_allt~in_:s~with_=letmatches=index_allt~may_overlap:false~in_:sinmatchmatcheswith|[]->s|_::_->letlen_s=lengthsinletlen_t=lengtht.patterninletlen_with=lengthwith_inletnum_matches=List.lengthmatchesinletdst=Bytes.create(len_s+((len_with-len_t)*num_matches))inletnext_dst_pos=ref0inletnext_src_pos=ref0inList.itermatches~f:(funi->letlen=i-!next_src_posinBytes.blit_string~src:s~src_pos:!next_src_pos~dst~dst_pos:!next_dst_pos~len;Bytes.blit_string~src:with_~src_pos:0~dst~dst_pos:(!next_dst_pos+len)~len:len_with;next_dst_pos:=!next_dst_pos+len+len_with;next_src_pos:=!next_src_pos+len+len_t);Bytes.blit_string~src:s~src_pos:!next_src_pos~dst~dst_pos:!next_dst_pos~len:(len_s-!next_src_pos);Bytes.unsafe_to_string~no_mutation_while_string_reachable:dst;;modulePrivate=structtypepublic=ttypenonrect=t={pattern:string;case_sensitive:bool;kmp_array:intarray}[@@deriving_inlineequal,sexp_of]letequal=(funa__001_b__002_->ifPpx_compare_lib.phys_equala__001_b__002_thentrueelsePpx_compare_lib.(&&)(equal_stringa__001_.patternb__002_.pattern)(Ppx_compare_lib.(&&)(equal_boola__001_.case_sensitiveb__002_.case_sensitive)(equal_arrayequal_inta__001_.kmp_arrayb__002_.kmp_array)):t->t->bool);;letsexp_of_t=(function|{pattern=v_pattern;case_sensitive=v_case_sensitive;kmp_array=v_kmp_array}->letbnds=[]inletbnds=letarg=sexp_of_arraysexp_of_intv_kmp_arrayinPpx_sexp_conv_lib.Sexp.List[Ppx_sexp_conv_lib.Sexp.Atom"kmp_array";arg]::bndsinletbnds=letarg=sexp_of_boolv_case_sensitiveinPpx_sexp_conv_lib.Sexp.List[Ppx_sexp_conv_lib.Sexp.Atom"case_sensitive";arg]::bndsinletbnds=letarg=sexp_of_stringv_patterninPpx_sexp_conv_lib.Sexp.List[Ppx_sexp_conv_lib.Sexp.Atom"pattern";arg]::bndsinPpx_sexp_conv_lib.Sexp.Listbnds:t->Ppx_sexp_conv_lib.Sexp.t);;[@@@end]letrepresentation=Fn.idendendmoduleSearch_pattern_helper=structmoduleSearch_pattern=Search_pattern0endopenSearch_pattern_helperletsubstr_index_gen~case_sensitive?post~pattern=Search_pattern.index?pos(Search_pattern.create~case_sensitivepattern)~in_:t;;letsubstr_index_exn_gen~case_sensitive?post~pattern=Search_pattern.index_exn?pos(Search_pattern.create~case_sensitivepattern)~in_:t;;letsubstr_index_all_gen~case_sensitivet~may_overlap~pattern=Search_pattern.index_all(Search_pattern.create~case_sensitivepattern)~may_overlap~in_:t;;letsubstr_replace_first_gen~case_sensitive?post~pattern=Search_pattern.replace_first?pos(Search_pattern.create~case_sensitivepattern)~in_:t;;letsubstr_replace_all_gen~case_sensitivet~pattern=Search_pattern.replace_all(Search_pattern.create~case_sensitivepattern)~in_:t;;letis_substring_gen~case_sensitivet~substring=Option.is_some(substr_index_gent~pattern:substring~case_sensitive);;letsubstr_index=substr_index_gen~case_sensitive:trueletsubstr_index_exn=substr_index_exn_gen~case_sensitive:trueletsubstr_index_all=substr_index_all_gen~case_sensitive:trueletsubstr_replace_first=substr_replace_first_gen~case_sensitive:trueletsubstr_replace_all=substr_replace_all_gen~case_sensitive:trueletis_substring=is_substring_gen~case_sensitive:trueletis_substring_at_gen=letrecloop~str~str_pos~sub~sub_pos~sub_len~char_equal=ifsub_pos=sub_lenthentrueelseifchar_equal(unsafe_getstrstr_pos)(unsafe_getsubsub_pos)thenloop~str~str_pos:(str_pos+1)~sub~sub_pos:(sub_pos+1)~sub_len~char_equalelsefalseinfunstr~pos:str_pos~substring:sub~char_equal->letstr_len=lengthstrinletsub_len=lengthsubinifstr_pos<0||str_pos>str_lentheninvalid_argf"String.is_substring_at: invalid index %d for string of length %d"str_posstr_len();str_pos+sub_len<=str_len&&loop~str~str_pos~sub~sub_pos:0~sub_len~char_equal;;letis_suffix_genstring~suffix~char_equal=letstring_len=lengthstringinletsuffix_len=lengthsuffixinstring_len>=suffix_len&&is_substring_at_genstring~pos:(string_len-suffix_len)~substring:suffix~char_equal;;letis_prefix_genstring~prefix~char_equal=letstring_len=lengthstringinletprefix_len=lengthprefixinstring_len>=prefix_len&&is_substring_at_genstring~pos:0~substring:prefix~char_equal;;moduleCaseless=structmoduleT=structtypet=string[@@deriving_inlinesexp]lett_of_sexp=(string_of_sexp:Ppx_sexp_conv_lib.Sexp.t->t)letsexp_of_t=(sexp_of_string:t->Ppx_sexp_conv_lib.Sexp.t)[@@@end]letchar_compare_caselessc1c2=Char.compare(Char.lowercasec1)(Char.lowercasec2);;letreccompare_loop~pos~string1~len1~string2~len2=ifpos=len1thenifpos=len2then0else-1elseifpos=len2then1else(letc=char_compare_caseless(unsafe_getstring1pos)(unsafe_getstring2pos)inmatchcwith|0->compare_loop~pos:(pos+1)~string1~len1~string2~len2|_->c);;letcomparestring1string2=ifphys_equalstring1string2then0elsecompare_loop~pos:0~string1~len1:(String.lengthstring1)~string2~len2:(String.lengthstring2);;lethash_fold_tstatet=letlen=lengthtinletstate=ref(hash_fold_intstatelen)inforpos=0tolen-1dostate:=hash_fold_char!state(Char.lowercase(unsafe_gettpos))done;!state;;lethasht=Hash.runhash_fold_ttletis_suffixs~suffix=is_suffix_gens~suffix~char_equal:Char.Caseless.equalletis_prefixs~prefix=is_prefix_gens~prefix~char_equal:Char.Caseless.equalletsubstr_index=substr_index_gen~case_sensitive:falseletsubstr_index_exn=substr_index_exn_gen~case_sensitive:falseletsubstr_index_all=substr_index_all_gen~case_sensitive:falseletsubstr_replace_first=substr_replace_first_gen~case_sensitive:falseletsubstr_replace_all=substr_replace_all_gen~case_sensitive:falseletis_substring=is_substring_gen~case_sensitive:falseletis_substring_at=is_substring_at_gen~char_equal:Char.Caseless.equalendincludeTincludeComparable.Make(T)endletof_string=Fn.idletto_string=Fn.idletinitn~f=ifn<0theninvalid_argf"String.init %d"n();lett=Bytes.createninfori=0ton-1doBytes.setti(fi)done;Bytes.unsafe_to_string~no_mutation_while_string_reachable:t;;letto_lists=letrecloopacci=ifi<0thenaccelseloop(s.[i]::acc)(i-1)inloop[](lengths-1);;letto_list_revs=letlen=lengthsinletrecloopacci=ifi=lenthenaccelseloop(s.[i]::acc)(i+1)inloop[]0;;letrevt=letlen=lengthtinletres=Bytes.createleninfori=0tolen-1dounsafe_setresi(unsafe_gett(len-1-i))done;Bytes.unsafe_to_string~no_mutation_while_string_reachable:res;;(** Efficient string splitting *)letlsplit2_exn=letnot_found=Not_found_s(Atom"String.lsplit2_exn: not found")inletlsplit2_exnline~on:delim=letpos=index_exn_internalline~not_founddeliminsubline~pos:0~len:pos,subline~pos:(pos+1)~len:(lengthline-pos-1)in(* named to preserve symbol in compiled binary *)lsplit2_exn;;letrsplit2_exn=letnot_found=Not_found_s(Atom"String.rsplit2_exn: not found")inletrsplit2_exnline~on:delim=letpos=rindex_exn_internalline~not_founddeliminsubline~pos:0~len:pos,subline~pos:(pos+1)~len:(lengthline-pos-1)in(* named to preserve symbol in compiled binary *)rsplit2_exn;;letlsplit2line~on=trySome(lsplit2_exnline~on)with|Not_found_s_|Caml.Not_found->None;;letrsplit2line~on=trySome(rsplit2_exnline~on)with|Not_found_s_|Caml.Not_found->None;;letrecchar_list_meml(c:char)=matchlwith|[]->false|hd::tl->Char.equalhdc||char_list_memtlc;;letsplit_genstr~on=letis_delim=matchonwith|`charc'->func->Char.equalcc'|`char_listl->func->char_list_memlcinletlen=lengthstrinletrecloopacclast_pospos=ifpos=-1thensubstr~pos:0~len:last_pos::accelseifis_delimstr.[pos]then(letpos1=pos+1inletsub_str=substr~pos:pos1~len:(last_pos-pos1)inloop(sub_str::acc)pos(pos-1))elseloopacclast_pos(pos-1)inloop[]len(len-1);;letsplitstr~on=split_genstr~on:(`charon)letsplit_on_charsstr~on:chars=split_genstr~on:(`char_listchars)letsplit_lines=letback_up_at_newline~t~pos~eol=pos:=!pos-if!pos>0&&Char.equalt.[!pos-1]'\r'then2else1;eol:=!pos+1infunt->letn=lengthtinifn=0then[]else((* Invariant: [-1 <= pos < eol]. *)letpos=ref(n-1)inleteol=refninletac=ref[]in(* We treat the end of the string specially, because if the string ends with a
newline, we don't want an extra empty string at the end of the output. *)ifChar.equalt.[!pos]'\n'thenback_up_at_newline~t~pos~eol;while!pos>=0doifChar.(<>)t.[!pos]'\n'thendecrposelse((* Because [pos < eol], we know that [start <= eol]. *)letstart=!pos+1inac:=subt~pos:start~len:(!eol-start)::!ac;back_up_at_newline~t~pos~eol)done;subt~pos:0~len:!eol::!ac);;letis_suffixs~suffix=is_suffix_gens~suffix~char_equal:Char.equalletis_prefixs~prefix=is_prefix_gens~prefix~char_equal:Char.equalletis_substring_ats~pos~substring=is_substring_at_gens~pos~substring~char_equal:Char.equal;;letwrap_sub_ntn~name~pos~len~on_error=ifn<0theninvalid_arg(name^" expecting nonnegative argument")else(trysubt~pos~lenwith|_->on_error);;letdrop_prefixtn=wrap_sub_n~name:"drop_prefix"tn~pos:n~len:(lengtht-n)~on_error:"";;letdrop_suffixtn=wrap_sub_n~name:"drop_suffix"tn~pos:0~len:(lengtht-n)~on_error:"";;letprefixtn=wrap_sub_n~name:"prefix"tn~pos:0~len:n~on_error:tletsuffixtn=wrap_sub_n~name:"suffix"tn~pos:(lengtht-n)~len:n~on_error:tletlfindi?(pos=0)t~f=letn=lengthtinletrecloopi=ifi=nthenNoneelseiffit.[i]thenSomeielseloop(i+1)inlooppos;;letfindt~f=matchlfindit~f:(fun_c->fc)with|None->None|Somei->Somet.[i];;letfind_mapt~f=letn=lengthtinletrecloopi=ifi=nthenNoneelse(matchft.[i]with|None->loop(i+1)|Some_asres->res)inloop0;;letrfindi?post~f=letrecloopi=ifi<0thenNoneelseiffit.[i]thenSomeielseloop(i-1)inletpos=matchposwith|Somepos->pos|None->lengtht-1inlooppos;;letlast_non_drop~dropt=rfindit~f:(fun_c->not(dropc))letrstrip?(drop=Char.is_whitespace)t=matchlast_non_dropt~dropwith|None->""|Somei->ifi=lengtht-1thentelseprefixt(i+1);;letfirst_non_drop~dropt=lfindit~f:(fun_c->not(dropc))letlstrip?(drop=Char.is_whitespace)t=matchfirst_non_dropt~dropwith|None->""|Some0->t|Somen->drop_prefixtn;;(* [strip t] could be implemented as [lstrip (rstrip t)]. The implementation
below saves (at least) a factor of two allocation, by only allocating the
final result. This also saves some amount of time. *)letstrip?(drop=Char.is_whitespace)t=letlength=lengthtiniflength=0||not(dropt.[0]||dropt.[length-1])thentelse(matchfirst_non_dropt~dropwith|None->""|Somefirst->(matchlast_non_dropt~dropwith|None->assertfalse|Somelast->subt~pos:first~len:(last-first+1)));;letmapit~f=letl=lengthtinlett'=Bytes.createlinfori=0tol-1doBytes.unsafe_sett'i(fit.[i])done;Bytes.unsafe_to_string~no_mutation_while_string_reachable:t';;(* repeated code to avoid requiring an extra allocation for a closure on each call. *)letmapt~f=letl=lengthtinlett'=Bytes.createlinfori=0tol-1doBytes.unsafe_sett'i(ft.[i])done;Bytes.unsafe_to_string~no_mutation_while_string_reachable:t';;letto_arrays=Array.init(lengths)~f:(funi->s.[i])letexists=letrecloopsi~len~f=i<len&&(fs.[i]||loops(i+1)~len~f)infuns~f->loops0~len:(lengths)~f;;letfor_all=letrecloopsi~len~f=i=len||(fs.[i]&&loops(i+1)~len~f)infuns~f->loops0~len:(lengths)~f;;letfoldt~init~f=letn=lengthtinletrecloopiac=ifi=nthenacelseloop(i+1)(fact.[i])inloop0init;;letfoldit~init~f=letn=lengthtinletrecloopiac=ifi=nthenacelseloop(i+1)(fiact.[i])inloop0init;;letcountt~f=Container.count~foldt~fletsummt~f=Container.sum~foldmt~fletmin_eltt=Container.min_elt~foldtletmax_eltt=Container.max_elt~foldtletfold_resultt~init~f=Container.fold_result~fold~init~ftletfold_untilt~init~f=Container.fold_until~fold~init~ftletmem=letreclooptc~pos:i~len=i<len&&(Char.equalc(unsafe_getti)||looptc~pos:(i+1)~len)infuntc->looptc~pos:0~len:(lengtht);;lettr~target~replacements=ifChar.equaltargetreplacementthenselseifmemstargetthenmaps~f:(func->ifChar.equalctargetthenreplacementelsec)elses;;lettr_multi~target~replacement=ifis_emptytargetthenstageFn.idelseifis_emptyreplacementtheninvalid_arg"tr_multi replacement is empty string"else(matchBytes_tr.tr_create_map~target~replacementwith|None->stageFn.id|Sometr_map->stage(funs->ifexistss~f:(func->Char.(<>)c(unsafe_gettr_map(Char.to_intc)))thenmaps~f:(func->unsafe_gettr_map(Char.to_intc))elses));;(* fast version, if we ever need it:
{[
let concat_array ~sep ar =
let ar_len = Array.length ar in
if ar_len = 0 then ""
else
let sep_len = length sep in
let res_len_ref = ref (sep_len * (ar_len - 1)) in
for i = 0 to ar_len - 1 do
res_len_ref := !res_len_ref + length ar.(i)
done;
let res = create !res_len_ref in
let str_0 = ar.(0) in
let len_0 = length str_0 in
blit ~src:str_0 ~src_pos:0 ~dst:res ~dst_pos:0 ~len:len_0;
let pos_ref = ref len_0 in
for i = 1 to ar_len - 1 do
let pos = !pos_ref in
blit ~src:sep ~src_pos:0 ~dst:res ~dst_pos:pos ~len:sep_len;
let new_pos = pos + sep_len in
let str_i = ar.(i) in
let len_i = length str_i in
blit ~src:str_i ~src_pos:0 ~dst:res ~dst_pos:new_pos ~len:len_i;
pos_ref := new_pos + len_i
done;
res
]} *)letconcat_array?separ=concat?sep(Array.to_listar)letconcat_map?seps~f=concat_array?sep(Array.map(to_arrays)~f)(* [filter t f] is implemented by the following algorithm.
Let [n = length t].
1. Find the lowest [i] such that [not (f t.[i])].
2. If there is no such [i], then return [t].
3. If there is such an [i], allocate a string, [out], to hold the result. [out] has
length [n - 1], which is the maximum possible output size given that there is at least
one character not satisfying [f].
4. Copy characters at indices 0 ... [i - 1] from [t] to [out].
5. Walk through characters at indices [i+1] ... [n-1] of [t], copying those that
satisfy [f] from [t] to [out].
6. If we completely filled [out], then return it. If not, return the prefix of [out]
that we did fill in.
This algorithm has the property that it doesn't allocate a new string if there's
nothing to filter, which is a common case. *)letfiltert~f=letn=lengthtinleti=ref0inwhile!i<n&&ft.[!i]doincridone;if!i=nthentelse(letout=Bytes.create(n-1)inBytes.blit_string~src:t~src_pos:0~dst:out~dst_pos:0~len:!i;letout_pos=ref!iinincri;while!i<ndoletc=t.[!i]iniffcthen(Bytes.setout!out_posc;incrout_pos);incridone;letout=Bytes.unsafe_to_string~no_mutation_while_string_reachable:outinif!out_pos=n-1thenoutelsesubout~pos:0~len:!out_pos);;letchop_prefixs~prefix=ifis_prefixs~prefixthenSome(drop_prefixs(lengthprefix))elseNone;;letchop_prefix_if_existss~prefix=ifis_prefixs~prefixthendrop_prefixs(lengthprefix)elses;;letchop_prefix_exns~prefix=matchchop_prefixs~prefixwith|Somestr->str|None->invalid_argf"String.chop_prefix_exn %S %S"sprefix();;letchop_suffixs~suffix=ifis_suffixs~suffixthenSome(drop_suffixs(lengthsuffix))elseNone;;letchop_suffix_if_existss~suffix=ifis_suffixs~suffixthendrop_suffixs(lengthsuffix)elses;;letchop_suffix_exns~suffix=matchchop_suffixs~suffixwith|Somestr->str|None->invalid_argf"String.chop_suffix_exn %S %S"ssuffix();;(* There used to be a custom implementation that was faster for very short strings
(peaking at 40% faster for 4-6 char long strings).
This new function is around 20% faster than the default hash function, but slower
than the previous custom implementation. However, the new OCaml function is well
behaved, and this implementation is less likely to diverge from the default OCaml
implementation does, which is a desirable property. (The only way to avoid the
divergence is to expose the macro redefined in hash_stubs.c in the hash.h header of
the OCaml compiler.) *)moduleHash=structexternalhash:string->int="Base_hash_string"[@@noalloc]end(* [include Hash] to make the [external] version override the [hash] from
[Hashable.Make_binable], so that we get a little bit of a speedup by exposing it as
external in the mli. *)let_=hashincludeHashincludeComparable.Validate(T)(* for interactive top-levels -- modules deriving from String should have String's pretty
printer. *)letpp=Caml.Format.pp_print_stringletof_charc=make1cletof_char_listl=lett=Bytes.create(List.lengthl)inList.iteril~f:(funic->Bytes.settic);Bytes.unsafe_to_string~no_mutation_while_string_reachable:t;;moduleEscaping=struct(* If this is changed, make sure to update [escape], which attempts to ensure all the
invariants checked here. *)letbuild_and_validate_escapeworthy_mapescapeworthy_mapescape_charfunc=letescapeworthy_map=ifList.Assoc.memescapeworthy_map~equal:Char.equalescape_charthenescapeworthy_mapelse(escape_char,escape_char)::escapeworthy_mapinletarr=Array.create~len:256(-1)inletvals=Array.create~len:256falseinletrecloop=function|[]->Okarr|(c_from,c_to)::l->letk,v=matchfuncwith|`Escape->Char.to_intc_from,c_to|`Unescape->Char.to_intc_to,c_frominifarr.(k)<>-1||vals.(Char.to_intv)thenOr_error.error_s(Sexp.message"escapeworthy_map not one-to-one"["c_from",sexp_of_charc_from;"c_to",sexp_of_charc_to;("escapeworthy_map",sexp_of_list(sexp_of_pairsexp_of_charsexp_of_char)escapeworthy_map)])else(arr.(k)<-Char.to_intv;vals.(Char.to_intv)<-true;loopl)inloopescapeworthy_map;;letescape_gen~escapeworthy_map~escape_char=matchbuild_and_validate_escapeworthy_mapescapeworthy_mapescape_char`Escapewith|Error_asx->x|Okescapeworthy->Ok(funsrc->(* calculate a list of (index of char to escape * escaped char) first, the order
is from tail to head *)letto_escape_len=ref0inletto_escape=foldisrc~init:[]~f:(funiaccc->matchescapeworthy.(Char.to_intc)with|-1->acc|n->(* (index of char to escape * escaped char) *)incrto_escape_len;(i,Char.unsafe_of_intn)::acc)inmatchto_escapewith|[]->src|_->(* [to_escape] divide [src] to [List.length to_escape + 1] pieces separated by
the chars to escape.
Lets take
{[
escape_gen_exn
~escapeworthy_map:[('a', 'A'); ('b', 'B'); ('c', 'C')]
~escape_char:'_'
]}
for example, and assume the string to escape is
"000a111b222c333"
then [to_escape] is [(11, 'C'); (7, 'B'); (3, 'A')].
Then we create a [dst] of length [length src + 3] to store the
result, copy piece "333" to [dst] directly, then copy '_' and 'C' to [dst];
then move on to next; after 3 iterations, copy piece "000" and we are done.
Finally the result will be
"000_A111_B222_C333" *)letsrc_len=lengthsrcinletdst_len=src_len+!to_escape_leninletdst=Bytes.createdst_leninletreclooplast_idxlast_dst_pos=function|[]->(* copy "000" at last *)Bytes.blit_string~src~src_pos:0~dst~dst_pos:0~len:last_idx|(idx,escaped_char)::to_escape->(*[idx] = the char to escape*)(* take first iteration for example *)(* calculate length of "333", minus 1 because we don't copy 'c' *)letlen=last_idx-idx-1in(* set the dst_pos to copy to *)letdst_pos=last_dst_pos-lenin(* copy "333", set [src_pos] to [idx + 1] to skip 'c' *)Bytes.blit_string~src~src_pos:(idx+1)~dst~dst_pos~len;(* backoff [dst_pos] by 2 to copy '_' and 'C' *)letdst_pos=dst_pos-2inBytes.setdstdst_posescape_char;Bytes.setdst(dst_pos+1)escaped_char;loopidxdst_posto_escapein(* set [last_dst_pos] and [last_idx] to length of [dst] and [src] first *)loopsrc_lendst_lento_escape;Bytes.unsafe_to_string~no_mutation_while_string_reachable:dst);;letescape_gen_exn~escapeworthy_map~escape_char=Or_error.ok_exn(escape_gen~escapeworthy_map~escape_char)|>stage;;letescape~escapeworthy~escape_char=(* For [escape_gen_exn], we don't know how to fix invalid escapeworthy_map so we have
to raise exception; but in this case, we know how to fix duplicated elements in
escapeworthy list, so we just fix it instead of raising exception to make this
function easier to use. *)letescapeworthy_map=escapeworthy|>List.dedup_and_sort~compare:Char.compare|>List.map~f:(func->c,c)inescape_gen_exn~escapeworthy_map~escape_char;;(* In an escaped string, any char is either `Escaping, `Escaped or `Literal. For
example, the escape statuses of chars in string "a_a__" with escape_char = '_' are
a : `Literal
_ : `Escaping
a : `Escaped
_ : `Escaping
_ : `Escaped
[update_escape_status str ~escape_char i previous_status] gets escape status of
str.[i] basing on escape status of str.[i - 1] *)letupdate_escape_statusstr~escape_chari=function|`Escaping->`Escaped|`Literal|`Escaped->ifChar.equalstr.[i]escape_charthen`Escapingelse`Literal;;letunescape_gen~escapeworthy_map~escape_char=matchbuild_and_validate_escapeworthy_mapescapeworthy_mapescape_char`Unescapewith|Error_asx->x|Okescapeworthy->Ok(funsrc->(* Continue the example in [escape_gen_exn], now we unescape
"000_A111_B222_C333"
back to
"000a111b222c333"
Then [to_unescape] is [14; 9; 4], which is indexes of '_'s.
Then we create a string [dst] to store the result, copy "333" to it, then copy
'c', then move on to next iteration. After 3 iterations copy "000" and we are
done. *)(* indexes of escape chars *)letto_unescape=letrecloopistatusacc=ifi>=lengthsrcthenaccelse(letstatus=update_escape_statussrc~escape_charistatusinloop(i+1)status(matchstatuswith|`Escaping->i::acc|`Escaped|`Literal->acc))inloop0`Literal[]inmatchto_unescapewith|[]->src|idx::to_unescape'->letdst=Bytes.create(lengthsrc-List.lengthto_unescape)inletreclooplast_idxlast_dst_pos=function|[]->(* copy "000" at last *)Bytes.blit_string~src~src_pos:0~dst~dst_pos:0~len:last_idx|idx::to_unescape->(* [idx] = index of escaping char *)(* take 1st iteration as example, calculate the length of "333", minus 2 to
skip '_C' *)letlen=last_idx-idx-2in(* point [dst_pos] to the position to copy "333" to *)letdst_pos=last_dst_pos-lenin(* copy "333" *)Bytes.blit_string~src~src_pos:(idx+2)~dst~dst_pos~len;(* backoff [dst_pos] by 1 to copy 'c' *)letdst_pos=dst_pos-1inBytes.setdstdst_pos(matchescapeworthy.(Char.to_intsrc.[idx+1])with|-1->src.[idx+1]|n->Char.unsafe_of_intn);(* update [last_dst_pos] and [last_idx] *)loopidxdst_posto_unescapeinifidx<lengthsrc-1then(* set [last_dst_pos] and [last_idx] to length of [dst] and [src] *)loop(lengthsrc)(Bytes.lengthdst)to_unescapeelse(* for escaped string ending with an escaping char like "000_", just ignore
the last escaping char *)loop(lengthsrc-1)(Bytes.lengthdst)to_unescape';Bytes.unsafe_to_string~no_mutation_while_string_reachable:dst);;letunescape_gen_exn~escapeworthy_map~escape_char=Or_error.ok_exn(unescape_gen~escapeworthy_map~escape_char)|>stage;;letunescape~escape_char=unescape_gen_exn~escapeworthy_map:[]~escape_charletpreceding_escape_charsstr~escape_charpos=letreclooppcnt=ifp<0||Char.(<>)str.[p]escape_charthencntelseloop(p-1)(cnt+1)inloop(pos-1)0;;(* In an escaped string, any char is either `Escaping, `Escaped or `Literal. For
example, the escape statuses of chars in string "a_a__" with escape_char = '_' are
a : `Literal
_ : `Escaping
a : `Escaped
_ : `Escaping
_ : `Escaped
[update_escape_status str ~escape_char i previous_status] gets escape status of
str.[i] basing on escape status of str.[i - 1] *)letupdate_escape_statusstr~escape_chari=function|`Escaping->`Escaped|`Literal|`Escaped->ifChar.equalstr.[i]escape_charthen`Escapingelse`Literal;;letescape_statusstr~escape_charpos=letodd=preceding_escape_charsstr~escape_charposmod2=1inmatchodd,Char.equalstr.[pos]escape_charwith|true,(true|false)->`Escaped|false,true->`Escaping|false,false->`Literal;;letcheck_boundstrposfunction_name=ifpos>=lengthstr||pos<0theninvalid_argf"%s: out of bounds"function_name();;letis_char_escapingstr~escape_charpos=check_boundstrpos"is_char_escaping";matchescape_statusstr~escape_charposwith|`Escaping->true|`Escaped|`Literal->false;;letis_char_escapedstr~escape_charpos=check_boundstrpos"is_char_escaped";matchescape_statusstr~escape_charposwith|`Escaped->true|`Escaping|`Literal->false;;letis_char_literalstr~escape_charpos=check_boundstrpos"is_char_literal";matchescape_statusstr~escape_charposwith|`Literal->true|`Escaped|`Escaping->false;;letindex_fromstr~escape_charposchar=check_boundstrpos"index_from";letrecloopistatus=ifi>=pos&&(matchstatuswith|`Literal->true|`Escaped|`Escaping->false)&&Char.equalstr.[i]charthenSomeielse(leti=i+1inifi>=lengthstrthenNoneelseloopi(update_escape_statusstr~escape_charistatus))inlooppos(escape_statusstr~escape_charpos);;letindex_from_exnstr~escape_charposchar=matchindex_fromstr~escape_charposcharwith|None->raise_s(Sexp.message"index_from_exn: not found"["str",sexp_of_tstr;"escape_char",sexp_of_charescape_char;"pos",sexp_of_intpos;"char",sexp_of_charchar])|Somepos->pos;;letindexstr~escape_charchar=index_fromstr~escape_char0charletindex_exnstr~escape_charchar=index_from_exnstr~escape_char0charletrindex_fromstr~escape_charposchar=check_boundstrpos"rindex_from";(* if the target char is the same as [escape_char], we have no way to determine which
escape_char is literal, so just return None *)ifChar.equalcharescape_charthenNoneelse(letreclooppos=ifpos<0thenNoneelse(letescape_chars=preceding_escape_charsstr~escape_charposinifescape_charsmod2=0&&Char.equalstr.[pos]charthenSomeposelseloop(pos-escape_chars-1))inlooppos);;letrindex_from_exnstr~escape_charposchar=matchrindex_fromstr~escape_charposcharwith|None->raise_s(Sexp.message"rindex_from_exn: not found"["str",sexp_of_tstr;"escape_char",sexp_of_charescape_char;"pos",sexp_of_intpos;"char",sexp_of_charchar])|Somepos->pos;;letrindexstr~escape_charchar=ifis_emptystrthenNoneelserindex_fromstr~escape_char(lengthstr-1)char;;letrindex_exnstr~escape_charchar=rindex_from_exnstr~escape_char(lengthstr-1)char;;(* [split_gen str ~escape_char ~on] works similarly to [String.split_gen], with an
additional requirement: only split on literal chars, not escaping or escaped *)letsplit_genstr~escape_char~on=letis_delim=matchonwith|`charc'->func->Char.equalcc'|`char_listl->func->char_list_memlcinletlen=lengthstrinletrecloopaccstatuslast_pospos=ifpos=lenthenList.rev(substr~pos:last_pos~len:(len-last_pos)::acc)else(letstatus=update_escape_statusstr~escape_charposstatusinif(matchstatuswith|`Literal->true|`Escaped|`Escaping->false)&&is_delimstr.[pos]then(letsub_str=substr~pos:last_pos~len:(pos-last_pos)inloop(sub_str::acc)status(pos+1)(pos+1))elseloopaccstatuslast_pos(pos+1))inloop[]`Literal00;;letsplitstr~on=split_genstr~on:(`charon)letsplit_on_charsstr~on:chars=split_genstr~on:(`char_listchars)letsplit_atstrpos=substr~pos:0~len:pos,substr~pos:(pos+1)~len:(lengthstr-pos-1);;letlsplit2str~on~escape_char=Option.map(indexstr~escape_charon)~f:(funx->split_atstrx);;letrsplit2str~on~escape_char=Option.map(rindexstr~escape_charon)~f:(funx->split_atstrx);;letlsplit2_exnstr~on~escape_char=split_atstr(index_exnstr~escape_charon)letrsplit2_exnstr~on~escape_char=split_atstr(rindex_exnstr~escape_charon)(* [last_non_drop_literal] and [first_non_drop_literal] are either both [None] or both
[Some]. If [Some], then the former is >= the latter. *)letlast_non_drop_literal~drop~escape_chart=rfindit~f:(funic->(not(dropc))||is_char_escapingt~escape_chari||is_char_escapedt~escape_chari);;letfirst_non_drop_literal~drop~escape_chart=lfindit~f:(funic->(not(dropc))||is_char_escapingt~escape_chari||is_char_escapedt~escape_chari);;letrstrip_literal?(drop=Char.is_whitespace)t~escape_char=matchlast_non_drop_literalt~drop~escape_charwith|None->""|Somei->ifi=lengtht-1thentelseprefixt(i+1);;letlstrip_literal?(drop=Char.is_whitespace)t~escape_char=matchfirst_non_drop_literalt~drop~escape_charwith|None->""|Some0->t|Somen->drop_prefixtn;;(* [strip t] could be implemented as [lstrip (rstrip t)]. The implementation
below saves (at least) a factor of two allocation, by only allocating the
final result. This also saves some amount of time. *)letstrip_literal?(drop=Char.is_whitespace)t~escape_char=letlength=lengthtin(* performance hack: avoid copying [t] in common cases *)iflength=0||not(dropt.[0]||dropt.[length-1])thentelse(matchfirst_non_drop_literalt~drop~escape_charwith|None->""|Somefirst->(matchlast_non_drop_literalt~drop~escape_charwith|None->assertfalse|Somelast->subt~pos:first~len:(last-first+1)));;end(* Open replace_polymorphic_compare after including functor instantiations so they do not
shadow its definitions. This is here so that efficient versions of the comparison
functions are available within this module. *)open!String_replace_polymorphic_compareletbetweent~low~high=low<=t&&t<=highletclamp_uncheckedt~min~max=ift<minthenminelseift<=maxthentelsemaxletclamp_exnt~min~max=assert(min<=max);clamp_uncheckedt~min~max;;letclampt~min~max=ifmin>maxthenOr_error.error_s(Sexp.message"clamp requires [min <= max]"["min",T.sexp_of_tmin;"max",T.sexp_of_tmax])elseOk(clamp_uncheckedt~min~max);;(* Override [Search_pattern] with default case-sensitivity argument at the end of the
file, so that call sites above are forced to supply case-sensitivity explicitly. *)moduleSearch_pattern=structincludeSearch_pattern0letcreate?(case_sensitive=true)pattern=createpattern~case_sensitiveend(* Include type-specific [Replace_polymorphic_compare] at the end, after
including functor application that could shadow its definitions. This is
here so that efficient versions of the comparison functions are exported by
this module. *)includeString_replace_polymorphic_compare