1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249(* This file is free software, part of containers. See file "license" for more details. *)(** {1 Basic String Utils} *)openCCShims_type'aiter=('a->unit)->unittype'agen=unit->'aoptiontype'asequence=('a->unit)->unittype'aklist=unit->[`Nil|`Consof'a*'aklist](* compatibility implementations *)letinitnf=letbuf=Bytes.initnfinBytes.unsafe_to_stringbuf(*$T
init 3 (fun i -> [|'a'; 'b'; 'c'|].(i)) = "abc"
init 0 (fun _ -> assert false) = ""
*)letuppercase_ascii=String.mapCCChar.uppercase_asciiletlowercase_ascii=String.mapCCChar.lowercase_asciiletmapifs=init(String.lengths)(funi->fis.[i])letcapitalize_asciis=mapi(funic->ifi=0thenCCChar.uppercase_asciicelsec)sletuncapitalize_asciis=mapi(funic->ifi=0thenCCChar.lowercase_asciicelsec)s(* standard implementations *)includeStringmoduletypeS=sigtypetvallength:t->int(** Return the length (number of characters) of the given string. *)valblit:t->int->Bytes.t->int->int->unit(** Like {!String.blit}.
Compatible with the [-safe-string] option.
@raise Invalid_argument if indices are not valid. *)(*
val blit_immut : t -> int -> t -> int -> int -> string
(** Immutable version of {!blit}, returning a new string.
[blit a i b j len] is the same as [b], but in which
the range [j, ..., j+len] is replaced by [a.[i], ..., a.[i + len]].
@raise Invalid_argument if indices are not valid. *)
*)valfold:('a->char->'a)->'a->t->'a(** Fold on chars by increasing index.
@since 0.7 *)(** {2 Conversions} *)valto_gen:t->chargen(** Return the [gen] of characters contained in the string. *)valto_iter:t->chariter(** Return the [iter] of characters contained in the string.
@since 2.8 *)valto_std_seq:t->charSeq.t(** [to_std_seq s] returns a [Seq.t] of the bytes in [s].
@since 2.8
*)valto_seq:t->charsequence(** Return the [sequence] of characters contained in the string.
@deprecated use {!to_iter} instead *)[@@ocaml.deprecated"use to_iter or to_std_seq"]valto_klist:t->charklist(** Return the [klist] of characters contained in the string.
@deprecated use {!to_std_seq} instead *)[@@ocaml.deprecated"use to_std_seq"]valto_list:t->charlist(** Return the list of characters contained in the string. *)valpp_buf:Buffer.t->t->unit(** Renamed from [pp] since 2.0. *)valpp:Format.formatter->t->unit(** Print the string within quotes.
Renamed from [print] since 2.0. *)endletequal(a:string)b=Stdlib.(=)abletcompare_int(a:int)b=Stdlib.compareabletcompare=String.comparelethashs=Hashtbl.hashsletlength=String.lengthletis_emptys=equals""letrevs=letn=lengthsininitn(funi->s.[n-i-1])(*$Q
Q.printable_string (fun s -> s = rev (rev s))
Q.printable_string (fun s -> length s = length (rev s))
*)(*$Q
Q.printable_string (fun s -> \
rev s = (to_list s |> List.rev |> of_list))
*)(*$=
"abc" (rev "cba")
"" (rev "")
" " (rev " ")
*)letrec_to_listsaccilen=iflen=0thenList.revaccelse_to_lists(s.[i]::acc)(i+1)(len-1)let_is_sub~subisj~len=letreccheckk=ifk=lenthentrueelseCCChar.equalsub.[i+k]s.[j+k]&&check(k+1)inj+len<=String.lengths&&check0letis_sub~subisj~len=ifi+len>String.lengthsubtheninvalid_arg"CCString.is_sub";_is_sub~subisj~lentype_direction=|Direct:[`Direct]direction|Reverse:[`Reverse]direction(* we follow https://en.wikipedia.org/wiki/Knuth–Morris–Pratt_algorithm *)moduleFind=structtype'akmp_pattern={failure:intarray;str:string;}(* invariant: [length failure = length str].
We use a phantom type to avoid mixing the directions. *)letkmp_pattern_lengthp=String.lengthp.str(* access the [i]-th element of [s] according to direction [dir] *)letget_:typea.dir:adirection->string->int->char=fun~dir->matchdirwith|Direct->String.get|Reverse->(funsi->s.[String.lengths-i-1])letkmp_compile_:typea.dir:adirection->string->akmp_pattern=fun~dirstr->letlen=lengthstrinletget=get_~dirin(* how to read elements of the string *)matchlenwith|0->{failure=[||];str;}|1->{failure=[|-1|];str;}|_->(* at least 2 elements, the algorithm can work *)letfailure=Array.makelen0infailure.(0)<--1;(* i: current index in str *)leti=ref2in(* j: index of candidate substring *)letj=ref0inwhile!i<lendomatch!jwith|_whenCCChar.equal(getstr(!i-1))(getstr!j)->(* substring starting at !j continues matching current char *)incrj;failure.(!i)<-!j;incri;|0->(* back to the beginning *)failure.(!i)<-0;incri;|_->(* fallback for the prefix string *)assert(!j>0);j:=failure.(!j)done;(* Format.printf "{@[failure:%a, str:%s@]}@." CCFormat.(array int) failure str; *){failure;str;}letkmp_compiles=kmp_compile_~dir:Directsletkmp_rcompiles=kmp_compile_~dir:Reverses(* proper search function.
[i] index in [s]
[j] index in [pattern]
[len] length of [s] *)letkmp_find~patternsidx=letlen=lengthsinleti=refidxinletj=ref0inletpat_len=kmp_pattern_lengthpatterninwhile!j<pat_len&&!i+!j<lendoletc=String.gets(!i+!j)inletexpected=String.getpattern.str!jinifCCChar.equalcexpectedthen((* char matches *)incrj;)else(letfail_offset=pattern.failure.(!j)iniffail_offset>=0then(assert(fail_offset<!j);(* follow the failure link *)i:=!i+!j-fail_offset;j:=fail_offset)else((* beginning of pattern *)j:=0;incri))done;if!j=pat_lenthen!ielse-1(* proper search function, from the right.
[i] index in [s]
[j] index in [pattern]
[len] length of [s] *)letkmp_rfind~patternsidx=letlen=lengthsinleti=ref(len-idx-1)inletj=ref0inletpat_len=kmp_pattern_lengthpatterninwhile!j<pat_len&&!i+!j<lendoletc=String.gets(len-!i-!j-1)inletexpected=String.getpattern.str(String.lengthpattern.str-!j-1)inifCCChar.equalcexpectedthen((* char matches *)incrj;)else(letfail_offset=pattern.failure.(!j)iniffail_offset>=0then(assert(fail_offset<!j);(* follow the failure link *)i:=!i+!j-fail_offset;j:=fail_offset)else((* beginning of pattern *)j:=0;incri))done;(* adjust result: first, [res = string.length s - res -1] to convert
back to real indices; then, what we got is actually the position
of the end of the pattern, so we subtract the [length of the pattern -1]
to obtain the real result. *)if!j=pat_lenthenlen-!i-kmp_pattern_lengthpatternelse-1type'apattern=|P_charofchar|P_KMPof'akmp_patternletpattern_length=function|P_char_->1|P_KMPp->kmp_pattern_lengthpletcompilesub:[`Direct]pattern=iflengthsub=1thenP_charsub.[0]elseP_KMP(kmp_compilesub)letrcompilesub:[`Reverse]pattern=iflengthsub=1thenP_charsub.[0]elseP_KMP(kmp_rcompilesub)letfind?(start=0)~(pattern:[`Direct]pattern)s=matchpatternwith|P_charc->(tryString.index_fromsstartcwithNot_found->-1)|P_KMPpattern->kmp_find~patternsstartletrfind?start~(pattern:[`Reverse]pattern)s=letstart=matchstartwith|Somen->n|None->String.lengths-1inmatchpatternwith|P_charc->(tryString.rindex_fromsstartcwithNot_found->-1)|P_KMPpattern->kmp_rfind~patternsstartendletfind?(start=0)~sub=letpattern=Find.compilesubinfuns->Find.find~start~patterns(*$= & ~printer:string_of_int
1 (find ~sub:"bc" "abcd")
~-1 (find ~sub:"bc" "abd")
1 (find ~sub:"a" "_a_a_a_")
6 (find ~start:5 ~sub:"a" "a1a234a")
*)(*$Q & ~count:10_000
Q.(pair printable_string printable_string) (fun (s1,s2) -> \
let i = find ~sub:s2 s1 in \
i < 0 || String.sub s1 i (length s2) = s2)
*)letfind_all?(start=0)~sub=letpattern=Find.compilesubinfuns->leti=refstartinfun()->letres=Find.find~start:!i~patternsinifres=~-1thenNoneelse(i:=res+1;(* possible overlap *)Someres)letfind_all_l?start~subs=letrecauxaccg=matchg()with|None->List.revacc|Somei->aux(i::acc)ginaux[](find_all?start~subs)(*$= & ~printer:Q.Print.(list int)
[1; 6] (find_all_l ~sub:"bc" "abc aabc aab")
[] (find_all_l ~sub:"bc" "abd")
[76] (find_all_l ~sub:"aaaaaa" \
"aabbaabbaaaaabbbbabababababbbbabbbabbaaababbbaaabaabbaabbaaaabbababaaaabbaabaaaaaabbbaaaabababaabaaabbaabaaaabbababbaabbaaabaabbabababbbaabababaaabaaababbbaaaabbbaabaaababbabaababbaabbaaaaabababbabaababbbaaabbabbabababaaaabaaababaaaaabbabbaabbabbbbbbbbbbbbbbaabbabbbbbabbaaabbabbbbabaaaaabbababbbaaaa")
*)letmem?start~subs=find?start~subs>=0(*$T
mem ~sub:"bc" "abcd"
not (mem ~sub:"a b" "abcd")
*)letrfind~sub=letpattern=Find.rcompilesubinfuns->Find.rfind~start:(String.lengths-1)~patterns(*$= & ~printer:string_of_int
1 (rfind ~sub:"bc" "abcd")
~-1 (rfind ~sub:"bc" "abd")
5 (rfind ~sub:"a" "_a_a_a_")
4 (rfind ~sub:"bc" "abcdbcd")
6 (rfind ~sub:"a" "a1a234a")
*)(*$Q & ~count:10_000
Q.(pair printable_string printable_string) (fun (s1,s2) -> \
let i = rfind ~sub:s2 s1 in \
i < 0 || String.sub s1 i (length s2) = s2)
*)(* Replace substring [s.[pos]....s.[pos+len-1]] by [by] in [s] *)letreplace_at_~pos~len~bys=letb=Buffer.create(lengths+lengthby-len)inBuffer.add_substringbs0pos;Buffer.add_stringbby;Buffer.add_substringbs(pos+len)(String.lengths-pos-len);Buffer.contentsbletreplace?(which=`All)~sub~bys=ifis_emptysubtheninvalid_arg"CCString.replace";matchwhichwith|`Left->leti=find~start:0~subsinifi>=0thenreplace_at_~pos:i~len:(String.lengthsub)~byselses|`Right->leti=rfind~subsinifi>=0thenreplace_at_~pos:i~len:(String.lengthsub)~byselses|`All->(* compile search pattern only once *)letpattern=Find.compilesubinletb=Buffer.create(String.lengths)inletstart=ref0inwhile!start<String.lengthsdoleti=Find.find~start:!start~patternsinifi>=0then((* between last and cur occurrences *)Buffer.add_substringbs!start(i-!start);Buffer.add_stringbby;start:=i+String.lengthsub)else((* add remainder *)Buffer.add_substringbs!start(String.lengths-!start);start:=String.lengths(* stop *))done;Buffer.contentsb(*$= & ~printer:CCFun.id
(replace ~which:`All ~sub:"a" ~by:"b" "abcdabcd") "bbcdbbcd"
(replace ~which:`Left ~sub:"a" ~by:"b" "abcdabcd") "bbcdabcd"
(replace ~which:`Right ~sub:"a" ~by:"b" "abcdabcd") "abcdbbcd"
(replace ~which:`All ~sub:"ab" ~by:"hello" " abab cdabb a") \
" hellohello cdhellob a"
(replace ~which:`Left ~sub:"ab" ~by:"nope" " a b c d ") " a b c d "
(replace ~sub:"a" ~by:"b" "1aa234a") "1bb234b"
*)moduleSplit=structtypedrop_if_empty={first:bool;last:bool;}letno_drop={first=false;last=false}letdefault_drop=no_droptypesplit_state=|SplitStop|SplitAtofint(* previous *)letrec_split~bysstate=matchstatewith|SplitStop->None|SplitAtprev->_split_search~bysprevand_split_search~bysprev=letj=Find.find~start:prev~pattern:bysinifj<0thenSome(SplitStop,prev,String.lengths-prev)elseSome(SplitAt(j+Find.pattern_lengthby),prev,j-prev)let_tuple3xyz=x,y,zlet_mkgen~drop~bysk=letstate=ref(SplitAt0)inletby=Find.compilebyinletrecnext()=match_split~bys!statewith|None->None|Some(state',0,0)whendrop.first->state:=state';next()|Some(_,i,0)whendrop.last&&i=lengths->None|Some(state',i,len)->state:=state';Some(ksilen)innextletgen?(drop=default_drop)~bys=_mkgen~drop~bys_tuple3letgen_cpy?(drop=default_drop)~bys=_mkgen~drop~bysString.sublet_mklist~drop~bysk=letby=Find.compilebyinletrecbuildaccstate=match_split~bysstatewith|None->List.revacc|Some(state',0,0)whendrop.first->buildaccstate'|Some(_,i,0)whendrop.last&&i=lengths->List.revacc|Some(state',i,len)->build(ksilen::acc)state'inbuild[](SplitAt0)letlist_?(drop=default_drop)~bys=_mklist~drop~bys_tuple3letlist_cpy?(drop=default_drop)~bys=_mklist~drop~bysString.sub(*$T
Split.list_cpy ~by:"," "aa,bb,cc" = ["aa"; "bb"; "cc"]
Split.list_cpy ~by:"--" "a--b----c--" = ["a"; "b"; ""; "c"; ""]
Split.list_cpy ~by:" " "hello world aie" = ["hello"; ""; "world"; "aie"]
*)let_mkseq~drop~bysk=letby=Find.compilebyinletrecmakestate()=match_split~bysstatewith|None->Seq.Nil|Some(state',0,0)whendrop.first->makestate'()|Some(_,i,0)whendrop.last&&i=lengths->Seq.Nil|Some(state',i,len)->Seq.Cons(ksilen,makestate')inmake(SplitAt0)letstd_seq?(drop=default_drop)~bys=_mkseq~drop~bys_tuple3letstd_seq_cpy?(drop=default_drop)~bys=_mkseq~drop~bysString.sublet_mkklist~drop~bysk=letby=Find.compilebyinletrecmakestate()=match_split~bysstatewith|None->`Nil|Some(state',0,0)whendrop.first->makestate'()|Some(_,i,0)whendrop.last&&i=lengths->`Nil|Some(state',i,len)->`Cons(ksilen,makestate')inmake(SplitAt0)letklist?(drop=default_drop)~bys=_mkklist~drop~bys_tuple3letklist_cpy?(drop=default_drop)~bys=_mkklist~drop~bysString.sublet_mk_iter~drop~bysfk=letby=Find.compilebyinletrecauxstate=match_split~bysstatewith|None->()|Some(state',0,0)whendrop.first->auxstate'|Some(_,i,0)whendrop.last&&i=lengths->()|Some(state',i,len)->k(fsilen);auxstate'inaux(SplitAt0)letiter?(drop=default_drop)~bys=_mk_iter~drop~bys_tuple3letiter_cpy?(drop=default_drop)~bys=_mk_iter~drop~bysString.subletseq=iterletseq_cpy=iter_cpyletleft_exn~bys=leti=find~sub:bysinifi=~-1thenraiseNot_foundelseletright=i+String.lengthbyinString.subs0i,String.subsright(String.lengths-right)letleft~bys=trySome(left_exn~bys)withNot_found->None(*$T
Split.left ~by:" " "ab cde f g " = Some ("ab", "cde f g ")
Split.left ~by:"__" "a__c__e_f" = Some ("a", "c__e_f")
Split.left ~by:"_" "abcde" = None
Split.left ~by:"bb" "abbc" = Some ("a", "c")
Split.left ~by:"a_" "abcde" = None
*)letright_exn~bys=leti=rfind~sub:bysinifi=~-1thenraiseNot_foundelseletright=i+String.lengthbyinString.subs0i,String.subsright(String.lengths-right)letright~bys=trySome(right_exn~bys)withNot_found->None(*$T
Split.right ~by:" " "ab cde f g" = Some ("ab cde f", "g")
Split.right ~by:"__" "a__c__e_f" = Some ("a__c", "e_f")
Split.right ~by:"_" "abcde" = None
Split.right ~by:"a_" "abcde" = None
*)endletsplit_on_charcs:_list=Split.list_cpy~drop:Split.no_drop~by:(String.make1c)s(*$= & ~printer:Q.Print.(list string)
["a"; "few"; "words"; "from"; "our"; "sponsors"] \
(split_on_char ' ' "a few words from our sponsors")
*)(*$Q
Q.(printable_string) (fun s -> \
let s = split_on_char ' ' s |> String.concat " " in \
s = (split_on_char ' ' s |> String.concat " "))
*)letsplit~bys=Split.list_cpy~bysletcompare_versionsab=letof_ints=trySome(int_of_strings)with_->Noneinletreccmp_recab=matcha(),b()with|None,None->0|Some_,None->1|None,Some_->-1|Somex,Somey->matchof_intx,of_intywith|None,None->letc=String.comparexyinifc<>0thencelsecmp_recab|Some_,None->1|None,Some_->-1|Somex,Somey->letc=compare_intxyinifc<>0thencelsecmp_recabincmp_rec(Split.gen_cpy~by:"."a)(Split.gen_cpy~by:"."b)(*$T
compare_versions "0.1.3" "0.1" > 0
compare_versions "10.1" "2.0" > 0
compare_versions "0.1.alpha" "0.1" > 0
compare_versions "0.3.dev" "0.4" < 0
compare_versions "0.foo" "0.0" < 0
compare_versions "1.2.3.4" "01.2.4.3" < 0
*)(*$Q
Q.(pair printable_string printable_string) (fun (a,b) -> \
CCOrd.equiv (compare_versions a b) (CCOrd.opp compare_versions b a))
*)typenat_chunk=|NC_charofchar|NC_intofintletcompare_naturalab=(* stream of chunks *)letchunkss:unit->nat_chunkoption=leti=ref0inletrecnext()=if!i=lengthsthenNoneelsematchString.gets!iwith|'0'..'9'asc->incri;read_int(Char.codec-Char.code'0')|c->incri;Some(NC_charc)andread_intn=if!i=lengthsthenSome(NC_intn)elsematchString.gets!iwith|'0'..'9'asc->incri;read_int(10*n+Char.codec-Char.code'0')|_->Some(NC_intn)innextinletreccmp_recab=matcha(),b()with|None,None->0|Some_,None->1|None,Some_->-1|Somex,Somey->matchx,ywith|NC_charx,NC_chary->letc=Char.comparexyinifc<>0thencelsecmp_recab|NC_int_,NC_char_->1|NC_char_,NC_int_->-1|NC_intx,NC_inty->letc=compare_intxyinifc<>0thencelsecmp_recabincmp_rec(chunksa)(chunksb)(*$T
compare_natural "foo1" "foo2" < 0
compare_natural "foo11" "foo2" > 0
compare_natural "foo11" "foo11" = 0
compare_natural "foo011" "foo11" = 0
compare_natural "foo1a" "foo1b" < 0
compare_natural "foo1a1" "foo1a2" < 0
compare_natural "foo1a17" "foo1a2" > 0
*)(*Q
(Q.pair printable_string printable_string) (fun (a,b) -> \
CCOrd.opp (compare_natural a b) = compare_natural b a)
(Q.printable_string) (fun a -> compare_natural a a = 0)
(Q.triple printable_string printable_string printable_string) (fun (a,b,c) -> \
if compare_natural a b < 0 && compare_natural b c < 0 \
then compare_natural a c < 0 else Q.assume_fail())
*)letedit_distances1s2=iflengths1=0thenlengths2elseiflengths2=0thenlengths1elseifequals1s2then0elsebegin(* distance vectors (v0=previous, v1=current) *)letv0=Array.make(lengths2+1)0inletv1=Array.make(lengths2+1)0in(* initialize v0: v0(i) = A(0)(i) = delete i chars from t *)fori=0tolengths2dov0.(i)<-idone;(* main loop for the bottom up dynamic algorithm *)fori=0tolengths1-1do(* first edit distance is the deletion of i+1 elements from s *)v1.(0)<-i+1;(* try add/delete/replace operations *)forj=0tolengths2-1doletcost=ifChar.compare(String.gets1i)(String.gets2j)=0then0else1inv1.(j+1)<-min(v1.(j)+1)(min(v0.(j+1)+1)(v0.(j)+cost));done;(* copy v1 into v0 for next iteration *)Array.blitv10v00(lengths2+1);done;v1.(lengths2)end(*$Q
Q.(string_of_size Gen.(0 -- 30)) (fun s -> \
edit_distance s s = 0)
*)(* test that building a from s, and mutating one char of s, yields
a string s' that is accepted by a.
--> generate triples (s, i, c) where c is a char, s a non empty string
and i a valid index in s.
*)(*$QR
(
let gen = Q.Gen.(
3 -- 10 >>= fun len ->
0 -- (len-1) >>= fun i ->
string_size (return len) >>= fun s ->
char >|= fun c -> (s,i,c)
) in
let small (s,_,_) = String.length s in
Q.make ~small gen
)
(fun (s,i,c) ->
let s' = Bytes.of_string s in
Bytes.set s' i c;
edit_distance s (Bytes.to_string s') <= 1)
*)letrepeatsn=assert(n>=0);letlen=String.lengthsinassert(len>0);init(len*n)(funi->s.[imodlen])letprefix~pres=letlen=String.lengthpreiniflen>String.lengthsthenfalseelse(letrecchecki=ifi=lenthentrueelseifStdlib.(<>)(String.unsafe_getsi)(String.unsafe_getprei)thenfalseelsecheck(i+1)incheck0)(*$T
prefix ~pre:"aab" "aabcd"
not (prefix ~pre:"ab" "aabcd")
not (prefix ~pre:"abcd" "abc")
prefix ~pre:"abc" "abcde"
prefix ~pre:"" ""
prefix ~pre:"" "abc"
prefix ~pre:"abc" "abc"
*)letsuffix~sufs=letlen=String.lengthsufiniflen>String.lengthsthenfalseelse(letoff=String.lengths-leninletrecchecki=ifi=lenthentrueelseifStdlib.(<>)(String.unsafe_gets(off+i))(String.unsafe_getsufi)thenfalseelsecheck(i+1)incheck0)(*$T
suffix ~suf:"cd" "abcd"
suffix ~suf:"" ""
suffix ~suf:"" "abc"
not (suffix ~suf:"cd" "abcde")
not (suffix ~suf:"abcd" "cd")
*)lettakens=ifn<String.lengthsthenString.subs0nelsesletdropns=ifn<String.lengthsthenString.subsn(String.lengths-n)else""lettake_dropns=takens,dropns(*$=
("ab", "cd") (take_drop 2 "abcd")
("abc", "") (take_drop 3 "abc")
("abc", "") (take_drop 5 "abc")
*)letchop_suffix~sufs=ifsuffix~sufsthenSome(String.subs0(String.lengths-String.lengthsuf))elseNone(*$= & ~printer:Q.Print.(option string)
(Some "ab") (chop_suffix ~suf:"cd" "abcd")
None (chop_suffix ~suf:"cd" "abcde")
None (chop_suffix ~suf:"abcd" "cd")
*)letchop_prefix~pres=ifprefix~presthenSome(String.subs(String.lengthpre)(String.lengths-String.lengthpre))elseNone(*$= & ~printer:Q.Print.(option string)
(Some "cd") (chop_prefix ~pre:"aab" "aabcd")
None (chop_prefix ~pre:"ab" "aabcd")
None (chop_prefix ~pre:"abcd" "abc")
*)letblit=String.blitletfoldfaccs=letrecfold_recfaccsi=ifi=String.lengthsthenaccelsefold_recf(faccs.[i])s(i+1)infold_recfaccs0letpad?(side=`Left)?(c=' ')ns=letlen_s=String.lengthsiniflen_s>=nthenselseletpad_len=n-len_sinmatchsidewith|`Left->initn(funi->ifi<pad_lenthencelses.[i-pad_len])|`Right->initn(funi->ifi<len_sthens.[i]elsec)(*$= & ~printer:Q.Print.string
" 42" (pad 4 "42")
"0042" (pad ~c:'0' 4 "42")
"4200" (pad ~side:`Right ~c:'0' 4 "42")
"hello" (pad 4 "hello")
"aaa" (pad ~c:'a' 3 "")
"aaa" (pad ~side:`Right ~c:'a' 3 "")
*)let_to_gensi0len=leti=refi0infun()->if!i=i0+lenthenNoneelse(letc=String.unsafe_gets!iinincri;Somec)letto_gens=_to_gens0(String.lengths)letof_charc=String.make1cletof_geng=letb=Buffer.create32inletrecaux()=matchg()with|None->Buffer.contentsb|Somec->Buffer.add_charbc;aux()inaux()letto_itersk=String.iterksletto_seq=to_iterletrec_to_std_seqsilen()=iflen=0thenSeq.NilelseSeq.Cons(s.[i],_to_std_seqs(i+1)(len-1))letto_std_seqs=_to_std_seqs0(String.lengths)letof_iteri=letb=Buffer.create32ini(Buffer.add_charb);Buffer.contentsbletof_std_seqseq=letb=Buffer.create32inSeq.iter(Buffer.add_charb)seq;Buffer.contentsbletof_seq=of_iterletrec_to_klistsilen()=iflen=0then`Nilelse`Cons(s.[i],_to_klists(i+1)(len-1))letof_klistl=letb=Buffer.create15inletrecauxl=matchl()with|`Nil->Buffer.contentsb|`Cons(x,l')->Buffer.add_charbx;auxl'inauxlletto_klists=_to_klists0(String.lengths)letto_lists=_to_lists[]0(String.lengths)letof_listl=letbuf=Buffer.create(List.lengthl)inList.iter(Buffer.add_charbuf)l;Buffer.contentsbuf(*$T
of_list ['a'; 'b'; 'c'] = "abc"
of_list [] = ""
*)letof_arraya=init(Array.lengtha)(funi->a.(i))letto_arrays=Array.init(String.lengths)(funi->s.[i])letlines_gens=Split.gen_cpy~drop:{Split.first=false;last=true}~by:"\n"sletliness=Split.list_cpy~drop:{Split.first=false;last=true}~by:"\n"s(*$= & ~printer:Q.Print.(list @@ Printf.sprintf "%S")
["ab"; "c"] (lines "ab\nc")
["ab"; "c"] (lines "ab\nc\n")
[] (lines "")
[""] (lines "\n")
[""; "a"] (lines "\na")
*)letconcat_gen_buf~sepg:Buffer.t=letb=Buffer.create256inletrecaux~first()=matchg()with|None->b|Somes->ifnotfirstthenBuffer.add_stringbsep;Buffer.add_stringbs;aux~first:false()inaux~first:true()letconcat_gen~sepg=letbuf=concat_gen_buf~sepginBuffer.contentsbufletunlinesl=letlen=List.fold_left(funns->n+1+String.lengths)0linletbuf=Bytes.createleninletrecaux_blitil=matchlwith|[]->assert(i=len);Bytes.to_stringbuf|s::tail->letlen_s=String.lengthsinBytes.blit_strings0bufilen_s;Bytes.setbuf(i+len_s)'\n';aux_blit(i+len_s+1)tailinaux_blit0lletunlines_geng=letbuf=concat_gen_buf~sep:"\n"ginBuffer.add_charbuf'\n';Buffer.contentsbuf(*$= & ~printer:CCFun.id
"" (unlines [])
"ab\nc\n" (unlines ["ab"; "c"])
*)(*$Q
Q.printable_string (fun s -> trim (unlines (lines s)) = trim s)
Q.printable_string (fun s -> trim (unlines_gen (lines_gen s)) = trim s)
*)(*$Q
Q.(small_list small_string) (fun l -> \
let l = unlines l |> lines in \
l = (unlines l |> lines))
*)letsetsic=ifi<0||i>=String.lengthstheninvalid_arg"CCString.set";init(String.lengths)(funj->ifi=jthencelses.[j])(*$T
set "abcd" 1 '_' = "a_cd"
set "abcd" 0 '-' = "-bcd"
(try ignore (set "abc" 5 '_'); false with Invalid_argument _ -> true)
*)letiter=String.iterletfilter_mapfs=letbuf=Buffer.create(String.lengths)initer(func->matchfcwith|None->()|Somec'->Buffer.add_charbufc')s;Buffer.contentsbuf(*$= & ~printer:Q.Print.string
"bcef" (filter_map \
(function 'c' -> None | c -> Some (Char.chr (Char.code c + 1))) "abcde")
*)letfilterfs=letbuf=Buffer.create(String.lengths)initer(func->iffcthenBuffer.add_charbufc)s;Buffer.contentsbuf(*$= & ~printer:Q.Print.string
"abde" (filter (function 'c' -> false | _ -> true) "abcdec")
*)(*$Q
Q.printable_string (fun s -> filter (fun _ -> true) s = s)
*)letflat_map?sepfs=letbuf=Buffer.create(String.lengths)initeri(funic->beginmatchsepwith|Some_wheni=0->()|None->()|Somesep->Buffer.add_stringbufsepend;Buffer.add_stringbuf(fc))s;Buffer.contentsbufexceptionMyExitletfor_allps=tryiter(func->ifnot(pc)thenraiseMyExit)s;truewithMyExit->falseletexistsps=tryiter(func->ifpcthenraiseMyExit)s;falsewithMyExit->trueletdrop_whilefs=leti=ref0inwhile!i<lengths&&f(unsafe_gets!i)doincridone;if!i>0thensubs!i(lengths-!i)elsesletrdrop_whilefs=leti=ref(lengths-1)inwhile!i>=0&&f(unsafe_gets!i)dodecridone;if!i<lengths-1thensubs0(!i+1)elses(* notion of whitespace for trim *)letis_space_=function|' '|'\012'|'\n'|'\r'|'\t'->true|_->falseletltrims=drop_whileis_space_sletrtrims=rdrop_whileis_space_s(*$= & ~printer:id
"abc " (ltrim " abc ")
" abc" (rtrim " abc ")
*)(*$Q
Q.(printable_string) (fun s -> \
String.trim s = (s |> ltrim |> rtrim))
Q.(printable_string) (fun s -> ltrim s = ltrim (ltrim s))
Q.(printable_string) (fun s -> rtrim s = rtrim (rtrim s))
Q.(printable_string) (fun s -> \
let s' = ltrim s in \
if s'="" then Q.assume_fail() else s'.[0] <> ' ')
Q.(printable_string) (fun s -> \
let s' = rtrim s in \
if s'="" then Q.assume_fail() else s'.[String.length s'-1] <> ' ')
*)letmap2fs1s2=iflengths1<>lengths2theninvalid_arg"CCString.map2";init(String.lengths1)(funi->fs1.[i]s2.[i])letiter2fs1s2=iflengths1<>lengths2theninvalid_arg"CCString.iter2";fori=0toString.lengths1-1dofs1.[i]s2.[i]doneletiteri2fs1s2=iflengths1<>lengths2theninvalid_arg"CCString.iteri2";fori=0toString.lengths1-1dofis1.[i]s2.[i]doneletfold2faccs1s2=iflengths1<>lengths2theninvalid_arg"CCString.fold2";letrecfold'accs1s2i=ifi=String.lengths1thenaccelsefold'(faccs1.[i]s2.[i])s1s2(i+1)infold'accs1s20letfor_all2ps1s2=tryiter2(func1c2->ifnot(pc1c2)thenraiseMyExit)s1s2;truewithMyExit->falseletexists2ps1s2=tryiter2(func1c2->ifpc1c2thenraiseMyExit)s1s2;falsewithMyExit->true(** {2 Ascii functions} *)letequal_caselesss1s2:bool=String.lengths1=String.lengths2&&for_all2(func1c2->CCChar.equal(CCChar.lowercase_asciic1)(CCChar.lowercase_asciic2))s1s2(*$T
equal_caseless "foo" "FoO"
equal_caseless "helLo" "HEllO"
*)(*$Q
Q.(pair printable_string printable_string) (fun (s1,s2) -> \
equal_caseless s1 s2 = (lowercase_ascii s1=lowercase_ascii s2))
Q.(printable_string) (fun s -> equal_caseless s s)
Q.(printable_string) (fun s -> equal_caseless (uppercase_ascii s) s)
*)letpp_bufbufs=Buffer.add_charbuf'"';Buffer.add_stringbufs;Buffer.add_charbuf'"'letppfmts=Format.fprintffmt"\"%s\""smoduleSub=structtypet=string*int*intletmakesi~len=ifi<0||len<0||i+len>String.lengthstheninvalid_arg"CCString.Sub.make";s,i,lenletfulls=s,0,String.lengthsletcopy(s,i,len)=String.subsilenletunderlying(s,_,_)=sletsub(s,i,len)i'len'=ifi+i'+len'>i+lentheninvalid_arg"CCString.Sub.sub";(s,i+i',len')letlength(_,_,l)=lletget(s,i,l)j=ifj<0||j>=ltheninvalid_arg"CCString.Sub.get";String.unsafe_gets(i+j)letblit(a1,i1,len1)o1a2o2len=ifo1+len>len1theninvalid_arg"CCString.Sub.blit";blita1(i1+o1)a2o2lenletfoldfacc(s,i,len)=letrecfold_recfaccsij=ifi=jthenaccelsefold_recf(faccs.[i])s(i+1)jinfold_recfaccsi(i+len)(*$T
let s = Sub.make "abcde" 1 3 in \
Sub.fold (fun acc x -> x::acc) [] s = ['d'; 'c'; 'b']
Sub.make "abcde" 1 3 |> Sub.copy = "bcd"
Sub.full "abcde" |> Sub.copy = "abcde"
*)(*$T
let sub = Sub.make " abc " 1 ~len:3 in \
"\"abc\"" = (CCFormat.to_string Sub.pp sub)
*)(*$= & ~printer:(String.make 1)
'b' Sub.(get (make "abc" 1 ~len:2) 0)
'c' Sub.(get (make "abc" 1 ~len:2) 1)
*)(*$QR
Q.(printable_string_of_size Gen.(3--10)) (fun s ->
let open Iter.Infix in
begin
(0 -- (length s-2)
>|= fun i -> i, Sub.make s i ~len:(length s-i))
>>= fun (i,sub) ->
(0 -- (Sub.length sub-1) >|= fun j -> i,j,sub)
end
|> Iter.for_all
(fun (i,j,sub) -> Sub.get sub j = s.[i+j]))
*)letto_gen(s,i,len)=_to_gensilenletto_iter(s,i,len)k=fori=itoi+len-1doks.[i]doneletto_std_seq(s,i,len)=_to_std_seqsilenletto_seq=to_iterletto_klist(s,i,len)=_to_klistsilenletto_list(s,i,len)=_to_lists[]ilenletpp_bufbuf(s,i,len)=Buffer.add_charbuf'"';Buffer.add_substringbufsilen;Buffer.add_charbuf'"'letppfmts=Format.fprintffmt"\"%s\""(copys)end(* test consistency of interfaces *)(*$inject
module type L = module type of CCString
module type LL = module type of CCStringLabels
*)(*$R
ignore (module CCStringLabels : L)
*)(*$R
ignore (module CCString : LL)
*)