Source file string_intf.ml

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
open! Import

(** Interface for Unicode encodings, such as UTF-8. Written with an abstract type, and
    specialized below. *)
module type Utf = sig
  type t [@@deriving_inline sexp_grammar]

  val t_sexp_grammar : t Sexplib0.Sexp_grammar.t

  [@@@end]

  (** [t_of_sexp] and [of_string] will raise if the input is invalid in this encoding. See
      [sanitize] below to construct a valid [t] from arbitrary input. *)
  include Identifiable.S with type t := t

  (** Interpret [t] as a container of Unicode scalar values, rather than of ASCII
      characters. Indexes, length, etc. are with respect to [Uchar.t]. *)
  include Indexed_container.S0_with_creators with type t := t and type elt = Uchar0.t

  (** Produce a sequence of unicode characters. *)
  val to_sequence : t -> Uchar0.t Sequence.t

  (** Reports whether a string is valid in this encoding. *)
  val is_valid : string -> bool

  (** Create a [t] from a string by replacing any byte sequences that are invalid in this
      encoding with [Uchar.replacement_char]. This can be used to decode strings that may
      be encoded incorrectly. *)
  val sanitize : string -> t

  (** Decodes the Unicode scalar value at the given byte index in this encoding. Raises if
      [byte_pos] does not refer to the start of a Unicode scalar value. *)
  val get : t -> byte_pos:int -> Uchar0.t

  (** Creates a [t] without sanitizing or validating the string. Other functions in this
      interface may raise or produce unpredictable results if the string is invalid in
      this encoding. *)
  val of_string_unchecked : string -> t

  (** Similar to [String.split], but splits on a [Uchar.t] in [t]. If you want to split on
      a [char], first convert it with [Uchar.of_char], but note that the actual byte(s) on
      which [t] is split may not be the same as the [char] byte depending on both [char]
      and the encoding of [t]. For example, splitting on 'α' in UTF-8 or on '\n' in UTF-16
      is actually splitting on a 2-byte sequence. *)
  val split : t -> on:Uchar0.t -> t list

  (** The name of this encoding scheme; e.g., "UTF-8". *)
  val codec_name : string

  (** Counts the number of unicode scalar values in [t].

      This function is not a good proxy for display width, as some scalar values have
      display widths > 1. Many native applications such as terminal emulators use
      [wcwidth] (see [man 3 wcwidth]) to compute the display width of a scalar value. See
      the uucp library's [Uucp.Break.tty_width_hint] for an implementation of [wcwidth]'s
      logic. However, this is merely best-effort, as display widths will vary based on the
      font and underlying text shaping engine (see docs on [tty_width_hint] for details).

      For applications that support Grapheme clusters (many terminal emulators do not),
      [t] should first be split into Grapheme clusters and then the display width of each
      of those Grapheme clusters needs to be computed (which is the max display width of
      the scalars that are in the cluster).

      There are some active efforts to improve the current state of affairs:
      - https://github.com/wez/wezterm/issues/4320
      - https://www.unicode.org/L2/L2023/23194-text-terminal-wg-report.pdf *)
  val length_in_uchars : t -> int

  (** [length] could be misinterpreted as counting bytes. We direct users to other,
      clearer options. *)
  val length : t -> int
    [@@alert
      length_in_uchars
        "Use [length_in_uchars] to count unicode scalar values or [String.length] to \
         count bytes"]
end

(** Iterface for Unicode encodings, specialized for string representation. *)
module type Utf_as_string = Utf with type t = private string

module type String = sig
  (** An extension of the standard [StringLabels].  If you [open Base], you'll get these
      extensions in the [String] module. *)

  open! Import

  type t = string [@@deriving_inline globalize, sexp, sexp_grammar]

  val globalize : t -> t

  include Sexplib0.Sexpable.S with type t := t

  val t_sexp_grammar : t Sexplib0.Sexp_grammar.t

  [@@@end]

  val sub : (t, t) Blit.sub

  (** [sub] with no bounds checking, and always returns a new copy *)
  val unsafe_sub : t -> pos:int -> len:int -> t

  val subo : (t, t) Blit.subo

  include Indexed_container.S0_with_creators with type t := t with type elt = char
  include Identifiable.S with type t := t
  include Ppx_compare_lib.Comparable.S_local with type t := t
  include Invariant.S with type t := t

  (** Maximum length of a string. *)
  val max_length : int

  val mem : t -> char -> bool
  external length : (t[@local_opt]) -> int = "%string_length"
  external get : (t[@local_opt]) -> (int[@local_opt]) -> char = "%string_safe_get"

  (** [unsafe_get t i] is like [get t i] but does not perform bounds checking. The caller
      must ensure that it is a memory-safe operation. *)
  external unsafe_get
    :  (string[@local_opt])
    -> (int[@local_opt])
    -> char
    = "%string_unsafe_get"

  val make : int -> char -> t

  (** String append. Also available unqualified, but re-exported here for documentation
      purposes.

      Note that [a ^ b] must copy both [a] and [b] into a newly-allocated result string, so
      [a ^ b ^ c ^ ... ^ z] is quadratic in the number of strings.  [String.concat] does not
      have this problem -- it allocates the result buffer only once. *)
  val ( ^ ) : t -> t -> t

  (** Concatenates all strings in the list using separator [sep] (with a default separator
      [""]). *)
  val concat : ?sep:t -> t list -> t

  (** Special characters are represented by escape sequences, following the lexical
      conventions of OCaml. *)
  val escaped : t -> t

  val contains : ?pos:int -> ?len:int -> t -> char -> bool

  (** Operates on the whole string using the US-ASCII character set,
      e.g. [uppercase "foo" = "FOO"]. *)
  val uppercase : t -> t

  val lowercase : t -> t

  (** Operates on just the first character using the US-ASCII character set,
      e.g. [capitalize "foo" = "Foo"]. *)
  val capitalize : t -> t

  val uncapitalize : t -> t

  (** [Caseless] compares and hashes strings ignoring case, so that for example
      [Caseless.equal "OCaml" "ocaml"] and [Caseless.("apple" < "Banana")] are [true].

      [Caseless] also provides case-insensitive [is_suffix] and [is_prefix] functions, so
      that for example [Caseless.is_suffix "OCaml" ~suffix:"AmL"] and [Caseless.is_prefix
      "OCaml" ~prefix:"oc"] are [true]. *)
  module Caseless : sig
    type nonrec t = t [@@deriving_inline hash, sexp, sexp_grammar]

    include Ppx_hash_lib.Hashable.S with type t := t
    include Sexplib0.Sexpable.S with type t := t

    val t_sexp_grammar : t Sexplib0.Sexp_grammar.t

    [@@@end]

    include Comparable.S with type t := t
    include Ppx_compare_lib.Comparable.S_local with type t := t

    val is_suffix : t -> suffix:t -> bool
    val is_prefix : t -> prefix:t -> bool
    val is_substring : t -> substring:t -> bool
    val is_substring_at : t -> pos:int -> substring:t -> bool
    val substr_index : ?pos:int -> t -> pattern:t -> int option
    val substr_index_exn : ?pos:int -> t -> pattern:t -> int
    val substr_index_all : t -> may_overlap:bool -> pattern:t -> int list
    val substr_replace_first : ?pos:int -> t -> pattern:t -> with_:t -> t
    val substr_replace_all : t -> pattern:t -> with_:t -> t
  end

  (** [index] gives the index of the first appearance of [char] in the string when
      searching from left to right, or [None] if it's not found. [rindex] does the same but
      searches from the right.

      For example, [String.index "Foo" 'o'] is [Some 1] while [String.rindex "Foo" 'o'] is
      [Some 2].

      The [_exn] versions return the actual index (instead of an option) when [char] is
      found, and raise [Stdlib.Not_found] or [Not_found_s] otherwise.
  *)

  val index : t -> char -> int option
  val index_exn : t -> char -> int
  val index_from : t -> int -> char -> int option
  val index_from_exn : t -> int -> char -> int
  val rindex : t -> char -> int option
  val rindex_exn : t -> char -> int
  val rindex_from : t -> int -> char -> int option
  val rindex_from_exn : t -> int -> char -> int

  (** Produce a sequence of the characters in a string. *)
  val to_sequence : t -> char Sequence.t

  (** Read the characters in a full sequence and produce a string. *)
  val of_sequence : char Sequence.t -> t

  (** Substring search and replace functions.  They use the Knuth-Morris-Pratt algorithm
      (KMP) under the hood.

      The functions in the [Search_pattern] module allow the program to preprocess the
      searched pattern once and then use it many times without further allocations. *)
  module Search_pattern : sig
    type t [@@deriving_inline sexp_of]

    val sexp_of_t : t -> Sexplib0.Sexp.t

    [@@@end]

    (** [create pattern] preprocesses [pattern] as per KMP, building an [int array] of
        length [length pattern].  All inputs are valid. *)
    val create : ?case_sensitive:bool (** default = true *) -> string -> t

    (** [pattern t] returns the string pattern used to create [t]. *)
    val pattern : t -> string

    (** [case_sensitive t] returns whether [t] matches strings case-sensitively. *)
    val case_sensitive : t -> bool

    (** [matches pat str] returns true if [str] matches [pat] *)
    val matches : t -> string -> bool

    (** [pos < 0] or [pos >= length string] result in no match (hence [index] returns
        [None] and [index_exn] raises). *)
    val index : ?pos:int -> t -> in_:string -> int option

    val index_exn : ?pos:int -> t -> in_:string -> int

    (** [may_overlap] determines whether after a successful match, [index_all] should start
        looking for another one at the very next position ([~may_overlap:true]), or jump to
        the end of that match and continue from there ([~may_overlap:false]), e.g.:

        - [index_all (create "aaa") ~may_overlap:false ~in_:"aaaaBaaaaaa" = [0; 5; 8]]
        - [index_all (create "aaa") ~may_overlap:true ~in_:"aaaaBaaaaaa" = [0; 1; 5; 6; 7;
          8]]

        E.g., [replace_all] internally calls [index_all ~may_overlap:false]. *)
    val index_all : t -> may_overlap:bool -> in_:string -> int list

    (** Note that the result of [replace_all pattern ~in_:text ~with_:r] may still
        contain [pattern], e.g.,

        {[
          replace_all (create "bc") ~in_:"aabbcc" ~with_:"cb" = "aabcbc"
        ]} *)
    val replace_first : ?pos:int -> t -> in_:string -> with_:string -> string

    val replace_all : t -> in_:string -> with_:string -> string

    (** Similar to [String.split] or [String.split_on_chars], but instead uses a given
        search pattern as the separator.  Separators are non-overlapping.  *)
    val split_on : t -> string -> string list

    (**/**)

    (*_ See the Jane Street Style Guide for an explanation of [Private] submodules:

      https://opensource.janestreet.com/standards/#private-submodules *)
    module Private : sig
      type public = t

      type t =
        { pattern : string
        ; case_sensitive : bool
        ; kmp_array : int array
        }
      [@@deriving_inline equal ~localize, sexp_of]

      include Ppx_compare_lib.Equal.S with type t := t
      include Ppx_compare_lib.Equal.S_local with type t := t

      val sexp_of_t : t -> Sexplib0.Sexp.t

      [@@@end]

      val representation : public -> t
    end
  end

  (** Substring search and replace convenience functions.  They call [Search_pattern.create]
      and then forget the preprocessed pattern when the search is complete.  [pos < 0] or
      [pos >= length t] result in no match (hence [substr_index] returns [None] and
      [substr_index_exn] raises).  [may_overlap] indicates whether to report overlapping
      matches, see [Search_pattern.index_all]. *)
  val substr_index : ?pos:int -> t -> pattern:t -> int option

  val substr_index_exn : ?pos:int -> t -> pattern:t -> int
  val substr_index_all : t -> may_overlap:bool -> pattern:t -> int list
  val substr_replace_first : ?pos:int -> t -> pattern:t -> with_:t -> t

  (** As with [Search_pattern.replace_all], the result may still contain [pattern]. *)
  val substr_replace_all : t -> pattern:t -> with_:t -> t

  (** [is_substring ~substring:"bar" "foo bar baz"] is true. *)
  val is_substring : t -> substring:t -> bool

  (** [is_substring_at "foo bar baz" ~pos:4 ~substring:"bar"] is true. *)
  val is_substring_at : t -> pos:int -> substring:t -> bool

  (** Returns the reversed list of characters contained in a list. *)
  val to_list_rev : t -> char list

  (** [rev t] returns [t] in reverse order. *)
  val rev : t -> t

  (** [is_suffix s ~suffix] returns [true] if [s] ends with [suffix]. *)

  val is_suffix : t -> suffix:t -> bool

  (** [is_prefix s ~prefix] returns [true] if [s] starts with [prefix]. *)
  val is_prefix : t -> prefix:t -> bool

  (** If the string [s] contains the character [on], then [lsplit2_exn s ~on] returns a pair
      containing [s] split around the first appearance of [on] (from the left). Raises
      [Stdlib.Not_found] or [Not_found_s] when [on] cannot be found in [s]. *)
  val lsplit2_exn : t -> on:char -> t * t

  (** If the string [s] contains the character [on], then [rsplit2_exn s ~on] returns a pair
      containing [s] split around the first appearance of [on] (from the right). Raises
      [Stdlib.Not_found] or [Not_found_s] when [on] cannot be found in [s]. *)
  val rsplit2_exn : t -> on:char -> t * t

  (** [lsplit2 s ~on] optionally returns [s] split into two strings around the
      first appearance of [on] from the left. *)
  val lsplit2 : t -> on:char -> (t * t) option

  (** [rsplit2 s ~on] optionally returns [s] split into two strings around the first
      appearance of [on] from the right. *)
  val rsplit2 : t -> on:char -> (t * t) option

  (** [split s ~on] returns a list of substrings of [s] that are separated by [on].
      Consecutive [on] characters will cause multiple empty strings in the result.
      Splitting the empty string returns a list of the empty string, not the empty list. *)
  val split : t -> on:char -> t list

  (** [split_on_chars s ~on] returns a list of all substrings of [s] that are separated by
      one of the chars from [on].  [on] are not grouped.  So a grouping of [on] in the
      source string will produce multiple empty string splits in the result.  *)
  val split_on_chars : t -> on:char list -> t list

  (** [split_lines t] returns the list of lines that comprise [t].  The lines do not include
      the trailing ["\n"] or ["\r\n"]. *)
  val split_lines : t -> t list

  (** [lfindi ?pos t ~f] returns the smallest [i >= pos] such that [f i t.[i]], if there is
      such an [i].  By default, [pos = 0]. *)
  val lfindi : ?pos:int -> t -> f:(int -> char -> bool) -> int option

  (** [rfindi ?pos t ~f] returns the largest [i <= pos] such that [f i t.[i]], if there is
      such an [i].  By default [pos = length t - 1]. *)
  val rfindi : ?pos:int -> t -> f:(int -> char -> bool) -> int option

  (** [lstrip ?drop s] returns a string with consecutive chars satisfying [drop] (by default
      white space, e.g. tabs, spaces, newlines, and carriage returns) stripped from the
      beginning of [s]. *)
  val lstrip : ?drop:(char -> bool) -> t -> t

  (** [rstrip ?drop s] returns a string with consecutive chars satisfying [drop] (by default
      white space, e.g. tabs, spaces, newlines, and carriage returns) stripped from the end
      of [s]. *)
  val rstrip : ?drop:(char -> bool) -> t -> t

  (** [strip ?drop s] returns a string with consecutive chars satisfying [drop] (by default
      white space, e.g. tabs, spaces, newlines, and carriage returns) stripped from the
      beginning and end of [s]. *)
  val strip : ?drop:(char -> bool) -> t -> t

  (** Like [map], but allows the replacement of a single character with zero or two or more
      characters. *)
  val concat_map : ?sep:t -> t -> f:(char -> t) -> t

  val concat_mapi : ?sep:t -> t -> f:(int -> char -> t) -> t

  (** [tr ~target ~replacement s] replaces every instance of [target] in [s] with
      [replacement]. *)
  val tr : target:char -> replacement:char -> t -> t

  (** [tr_multi ~target ~replacement] returns a function that replaces every
      instance of a character in [target] with the corresponding character in
      [replacement].

      If [replacement] is shorter than [target], it is lengthened by repeating
      its last character. Empty [replacement] is illegal unless [target] also is.

      If [target] contains multiple copies of the same character, the last
      corresponding [replacement] character is used. Note that character ranges
      are {b not} supported, so [~target:"a-z"] means the literal characters ['a'],
      ['-'], and ['z']. *)
  val tr_multi : target:t -> replacement:t -> (t -> t) Staged.t

  (** [chop_suffix_exn s ~suffix] returns [s] without the trailing [suffix],
      raising [Invalid_argument] if [suffix] is not a suffix of [s]. *)
  val chop_suffix_exn : t -> suffix:t -> t

  (** [chop_prefix_exn s ~prefix] returns [s] without the leading [prefix],
      raising [Invalid_argument] if [prefix] is not a prefix of [s]. *)
  val chop_prefix_exn : t -> prefix:t -> t

  val chop_suffix : t -> suffix:t -> t option
  val chop_prefix : t -> prefix:t -> t option

  (** [chop_suffix_if_exists s ~suffix] returns [s] without the trailing [suffix], or just
      [s] if [suffix] isn't a suffix of [s].

      Equivalent to [chop_suffix s ~suffix |> Option.value ~default:s], but avoids
      allocating the intermediate option. *)
  val chop_suffix_if_exists : t -> suffix:t -> t

  (** [chop_prefix_if_exists s ~prefix] returns [s] without the leading [prefix], or just
      [s] if [prefix] isn't a prefix of [s].

      Equivalent to [chop_prefix s ~prefix |> Option.value ~default:s], but avoids
      allocating the intermediate option. *)
  val chop_prefix_if_exists : t -> prefix:t -> t

  (** [suffix s n] returns the longest suffix of [s] of length less than or equal to [n]. *)
  val suffix : t -> int -> t

  (** [prefix s n] returns the longest prefix of [s] of length less than or equal to [n]. *)
  val prefix : t -> int -> t

  (** [drop_suffix s n] drops the longest suffix of [s] of length less than or equal to
      [n]. *)
  val drop_suffix : t -> int -> t

  (** [drop_prefix s n] drops the longest prefix of [s] of length less than or equal to
      [n]. *)
  val drop_prefix : t -> int -> t

  (** Produces the longest common suffix, or [""] if the list is empty. *)
  val common_suffix : t list -> t

  (** Produces the longest common prefix, or [""] if the list is empty. *)
  val common_prefix : t list -> t

  (** Produces the length of the longest common suffix, or 0 if the list is empty. *)
  val common_suffix_length : t list -> int

  (** Produces the length of the longest common prefix, or 0 if the list is empty. *)
  val common_prefix_length : t list -> int

  (** Produces the longest common suffix. *)
  val common_suffix2 : t -> t -> t

  (** Produces the longest common prefix. *)
  val common_prefix2 : t -> t -> t

  (** Produces the length of the longest common suffix. *)
  val common_suffix2_length : t -> t -> int

  (** Produces the length of the longest common prefix. *)
  val common_prefix2_length : t -> t -> int

  (** [concat_array sep ar] like {!String.concat}, but operates on arrays. *)
  val concat_array : ?sep:t -> t array -> t

  (** Builds a multiline text from a list of lines. Each line is terminated and then
      concatenated. Equivalent to:

      {[
        String.concat (List.map lines ~f:(fun line ->
          line ^ if crlf then "\r\n" else "\n"))
      ]}
  *)
  val concat_lines : ?crlf:bool (** default [false] *) -> string list -> string

  (** Slightly faster hash function on strings. *)
  external hash : t -> int = "Base_hash_string"
    [@@noalloc]

  (** Fast equality function on strings, doesn't use [compare_val]. *)
  val equal : t -> t -> bool

  val equal__local : t -> t -> bool
  val of_char : char -> t
  val of_char_list : char list -> t

  (** [pad_left ?char s ~len] returns [s] padded to the length [len] by adding characters
      [char] to the beginning of the string. If s is already longer than [len] it is
      returned unchanged. *)
  val pad_left : ?char:char (** default is [' '] *) -> string -> len:int -> string

  (** [pad_right ?char ~s len] returns [s] padded to the length [len] by adding characters
      [char] to the end of the string. If s is already longer than [len] it is returned
      unchanged. *)
  val pad_right : ?char:char (** default is [' '] *) -> string -> len:int -> string

  (** Reports the Levenshtein edit distance between two strings. Computes the minimum number
      of single-character insertions, deletions, and substitutions needed to transform one
      into the other.

      For strings of length M and N, its time complexity is O(M*N) and its space complexity
      is O(min(M,N)). *)
  val edit_distance : string -> string -> int

  (** Operations for escaping and unescaping strings, with parameterized escape and
      escapeworthy characters.  Escaping/unescaping using this module is more efficient than
      using Pcre. Benchmark code can be found in core/benchmarks/string_escaping.ml. *)
  module Escaping : sig
    (** [escape_gen_exn escapeworthy_map escape_char] returns a function that will escape a
        string [s] as follows: if [(c1,c2)] is in [escapeworthy_map], then all occurrences
        of [c1] are replaced by [escape_char] concatenated to [c2].

        Raises an exception if [escapeworthy_map] is not one-to-one.  If [escape_char] is
        not in [escapeworthy_map], then it will be escaped to itself.*)
    val escape_gen_exn
      :  escapeworthy_map:(char * char) list
      -> escape_char:char
      -> (string -> string) Staged.t

    val escape_gen
      :  escapeworthy_map:(char * char) list
      -> escape_char:char
      -> (string -> string) Or_error.t

    (** [escape ~escapeworthy ~escape_char s] is
        {[
          escape_gen_exn ~escapeworthy_map:(List.zip_exn escapeworthy escapeworthy)
            ~escape_char
        ]}
        Duplicates and [escape_char] will be removed from [escapeworthy].  So, no
        exception will be raised *)
    val escape : escapeworthy:char list -> escape_char:char -> (string -> string) Staged.t

    (** [unescape_gen_exn] is the inverse operation of [escape_gen_exn]. That is,
        {[
          let escape = Staged.unstage (escape_gen_exn ~escapeworthy_map ~escape_char) in
          let unescape = Staged.unstage (unescape_gen_exn ~escapeworthy_map ~escape_char) in
          assert (s = unescape (escape s))
        ]}
        always succeed when ~escapeworthy_map is not causing exceptions. *)
    val unescape_gen_exn
      :  escapeworthy_map:(char * char) list
      -> escape_char:char
      -> (string -> string) Staged.t

    val unescape_gen
      :  escapeworthy_map:(char * char) list
      -> escape_char:char
      -> (string -> string) Or_error.t

    (** [unescape ~escape_char] is defined as [unescape_gen_exn ~map:\[\] ~escape_char] *)
    val unescape : escape_char:char -> (string -> string) Staged.t

    (** Any char in an escaped string is either escaping, escaped, or literal. For example,
        for escaped string ["0_a0__0"] with [escape_char] as ['_'], pos 1 and 4 are
        escaping, 2 and 5 are escaped, and the rest are literal.

        [is_char_escaping s ~escape_char pos] returns true if the char at [pos] is escaping,
        false otherwise. *)
    val is_char_escaping : string -> escape_char:char -> int -> bool

    (** [is_char_escaped s ~escape_char pos] returns true if the char at [pos] is escaped,
        false otherwise. *)
    val is_char_escaped : string -> escape_char:char -> int -> bool

    (** [is_char_literal s ~escape_char pos] returns true if the char at [pos] is not
        escaped or escaping. *)
    val is_char_literal : string -> escape_char:char -> int -> bool

    (** [index s ~escape_char char] finds the first literal (not escaped) instance of [char]
        in s starting from 0. *)
    val index : string -> escape_char:char -> char -> int option

    val index_exn : string -> escape_char:char -> char -> int

    (** [rindex s ~escape_char char] finds the first literal (not escaped) instance of
        [char] in [s] starting from the end of [s] and proceeding towards 0. *)
    val rindex : string -> escape_char:char -> char -> int option

    val rindex_exn : string -> escape_char:char -> char -> int

    (** [index_from s ~escape_char pos char] finds the first literal (not escaped) instance
        of [char] in [s] starting from [pos] and proceeding towards the end of [s]. *)
    val index_from : string -> escape_char:char -> int -> char -> int option

    val index_from_exn : string -> escape_char:char -> int -> char -> int

    (** [rindex_from s ~escape_char pos char] finds the first literal (not escaped)
        instance of [char] in [s] starting from [pos] and towards 0. *)
    val rindex_from : string -> escape_char:char -> int -> char -> int option

    val rindex_from_exn : string -> escape_char:char -> int -> char -> int

    (** [split s ~escape_char ~on] returns a list of substrings of [s] that are separated by
        literal versions of [on].  Consecutive [on] characters will cause multiple empty
        strings in the result.  Splitting the empty string returns a list of the empty
        string, not the empty list.

        E.g., [split ~escape_char:'_' ~on:',' "foo,bar_,baz" = ["foo"; "bar_,baz"]]. *)
    val split : string -> on:char -> escape_char:char -> string list

    (** [split_on_chars s ~on] returns a list of all substrings of [s] that are separated by
        one of the literal chars from [on].  [on] are not grouped.  So a grouping of [on] in
        the source string will produce multiple empty string splits in the result.

        E.g., [split_on_chars ~escape_char:'_' ~on:[',';'|'] "foo_|bar,baz|0" ->
        ["foo_|bar"; "baz"; "0"]]. *)
    val split_on_chars : string -> on:char list -> escape_char:char -> string list

    (** [lsplit2 s ~on ~escape_char] splits s into a pair on the first literal instance of
        [on] (meaning the first unescaped instance) starting from the left. *)
    val lsplit2 : string -> on:char -> escape_char:char -> (string * string) option

    val lsplit2_exn : string -> on:char -> escape_char:char -> string * string

    (** [rsplit2 s ~on ~escape_char] splits [s] into a pair on the first literal
        instance of [on] (meaning the first unescaped instance) starting from the
        right. *)
    val rsplit2 : string -> on:char -> escape_char:char -> (string * string) option

    val rsplit2_exn : string -> on:char -> escape_char:char -> string * string

    (** These are the same as [lstrip], [rstrip], and [strip] for generic strings, except
        that they only drop literal characters -- they do not drop characters that are
        escaping or escaped.  This makes sense if you're trying to get rid of junk
        whitespace (for example), because escaped whitespace seems more likely to be
        deliberate and not junk. *)
    val lstrip_literal : ?drop:(char -> bool) -> t -> escape_char:char -> t

    val rstrip_literal : ?drop:(char -> bool) -> t -> escape_char:char -> t
    val strip_literal : ?drop:(char -> bool) -> t -> escape_char:char -> t
  end

  (** UTF-8 encoding. See [Utf] interface. *)
  module Utf8 : Utf_as_string

  (** UTF-16 little-endian encoding. See [Utf] interface. *)
  module Utf16le : Utf_as_string

  (** UTF-16 big-endian encoding. See [Utf] interface. *)
  module Utf16be : Utf_as_string

  (** UTF-32 little-endian encoding. See [Utf] interface. *)
  module Utf32le : Utf_as_string

  (** UTF-32 big-endian encoding. See [Utf] interface. *)
  module Utf32be : Utf_as_string

  module type Utf = Utf
  module type Utf_as_string = Utf_as_string
end