1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
type t = { start : int; stop : int; s : string }
let v ?(start = 0) ?stop s =
let s_len = String.length s in
let stop = match stop with None -> s_len | Some stop -> stop in
if start < 0 || stop > s_len || stop < start then invalid_arg "Out of bounds";
{ start; stop; s }
let empty = { start = 0; stop = 0; s = "" }
let length { start; stop; _ } = stop - start
let head ?(rev = false) { s; start; stop } =
if start = stop then None else Some s.[if rev then stop - 1 else start]
let tail ?(rev = false) ({ s; start; stop } as sub) =
if start = stop then sub
else if rev then { s; start; stop = stop - 1 }
else { s; start = start + 1; stop }
let is_empty { start; stop; _ } = stop - start = 0
let is_prefix ~affix:({ s = affix; start = astart; _ } as affix_sub)
({ s; start = sstart; _ } as s_sub) =
let len_a = length affix_sub in
let len_s = length s_sub in
if len_a > len_s then false
else
let max_zidx = len_a - 1 in
let rec loop i =
if i > max_zidx then true
else if affix.[astart + i] <> s.[sstart + i] then false
else loop (i + 1)
in
loop 0
let is_suffix ~affix:({ s = affix; stop = astop; _ } as affix_sub)
({ s; stop = sstop; _ } as s_sub) =
let len_a = length affix_sub in
let len_s = length s_sub in
if len_a > len_s then false
else
let max_zidx = len_a - 1 in
let max_idx_a = astop - 1 in
let max_idx_s = sstop - 1 in
let rec loop i =
if i > max_zidx then true
else if affix.[max_idx_a - i] <> s.[max_idx_s - i] then false
else loop (i + 1)
in
loop 0
let fspan ~min ~max ~sat ({ s; start; stop } as sub) =
if min < 0 then invalid_arg "fspan"
else if max < 0 then invalid_arg "fspan"
else if min > max || max = 0 then ({ s; start; stop = start }, sub)
else
let max_idx = stop - 1 in
let max_idx =
let k = start + max - 1 in
if k > max_idx || k < 0 then max_idx else k
in
let need_idx = start + min in
let rec loop i =
if i <= max_idx && sat s.[i] then loop (i + 1)
else if i < need_idx || i = 0 then ({ s; start; stop = start }, sub)
else if i = stop then (sub, { s; start = stop; stop })
else ({ s; start; stop = i }, { s; start = i; stop })
in
loop start
let rspan ~min ~max ~sat ({ s; start; stop } as sub) =
if min < 0 then invalid_arg "rspan"
else if max < 0 then invalid_arg "rspan"
else if min > max || max = 0 then (sub, { s; start = stop; stop })
else
let max_idx = stop - 1 in
let min_idx =
let k = stop - max in
if k < start then start else k
in
let need_idx = stop - min - 1 in
let rec loop i =
if i >= min_idx && sat s.[i] then loop (i - 1)
else if i > need_idx || i = max_idx then (sub, { s; start = stop; stop })
else if i = start - 1 then ({ s; start; stop = start }, sub)
else ({ s; start; stop = i + 1 }, { s; start = i + 1; stop })
in
loop max_idx
let span ?(rev = false) ?(min = 0) ?(max = max_int) ?(sat = fun _ -> true) sub =
match rev with
| true -> rspan ~min ~max ~sat sub
| false -> fspan ~min ~max ~sat sub
let to_string { s; start; stop } =
if start = stop then ""
else if start = 0 && stop = String.length s then s
else String.sub s start (stop - start)
let rebase ({ start; stop; _ } as sub) =
{ s = to_string sub; start = 0; stop = stop - start }
let concat ?sep:({ s = sep; start = sep_start; _ } as sep_sub = empty) =
function
| [] -> empty
| [ s ] -> rebase s
| ({ s; start; _ } as sub) :: ss ->
let sub_len = length sub in
let sep_len = length sep_sub in
let rec cat_len sep_count l ss =
if l < 0 then l
else
match ss with
| s :: ss -> cat_len (sep_count + 1) (l + length s) ss
| [] ->
if sep_len = 0 then l
else
let max_sep_count = Sys.max_string_length / sep_len in
if sep_count < 0 || sep_count > max_sep_count then -1
else (sep_count * sep_len) + l
in
let cat_len = cat_len 0 sub_len ss in
if cat_len < 0 then invalid_arg "concat"
else
let b = Bytes.create cat_len in
Bytes.blit_string s start b 0 sub_len;
let rec loop i = function
| [] -> Bytes.unsafe_to_string b
| ({ s = str; start = str_start; _ } as str_sub) :: ss ->
let sep_pos = i in
let str_pos = i + sep_len in
let str_len = length str_sub in
Bytes.blit_string sep sep_start b sep_pos sep_len;
Bytes.blit_string str str_start b str_pos str_len;
loop (str_pos + str_len) ss
in
{ s = loop sub_len ss; start = 0; stop = cat_len }
let exists sat { s; start; stop } =
let rec loop i =
if i > stop - 1 then false else if sat s.[i] then true else loop (succ i)
in
loop start
let for_all sat { s; start; stop } =
let rec loop i =
if i > stop - 1 then true else if sat s.[i] then loop (succ i) else false
in
loop start
let is_white = function ' ' | '\t' .. '\r' -> true | _ -> false
let trim ?(drop = is_white) ({ s; start; stop } as sub) =
let len = stop - start in
if len = 0 then sub
else
let max_pos = stop in
let max_idx = stop - 1 in
let rec left_pos i =
if i > max_idx then max_pos
else if drop s.[i] then left_pos (i + 1)
else i
in
let rec right_pos i =
if i < start then start
else if drop s.[i] then right_pos (i - 1)
else i + 1
in
let left = left_pos start in
if left = max_pos then
{ s; start = (start + stop) / 2; stop = (start + stop) / 2 }
else
let right = right_pos max_idx in
if left = start && right = max_pos then sub
else { s; start = left; stop = right }
let equal_bytes { s = s0; start = start0; stop = stop0 }
{ s = s1; start = start1; stop = stop1 } =
if s0 == s1 && start0 = start1 && stop0 = stop1 then true
else
let len0 = stop0 - start0 in
let len1 = stop1 - start1 in
if len0 <> len1 then false
else
let max_zidx = len0 - 1 in
let rec loop i =
if i > max_zidx then true
else if s0.[start0 + i] <> s1.[start1 + i] then false
else loop (i + 1)
in
loop 0
let fcut ~sep:{ s = sep; start = sep_start; stop = sep_stop } { s; start; stop }
=
let sep_len = sep_stop - sep_start in
if sep_len = 0 then invalid_arg "fcut"
else
let max_sep_zidx = sep_len - 1 in
let max_s_idx = stop - sep_len in
let rec check_sep i k =
if k > max_sep_zidx then
Some ({ s; start; stop = i }, { s; start = i + sep_len; stop })
else if s.[i + k] = sep.[sep_start + k] then check_sep i (k + 1)
else scan (i + 1)
and scan i =
if i > max_s_idx then None
else if s.[i] = sep.[sep_start] then check_sep i 1
else scan (i + 1)
in
scan start
let rcut ~sep:{ s = sep; start = sep_start; stop = sep_stop } { s; start; stop }
=
let sep_len = sep_stop - sep_start in
if sep_len = 0 then invalid_arg "rcut"
else
let max_sep_zidx = sep_len - 1 in
let max_s_idx = stop - 1 in
let rec check_sep i k =
if k > max_sep_zidx then
Some ({ s; start; stop = i }, { s; start = i + sep_len; stop })
else if s.[i + k] = sep.[sep_start + k] then check_sep i (k + 1)
else rscan (i - 1)
and rscan i =
if i < start then None
else if s.[i] = sep.[sep_start] then check_sep i 1
else rscan (i - 1)
in
rscan (max_s_idx - max_sep_zidx)
let cut ?(rev = false) ~sep s =
match rev with true -> rcut ~sep s | false -> fcut ~sep s
let with_range ?(first = 0) ?(len = max_int) { s; start; stop } =
if len < 0 then invalid_arg "with_range"
else
let s_len = stop - start in
let max_idx = s_len - 1 in
let empty = function
| first when first < 0 -> { s; start; stop = start }
| first when first > max_idx -> { s; start = stop; stop }
| first -> { s; start = start + first; stop = start + first }
in
if len = 0 then empty first
else
let last =
match len with
| len when len = max_int -> max_idx
| len ->
let last = first + len - 1 in
if last > max_idx then max_idx else last
in
let first = if first < 0 then 0 else first in
if first > max_idx || last < 0 || first > last then empty first
else { s; start = start + first; stop = start + last + 1 }
let drop ?(rev = false) ?min ?max ?sat t =
(if rev then fst else snd) @@ span ~rev ?min ?max ?sat t
let add_sub ~no_empty s ~start ~stop acc =
if start = stop then
if no_empty then acc else { s; start; stop = start } :: acc
else { s; start; stop } :: acc
let fcuts ~no_empty ~sep:{ s = sep; start = sep_start; stop = sep_stop }
({ s; start; stop } as sub) =
let sep_len = sep_stop - sep_start in
if sep_len = 0 then invalid_arg "fcuts"
else
let s_len = stop - start in
let max_sep_zidx = sep_len - 1 in
let max_s_idx = stop - sep_len in
let rec check_sep sstart i k acc =
if k > max_sep_zidx then
let new_start = i + sep_len in
scan new_start new_start (add_sub ~no_empty s ~start:sstart ~stop:i acc)
else if s.[i + k] = sep.[sep_start + k] then
check_sep sstart i (k + 1) acc
else scan sstart (i + 1) acc
and scan sstart i acc =
if i > max_s_idx then
if sstart = start then if no_empty && s_len = 0 then [] else [ sub ]
else List.rev (add_sub ~no_empty s ~start:sstart ~stop acc)
else if s.[i] = sep.[sep_start] then check_sep sstart i 1 acc
else scan sstart (i + 1) acc
in
scan start start []
let rcuts ~no_empty ~sep:{ s = sep; start = sep_start; stop = sep_stop }
({ s; start; stop } as sub) =
let sep_len = sep_stop - sep_start in
if sep_len = 0 then invalid_arg "rcuts"
else
let s_len = stop - start in
let max_sep_zidx = sep_len - 1 in
let max_s_idx = stop - 1 in
let rec check_sep sstop i k acc =
if k > max_sep_zidx then
let start = i + sep_len in
rscan i (i - sep_len) (add_sub ~no_empty s ~start ~stop:sstop acc)
else if s.[i + k] = sep.[sep_start + k] then check_sep sstop i (k + 1) acc
else rscan sstop (i - 1) acc
and rscan sstop i acc =
if i < start then
if sstop = stop then if no_empty && s_len = 0 then [] else [ sub ]
else add_sub ~no_empty s ~start ~stop:sstop acc
else if s.[i] = sep.[sep_start] then check_sep sstop i 1 acc
else rscan sstop (i - 1) acc
in
rscan stop (max_s_idx - max_sep_zidx) []
let cuts ?(rev = false) ?(empty = true) ~sep s =
match rev with
| true -> rcuts ~no_empty:(not empty) ~sep s
| false -> fcuts ~no_empty:(not empty) ~sep s